Union with two collections is not outputting expected result

I am usign mongo version as v5.0.19

I have 2 collections with the data in it as below:

Collection 1 - external_S_P_FLAT_main_api:

[
    {
      "_id": {
        "$oid": "656497ff4f6a4c11e61b865e"
      },
      "data.pricing.material": "TG11",
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.cost_center": "DE000001"
    },
    {
      "_id": {
        "$oid": "656497ff4f6a4c11e61b865f"
      },
      "data.pricing.material": "TG12",
      "data.pricing.controlling_area": "AJ00"
    },
    {
      "_id": {
        "$oid": "656497ff4f6a4c11e61b8660"
      },
      "data.pricing.material": "TG14",
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.cost_center": "DE000003"
    },
    {
      "_id": {
        "$oid": "656497ff4f6a4c11e61b8661"
      },
      "data.pricing.material": "TG2341",
      "data.pricing.controlling_area": "AJ00"
    }
  ]

Collection 2 - external_S_C_FLAT_main_api:

[
    {
      "_id": {
        "$oid": "656498004f6a4c11e61b8662"
      },
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000001",
      "data.costcenter.company_code": "DE00",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000001 - 3rd"
    },
    {
      "_id": {
        "$oid": "656498004f6a4c11e61b8663"
      },
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000003",
      "data.costcenter.company_code": "DE00",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000003 - 1st"
    },
    {
      "_id": {
        "$oid": "656498004f6a4c11e61b8610"
      },
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000005",
      "data.costcenter.company_code": "DE00",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000005 - 5th"
    }
  ]

I am trying to make union with join with below query

db.external_S_P_FLAT_main_api.aggregate([
    {
      "$addFields": {
        "external_S_P_FLAT_main_api_data.pricing.cost_center": "$data.pricing.cost_center"
      }
    },
    {
      "$lookup": {
        from: "external_S_C_FLAT_main_api",
        let: {
          let_data__pricing__cost_center: {
            "$getField": "data.pricing.cost_center"
          }
        },
        pipeline: [
          {
            "$match": {
              "$expr": {
                "$and": [
                  {
                    "$eq": [
                      {
                        "$getField": "data.costcenter.cost_center"
                      },
                      "$$let_data__pricing__cost_center"
                    ]
                  }
                ]
              }
            }
          }
        ],
        as: "from_external_S_C_FLAT_main_api"
      }
    },
    {
      "$unwind": {
        path: "$from_external_S_C_FLAT_main_api",
        preserveNullAndEmptyArrays: true
      }
    },
    {
      "$replaceRoot": {
        newRoot: {
          "$mergeObjects": [
            "$from_external_S_C_FLAT_main_api",
            "$$ROOT"
          ]
        }
      }
    },
    {
      $unionWith: {
        coll: "external_S_C_FLAT_main_api",
        pipeline: [
          {
            "$match": {
              "$expr": {
                "$and": [
                  {
                    "$ne": [
                      {
                        "$getField": "data.costcenter.cost_center"
                      },
                      false
                    ]
                  }
                ]
              }
            }
          },
          {
            $project: {
              _id: 0
            }
          }
        ]
      }
    },
    {
      "$project": {
        _id: 0,
        "external_S_P_FLAT_main_api_data.pricing.controlling_area": 0,
        "external_S_P_FLAT_main_api_data.pricing.cost_center": 0,
        external_S_P_FLAT_main_api_IS_INVALID: 0,
        external_S_P_FLAT_main_api_USER_ID: 0,
        from_external_S_C_FLAT_main_api: 0
      }
    }
  ])

The result i am getting is as below:

[
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000001",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000001 - 3rd",
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.cost_center": "DE000001",
      "data.pricing.material": "TG11",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.material": "TG12",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000003",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000003 - 1st",
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.cost_center": "DE000003",
      "data.pricing.material": "TG14",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.material": "TG2341",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000001",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000001 - 3rd"
    },
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000003",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000003 - 1st"
    },
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000005",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000005 - 5th"
    }
  ]

The expected result I want is as below where the matched records from collection 2 (“data.costcenter.long_description”: “CC DE000001 - 3rd”, & “data.costcenter.long_description”: “CC DE000003 - 1st”)
should not include again in the union result.
Thus the expected result is as below:

[
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000001",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000001 - 3rd",
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.cost_center": "DE000001",
      "data.pricing.material": "TG11",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.material": "TG12",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000003",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000003 - 1st",
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.cost_center": "DE000003",
      "data.pricing.material": "TG14",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.pricing.controlling_area": "AJ00",
      "data.pricing.material": "TG2341",
      "external_S_P_FLAT_main_api_data": {
        "pricing": {}
      }
    },
    {
      "data.costcenter.company_code": "DE00",
      "data.costcenter.controlling_area": "AJ00",
      "data.costcenter.cost_center": "DE000005",
      "data.costcenter.hierarchy_area": "AJ00",
      "data.costcenter.long_description": "CC DE000005 - 5th"
    }
  ]

Playground: Mongo playground

Above is the playground for the same

I have tried the below query. In that, I can see the output but need to get the array which is in comparison with 

db.external_S_P_FLAT_main_api.aggregate([
  {
    "$addFields": {
      "external_S_P_FLAT_main_api_data.pricing.cost_center": "$data.pricing.cost_center"
    }
  },
  {
    "$lookup": {
      from: "external_S_C_FLAT_main_api",
      let: {
        let_data__pricing__cost_center: {
          "$getField": "data.pricing.cost_center"
        }
      },
      pipeline: [
        {
          "$match": {
            "$expr": {
              "$and": [
                {
                  "$eq": [
                    {
                      "$getField": "data.costcenter.cost_center"
                    },
                    "$$let_data__pricing__cost_center"
                  ]
                }
              ]
            }
          }
        }
      ],
      as: "from_external_S_C_FLAT_main_api"
    }
  },
  //{
  //  "$match": {
  //    "from_external_S_C_FLAT_main_api": {
  //      $ne: []
  //    }
  //  }
  //},
  //{
  //  "$addFields": {
  //    "from_external_S_C_FLAT_main_api_bk": {
  //      $objectToArray: "$from_external_S_C_FLAT_main_api"
  //    }
  //  }
  //},
  {
    "$unwind": {
      path: "$from_external_S_C_FLAT_main_api",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    "$replaceRoot": {
      newRoot: {
        "$mergeObjects": [
          "$from_external_S_C_FLAT_main_api",
          "$$ROOT"
        ]
      }
    }
  },
  {
    $unionWith: {
      coll: "external_S_C_FLAT_main_api",
      pipeline: [
        {
          "$match": {
            "$expr": {
              "$not": [
                //{
                //  "$ne": [
                //    {
                //      "$getField": "data.costcenter.cost_center"
                //    },
                //    "DE000001"
                //  ]
                //},
                {
                  "$in": [
                    {
                      "$getField": "data.costcenter.cost_center"
                    },
                    [
                      "DE000001",
                      "DE000003"
                    ]
                  ]
                }
              ]
            }
          }
        },
        {
          $project: {
            _id: 0
          }
        }
      ]
    }
  },
  {
    "$project": {
      _id: 0,
      "external_S_P_FLAT_main_api_data.pricing.controlling_area": 0,
      "external_S_P_FLAT_main_api_data.pricing.cost_center": 0,
      external_S_P_FLAT_main_api_IS_INVALID: 0,
      external_S_P_FLAT_main_api_USER_ID: 0,
      from_external_S_C_FLAT_main_api: 0
    }
  }
])


I want this array  ["DE000001",  "DE000003"] to be prepared in the previous stage and passed on to the next stage.

Can anyone help me with this
Below is the expected query

`
db.external_S_P_FLAT_main_api.aggregate([
  {
    $unionWith: {
      coll: "external_S_C_FLAT_main_api"
    }
  },
  {
    "$lookup": {
      from: "external_S_C_FLAT_main_api",
      let: {
        let_data__pricing__cost_center: {
          "$getField": "data.pricing.cost_center"
        }
      },
      pipeline: [
        {
          "$match": {
            "$expr": {
              "$and": [
                {
                  "$eq": [
                    {
                      "$getField": "data.costcenter.cost_center"
                    },
                    "$$let_data__pricing__cost_center"
                  ]
                }
              ]
            }
          }
        }
      ],
      as: "from_external_S_C_FLAT_main_api"
    }
  },
  {
    "$addFields": {
      "otherId": {
        "$cond": [
          {
            "$eq": [
              {
                $getField: "from_external_S_C_FLAT_main_api"
              },
              []
            ]
          },
          {
            "$toString": "$_id"
          },
          {
            "$toString": {
              $arrayElemAt: [
                "$from_external_S_C_FLAT_main_api._id",
                0
              ]
            }
          },
        ]
      }
    }
  },
  {
    "$unwind": {
      path: "$from_external_S_C_FLAT_main_api",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    "$replaceRoot": {
      newRoot: {
        "$mergeObjects": [
          "$from_external_S_C_FLAT_main_api",
          "$$ROOT"
        ]
      }
    }
  },
  {
    "$lookup": {
      from: "external_S_P_FLAT_main_api",
      let: {
        let_data__costcenter__cost_center: {
          "$getField": "data.costcenter.cost_center"
        }
      },
      pipeline: [
        {
          "$match": {
            "$expr": {
              "$and": [
                {
                  "$eq": [
                    {
                      "$getField": "data.pricing.cost_center"
                    },
                    "$$let_data__costcenter__cost_center"
                  ]
                }
              ]
            }
          }
        }
      ],
      as: "from_external_S_P_FLAT_main_api"
    }
  },
  {
    $group: {
      _id: "$otherId",
      duplicates: {
        $push: "$$ROOT"
      }
    }
  },
  {
    $project: {
      result: {
        $arrayElemAt: [
          "$duplicates",
          0
        ]
      }
    }
  },
  {
    "$unwind": {
      path: "$result",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    "$replaceRoot": {
      newRoot: {
        "$mergeObjects": [
          "$result",
          "$$ROOT"
        ]
      }
    }
  },
  {
    "$project": {
      _id: 0,
      "external_S_P_FLAT_main_api_data.pricing.controlling_area": 0,
      "external_S_P_FLAT_main_api_data.pricing.cost_center": 0,
      external_S_P_FLAT_main_api_IS_INVALID: 0,
      external_S_P_FLAT_main_api_USER_ID: 0,
      from_external_S_C_FLAT_main_api: 0,
      from_external_S_P_FLAT_main_api: 0,
      result: 0,
      otherId: 0
    }
  }
])
`

This topic was automatically closed 5 days after the last reply. New replies are no longer allowed.