Aggregate lookups with subarray

I need to lookup data from other collection, I have done this before but when trying to get ot subarray, it is turning painful and I am not sure what I am doing wrong.

Below is a sample from values collection.

{
    "_id" : "328925Atuador cerâmico, símbolo X",
    "valueName" : "Atuador cerâmico, símbolo X",
    "valueDescription" : "",
    "language" : "Portuguese",
    "changedOn" : "2/2/2021 5:36 PM",
    "valueAlias" : "",
    "vid" : "328925",
    "createdOn" : "9/18/2019 10:53 AM",
    "timestamp" : ISODate("2023-01-26T18:08:20.118+0000")
}
{
    "_id" : "328925แกนเซรามิก",
    "valueName" : "แกนเซรามิก",
    "valueDescription" : "",
    "language" : "Thai",
    "changedOn" : "",
    "valueAlias" : "",
    "vid" : "328925",
    "createdOn" : "9/18/2019 10:53 AM",
    "timestamp" : ISODate("2023-01-26T18:08:20.118+0000")
}

sample document from backbone collection


{
    "_id" : "164",
    "structureGroupIdentifier" : "164",
    "structureGroupParentIdentifier" : "11",
    "structureGroupName" : "Power Transformers",
    "features" : [
        {
            "pid" : "69",
            "parameterName" : "Mounting Type (69)",
            "vid" : [
                "1",
                " 328925",
                " 339014",
                " 384993",
                " 409393",
                " 411897",
                " 420487"
            ],
            "rank" : "375",
            "priority" : "Filterable",
            "isRange" : "",
            "dataType" : "Character string",
            "multivalue" : "No",
            "leadingParameterIdentifier" : "",
            "followingParameterIdentifier" : "",
            "createdOn" : "9/17/2019 4:06 PM",
            "changedOn" : "7/20/2020 12:30 PM",
            "longTailKeyword" : ""
        },
       
    ],
    "timestamp" : ISODate("2023-01-31T01:31:05.787+0000")
}

I developed a pipeline to merge all languages from values collection under a vid

 [
        {
            "$match" : {
                "vid" : "328925"
            }
        }, 
        {
            "$group" : {
                "_id" : "$vid",
                "vidLanguages" : {
                    "$push" : "$$CURRENT"
                }
            }
        }
    ]

I used this pipeline for a lookup in the backbone collection, I am confused about using $$CURRENT in the pipeline of a lookup, but I am not sure of how else accomplish what I need.

Here is the pipeline.


[
        {
            "$match" : {
                "structureGroupIdentifier" : "164"
            }
        }, 
        {
            "$unwind" : {
                "path" : "$features",
                "preserveNullAndEmptyArrays" : true
            }
        }, 
        {
            "$lookup" : {
                "from" : "values_master_collection",
                "let" : {
                    "backbone_vid" : "$features.vid"
                },
                "pipeline" : [
                    {
                        "$match" : {
                            "vid" : "$$backbone_vid"
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$vid",
                            "vidLanguages" : {
                                "$push" : "$$CURRENT"
                            }
                        }
                    }
                ],
                "as" : "feature.vid"
            }
        }
    ]

what I am getting is


{
    "_id" : "164",
    "structureGroupIdentifier" : "164",
    "structureGroupParentIdentifier" : "11",
    "structureGroupName" : "Power Transformers",
    "features" : {
        "pid" : "69",
        "parameterName" : "Mounting Type (69)",
        "vid" : [

        ],
        "rank" : "375",
        "priority" : "Filterable",
        "isRange" : "",
        "dataType" : "Character string",
        "multivalue" : "No",
        "leadingParameterIdentifier" : "",
        "followingParameterIdentifier" : "",
        "createdOn" : "9/17/2019 4:06 PM",
        "changedOn" : "7/20/2020 12:30 PM",
        "longTailKeyword" : ""
    },
    "timestamp" : ISODate("2023-01-31T01:31:05.787+0000")
}

what I need is for it look like this

{
    "_id" : "164",
    "structureGroupIdentifier" : "164",
    "structureGroupParentIdentifier" : "11",
    "structureGroupName" : "Power Transformers",
    "features" : {
        "pid" : "69",
        "parameterName" : "Mounting Type (69)",
        "vid" : [
            {
                "_id" : "328925",
                "vidLanguages" : [
                    {
                        "_id" : "328925Atuador cerâmico, símbolo X",
                        "valueName" : "Atuador cerâmico, símbolo X",
                        "valueDescription" : "",
                        "language" : "Portuguese",
                        "changedOn" : "2/2/2021 5:36 PM",
                        "valueAlias" : "",
                        "vid" : "328925",
                        "createdOn" : "9/18/2019 10:53 AM",
                        "timestamp" : ISODate("2023-01-26T18:08:20.118+0000")
                    },
                    {
                        "_id" : "328925แกนเซรามิก",
                        "valueName" : "แกนเซรามิก",
                        "valueDescription" : "",
                        "language" : "Thai",
                        "changedOn" : "",
                        "valueAlias" : "",
                        "vid" : "328925",
                        "createdOn" : "9/18/2019 10:53 AM",
                        "timestamp" : ISODate("2023-01-26T18:08:20.118+0000")
                    },
                    
                ]
            },
            {}//many more objects

        ],
        "rank" : "375",
        "priority" : "Filterable",
        "isRange" : "",
        "dataType" : "Character string",
        "multivalue" : "No",
        "leadingParameterIdentifier" : "",
        "followingParameterIdentifier" : "",
        "createdOn" : "9/17/2019 4:06 PM",
        "changedOn" : "7/20/2020 12:30 PM",
        "longTailKeyword" : ""
    },
    "timestamp" : ISODate("2023-01-31T01:31:05.787+0000")
}

all help is appreciated

Hello @venkata_sreekanth_bhagavatula, Welcome to the MongoDB community Forum :slight_smile:

There are a few fixes needed in your query,

  1. $match stage in $lookup, you need to use $expr operator to match internal properties and $in operator for an array of ids,
{
    "$match": {
        "$expr": {
            "$in": ["$vid", "$$backbone_vid"]
        }
    }
}
  1. You need to use $$ROOT to access current document with $push operator in $group stage
 "$push": "$$ROOT"
  1. Change to name as property in name in $lookup from "as": "feature.vid" to "as": "features.vid"

Final Query
db.backbone.aggregate([
  {
    "$match": {
      "structureGroupIdentifier": "164"
    }
  },
  {
    "$unwind": {
      "path": "$features",
      "preserveNullAndEmptyArrays": true
    }
  },
  {
    "$lookup": {
      "from": "values_master_collection",
      "let": {
        "backbone_vid": "$features.vid"
      },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$in": [
                "$vid",
                "$$backbone_vid"
              ]
            }
          }
        },
        {
          "$group": {
            "_id": "$vid",
            "vidLanguages": {
              "$push": "$$ROOT"
            }
          }
        }
      ],
      "as": "features.vid"
    }
  }
])
2 Likes

Thank you for the response. It does generate the result required but it is very slow. The in operator is doing a collection scan of 7 million documents against the backbone array which has like 5 elements. some structures have hundreds of values. I thought I could replace the $in with a function and perform something like db.collection.find to speed it up, but from what I read db object isn’t accessible from function.

Anyway thanks for the help, if you know something about how to speed it up please do let me know

You can improve your query by creating an indexs on match properties, like on structureGroupIdentifier,

Second, the $expr operator can use indexes on properties if you are using MongoDB 5+ version,

If you are using MongoDB’s <5 lower versions then you have to use $lookup with localField and foreignField syntax to support the index. and do other pipeline operations outside $lookup or do it after the query on the front-end side.

For more clearification you need to post explain() result of your query,