MongoDB query optimisation for nested array of objects

Hi All,

I need help regarding query optimisation for nested array of objects.

We have collection with large number of documents and every single document contains nested array of objects up to 3rd level as below :

Collection name : products

Total documents in collection : 20 million

Size of each document : >= 500 kb

    {
       _id: ObjectId(""),
       id: 999,
       name: "prod1",
       // all other product fields
       sellers:[
    		{
    			"_id" : ObjectId(""),
    			"seller_id" : 99,
    			"name" : "Business 1",
                           // all other fields
                            "providers":[
                                 {
                                      seller_id:99,
                                      provider_id:1,
                                      provider_name:"prov 1"
                                      quantity:50,
                                      order_allowed:40,
                                      notification_on_stock:true 
                                 },
                                 {
                                      seller_id:99,
                                      provider_id:2,
                                      provider_name:"prov 2"
                                      quantity:20,
                                      order_allowed:20,
                                      notification_on_stock:true 
                                 }
                             ],
                            // next fields
    		},{
                            "_id" : ObjectId(""),
    			"seller_id" : 9,
    			"name" : "Business 2",
                           // all other fields
                            "providers":[
                                 {
                                      seller_id:9,
                                      provider_id:3,
                                      provider_name:"prov 3"
                                      quantity:50,
                                      order_allowed:40,
                                      notification_on_stock:true 
                                 },
                                 {
                                      seller_id:9,
                                      provider_id:4,
                                      provider_name:"prov 4"
                                      quantity:20,
                                      order_allowed:20,
                                      notification_on_stock:true 
                                 }
                             ],
                            // next fields
                    }
    	],
    },
    {
       _id: ObjectId(""),
       id: 1000,
       name: "prod 2",
       // all other product fields
       sellers:[
    		{
    			"_id" : ObjectId(""),
    			"seller_id" : 44,
                            "product_id" : 2,
    			"name" : "Business 22",
                           // all other fields
                            "providers":[
                                 {
                                      seller_id:44,
                                      provider_id:3,
                                      provider_name:"prov 3"
                                      quantity:50,
                                      order_allowed:40,
                                      notification_on_stock:true 
                                 },
                                 {
                                      seller_id:44,
                                      provider_id:4,
                                      provider_name:"prov 4"
                                      quantity:20,
                                      order_allowed:20,
                                      notification_on_stock:true 
                                 }
                             ],
                            // next fields
    		},{
                            "_id" : ObjectId(""),
    			"seller_id" : 91,
    			"name" : "Business 21",
                           // all other fields
                            "providers":[
                                 {
                                      seller_id:91,
                                      provider_id:1,
                                      provider_name:"prov 1"
                                      quantity:50,
                                      order_allowed:40,
                                      notification_on_stock:true 
                                 },
                                 {
                                      seller_id:91,
                                      provider_id:2,
                                      provider_name:"prov 2"
                                      quantity:20,
                                      order_allowed:20,
                                      notification_on_stock:true 
                                 }
                             ],
                            // next fields
                    }
    	],
    },
    {
       _id: ObjectId(""),
       id: 1001,
       name: "prod 3",
       // all other product fields
       sellers:[
    		{
    			"_id" : ObjectId(""),
    			"seller_id" : 33,
    			"name" : "Business 112",
                           // all other fields
                            "providers":[
                                 {
                                      seller_id:33,
                                      provider_id:1,
                                      provider_name:"prov 1"
                                      quantity:50,
                                      order_allowed:40,
                                      notification_on_stock:true 
                                 },
                                 {
                                      seller_id:33,
                                      provider_id:2,
                                      provider_name:"prov 2"
                                      quantity:20,
                                      order_allowed:20,
                                      notification_on_stock:true 
                                 }
                             ],
                            // next fields
    		},{
                            "_id" : ObjectId(""),
    			"seller_id" : 32,
    			"name" : "Business 2",
                           // all other fields
                            "providers":[
                                 {
                                      seller_id:32,
                                      provider_id:1,
                                      provider_name:"prov 1"
                                      quantity:50,
                                      order_allowed:40,
                                      notification_on_stock:true 
                                 },
                                 {
                                      seller_id:32,
                                      provider_id:2,
                                      provider_name:"prov 2"
                                      quantity:20,
                                      order_allowed:20,
                                      notification_on_stock:true 
                                 }
                             ],
                            // next fields
                    }
    	],
    },

I have added below indexes for my products collection as below,

  1. index on products.id
    {
    id:1
    }
  2. index on nested array document p
    {
    “products.sellers.seller_id” : 1
    }
  3. {
    “id” : 1,
    “sellers.seller_id” : 1,
    “sellers.providers.provider_id” : 1
    }

My Query :

db.products.find({
“id”: 999,
“sellers”: { “$elemMatch”: { “providers”: { “$elemMatch”: { “seller_id”: 30098, “provider_id”: 517 } } } }
});

My issue is query always picks up the first index on field id and query took time around 800ms which I need to optimise.

1 Like

Hi @Pradip_Chavda and welcome in the MongoDB Community :muscle: !

First, I have done some cleaning on your 3 sample documents so they are easier to insert in a test collection:

db.foo.insertMany([{
    "_id":ObjectId("603ce892addf7e2a40bb301b"), 
    "id":999,
    "name":"prod1",
    "sellers":
    [{
        "_id": ObjectId("603ce892addf7e2a40bb301c"),
        "seller_id": 99,
        "name": "Business 1",
        "providers": [{
            "seller_id": 99,
            "provider_id": 1,
            "provider_name": "prov 1",
            "quantity": 50,
            "order_allowed": 40,
            "notification_on_stock": true
        }, {
            "seller_id": 99,
            "provider_id": 2,
            "provider_name": "prov 2",
            "quantity": 20,
            "order_allowed": 20,
            "notification_on_stock": true
        }]
    }, {
        "_id": ObjectId("603ce892addf7e2a40bb301d"),
        "seller_id": 9,
        "name": "Business 2",
        "providers": [{
            "seller_id": 9,
            "provider_id": 3,
            "provider_name": "prov 3",
            "quantity": 50,
            "order_allowed": 40,
            "notification_on_stock": true
        }, {
            "seller_id": 9,
            "provider_id": 4,
            "provider_name": "prov 4",
            "quantity": 20,
            "order_allowed": 20,
            "notification_on_stock": true
        }]
    }]
},
{
    "_id":ObjectId("603ce892addf7e2a40bb301e"),
    "id":1000,
    "name":"prod 2",
    "sellers":
    [{
        "_id": ObjectId("603ce892addf7e2a40bb301f"),
        "seller_id": 44,
        "product_id": 2,
        "name": "Business 22",
        "providers": [{
            "seller_id": 44,
            "provider_id": 3,
            "provider_name": "prov 3",
            "quantity": 50,
            "order_allowed": 40,
            "notification_on_stock": true
        }, {
            "seller_id": 44,
            "provider_id": 4,
            "provider_name": "prov 4",
            "quantity": 20,
            "order_allowed": 20,
            "notification_on_stock": true
        }]
    }, {
        "_id": ObjectId("603ce892addf7e2a40bb3020"),
        "seller_id": 91,
        "name": "Business 21",
        "providers": [{
            "seller_id": 91,
            "provider_id": 1,
            "provider_name": "prov 1",
            "quantity": 50,
            "order_allowed": 40,
            "notification_on_stock": true
        }, {
            "seller_id": 91,
            "provider_id": 2,
            "provider_name": "prov 2",
            "quantity": 20,
            "order_allowed": 20,
            "notification_on_stock": true
        }]
    }]
},
{
    "_id": ObjectId("603ce892addf7e2a40bb3021"),
    "id": 1001,
    "name": "prod 3",
    "sellers":
    [{
        "_id": ObjectId("603ce892addf7e2a40bb3022"),
        "seller_id": 33,
        "name": "Business 112",
        "providers": [{
            "seller_id": 33,
            "provider_id": 1,
            "provider_name": "prov 1",
            "quantity": 50,
            "order_allowed": 40,
            "notification_on_stock": true
        }, {
            "seller_id": 33,
            "provider_id": 2,
            "provider_name": "prov 2",
            "quantity": 20,
            "order_allowed": 20,
            "notification_on_stock": true
        }]
    }, {
        "_id": ObjectId("603ce892addf7e2a40bb3023"),
        "seller_id": 32,
        "name": "Business 2",
        "providers": [{
            "seller_id": 32,
            "provider_id": 1,
            "provider_name": "prov 1",
            "quantity": 50,
            "order_allowed": 40,
            "notification_on_stock": true
        }, {
            "seller_id": 32,
            "provider_id": 2,
            "provider_name": "prov 2",
            "quantity": 20,
            "order_allowed": 20,
            "notification_on_stock": true
        }]
    }]
}])

Then I created this index:

db.foo.createIndex({id:1,"sellers.providers.seller_id":1, "sellers.providers.provider_id":1 })

Note that this index is different than yours. You are indexing sellers.seller_id which is one level above. In your query, you are looking for sellers.providers.seller_id which is one level deeper.

I removed the first elemMatch in your query because I think it’s useless. It doesn’t change the result though, both return the same document & use the index so it’s up to you really.

db.foo.find({id:999, "sellers.providers":  {"$elemMatch": {"seller_id": 99, "provider_id": 1}}}).explain()
{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "test.foo",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"$and" : [
				{
					"sellers.providers" : {
						"$elemMatch" : {
							"$and" : [
								{
									"provider_id" : {
										"$eq" : 1
									}
								},
								{
									"seller_id" : {
										"$eq" : 99
									}
								}
							]
						}
					}
				},
				{
					"id" : {
						"$eq" : 999
					}
				}
			]
		},
		"queryHash" : "E1D4971C",
		"planCacheKey" : "9B12C768",
		"winningPlan" : {
			"stage" : "FETCH",
			"filter" : {
				"sellers.providers" : {
					"$elemMatch" : {
						"$and" : [
							{
								"seller_id" : {
									"$eq" : 99
								}
							},
							{
								"provider_id" : {
									"$eq" : 1
								}
							}
						]
					}
				}
			},
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"id" : 1,
					"sellers.providers.seller_id" : 1,
					"sellers.providers.provider_id" : 1
				},
				"indexName" : "id_1_sellers.providers.seller_id_1_sellers.providers.provider_id_1",
				"isMultiKey" : true,
				"multiKeyPaths" : {
					"id" : [ ],
					"sellers.providers.seller_id" : [
						"sellers",
						"sellers.providers"
					],
					"sellers.providers.provider_id" : [
						"sellers",
						"sellers.providers"
					]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 2,
				"direction" : "forward",
				"indexBounds" : {
					"id" : [
						"[999.0, 999.0]"
					],
					"sellers.providers.seller_id" : [
						"[99.0, 99.0]"
					],
					"sellers.providers.provider_id" : [
						"[1.0, 1.0]"
					]
				}
			}
		},
		"rejectedPlans" : [ ]
	},
	"serverInfo" : {
		"host" : "hafx",
		"port" : 27017,
		"version" : "4.4.4",
		"gitVersion" : "8db30a63db1a9d84bdcad0c83369623f708e0397"
	},
	"ok" : 1,
	"$clusterTime" : {
		"clusterTime" : Timestamp(1614606674, 1),
		"signature" : {
			"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
			"keyId" : NumberLong(0)
		}
	},
	"operationTime" : Timestamp(1614606674, 1)
}

As you can see, it’s using the index as expected.

Also, please note that having 2 indexes like the 2 below is a waste of ressources because the second one also contains the first one. So you can remove the first one & save some RAM. Any query that was using the first one can also use the second one instead.

{ _id:1 }
{ _id:1, something: 1 }

Cheers,
Maxime.

2 Likes

If doing $and,$or queries, do I need to index each of the props separately? I think I read that somewhere…
thx :pray:

I’ll go with “it depends”. Please open a new topic for your question and provide context, sample documents and your queries so we can check & help. Bonus points if we can copy & paste the sample docs easily into the DB. (see the insertMany above in this thread).