Can this query be more performant?

Hello :smile:

I am kind of new to mongoDB (I just implemented my first complex query) and I am stuck on a quite common problem: the performance of groupBy action (at least, this is what I think is the problem).

Let me describe a bit the context:
I have 3 collections: bookingTariffKey, bookingCache and deletedBookingsCache

bookingTariffKey contains about 2M documents, but they are not complex
bookingCache contains about 600k documents
deletedBookingsCache contains about 82k documents
bookingCache and deletedBookingsCache have the same stucture, but the documentes are quite complex (with many fields, arrays …)
I have indexes for the most-used fields.

The scope of my query is to find all the exisitng bookings (the ones from bookingCache) which have a tariff (lookup in bookingTariffKey) and respect a time condition plus all the deleted bookings (the ones from deletedBookingsCache) that respect the same time condition.

And this is how the query looks like:

[{
 $match: {
  appId: 'myApp'
 }
}, {
 $lookup: {
  from: 'bookingCache',
  localField: 'bookingCacheId',
  foreignField: '_id',
  as: 'bookingsWithTariff'
 }
}, {
 $unwind: {
  path: '$bookingsWithTariff'
 }
}, {
 $group: {
  _id: {
   locationId: '$bookingsWithTariff.locationID._id'
  },
  bookings: {
   $push: '$bookingsWithTariff'
  }}
}, {
 $match: {
  bookings: {
   $elemMatch: {
    'metadata.updatedAt': {
     $gte: ISODate('2021-03-29T14:13:38.046Z'),
     $lte: ISODate('2022-03-29T15:00:00.000Z')
    }}}}
}, {
 $unionWith: {
  coll: 'deletedbookingsCache',
  pipeline: [{
    $match: {
     'locationID._id': {
      $exists: true
     }}
   },{
    $group: {
     _id: {
      locationId: '$locationID._id'
     },
     bookings: {
      $push: '$$ROOT'
     }}
   },{
    $match: {
     bookings: {
      $elemMatch: {
       'metadata.updatedAt': {
        $gte: ISODate('2022-03-29T14:13:38.046Z'),
        $lte: ISODate('2022-03-29T15:00:00.000Z')
       }}}}}]}
}, {
 $group: {
  _id: '$_id.locationId',
  bookings: {
   $addToSet: '$bookings'
  }}
}, {
 $facet: {
  result: [{
    $count: 'total'
   }],
  data: [{
    $sort: {
     _id: 1
    }
   },{
    $skip: 0
   },{
    $limit: 10
   }]}}]

And this is the explain:

{
  "explainVersion": "1",
  "stages": [
    {
      "$cursor": {
        "queryPlanner": {
          "namespace": "c58d9a7a-786b-4e7f-8092-6ef9f6990f8e.bookingTariffKey",
          "indexFilterSet": false,
          "parsedQuery": {
            "appId": {
              "$eq": "myApp"
            }
          },
          "queryHash": "B196AC43",
          "planCacheKey": "3CEC6D3F",
          "maxIndexedOrSolutionsReached": false,
          "maxIndexedAndSolutionsReached": false,
          "maxScansToExplodeReached": false,
          "winningPlan": {
            "stage": "PROJECTION_SIMPLE",
            "transformBy": {
              "bookingCacheId": 1,
              "bookingsWithTariff": 1,
              "_id": 0
            },
            "inputStage": {
              "stage": "FETCH",
              "inputStage": {
                "stage": "IXSCAN",
                "keyPattern": {
                  "appId": 1
                },
                "indexName": "appId",
                "isMultiKey": false,
                "multiKeyPaths": {
                  "appId": []
                },
                "isUnique": false,
                "isSparse": false,
                "isPartial": false,
                "indexVersion": 2,
                "direction": "forward",
                "indexBounds": {
                  "appId": [
                    "[\"myApp\", \"myApp\"]"
                  ]
                }
              }
            }
          },
          "rejectedPlans": []
        },
        "executionStats": {
          "executionSuccess": true,
          "nReturned": 368473,
          "executionTimeMillis": 70076,
          "totalKeysExamined": 368473,
          "totalDocsExamined": 368473,
          "executionStages": {
            "stage": "PROJECTION_SIMPLE",
            "nReturned": 368473,
            "executionTimeMillisEstimate": 429,
            "works": 368474,
            "advanced": 368473,
            "needTime": 0,
            "needYield": 0,
            "saveState": 384,
            "restoreState": 384,
            "isEOF": 1,
            "transformBy": {
              "bookingCacheId": 1,
              "bookingsWithTariff": 1,
              "_id": 0
            },
            "inputStage": {
              "stage": "FETCH",
              "nReturned": 368473,
              "executionTimeMillisEstimate": 398,
              "works": 368474,
              "advanced": 368473,
              "needTime": 0,
              "needYield": 0,
              "saveState": 384,
              "restoreState": 384,
              "isEOF": 1,
              "docsExamined": 368473,
              "alreadyHasObj": 0,
              "inputStage": {
                "stage": "IXSCAN",
                "nReturned": 368473,
                "executionTimeMillisEstimate": 107,
                "works": 368474,
                "advanced": 368473,
                "needTime": 0,
                "needYield": 0,
                "saveState": 384,
                "restoreState": 384,
                "isEOF": 1,
                "keyPattern": {
                  "appId": 1
                },
                "indexName": "appId",
                "isMultiKey": false,
                "multiKeyPaths": {
                  "appId": []
                },
                "isUnique": false,
                "isSparse": false,
                "isPartial": false,
                "indexVersion": 2,
                "direction": "forward",
                "indexBounds": {
                  "appId": [
                    "[\"myApp\", \"myApp\"]"
                  ]
                },
                "keysExamined": 368473,
                "seeks": 1,
                "dupsTested": 0,
                "dupsDropped": 0
              }
            }
          },
          "allPlansExecution": []
        }
      },
      "nReturned": 368473,
      "executionTimeMillisEstimate": 1211
    },
    {
      "$lookup": {
        "from": "bookingCache",
        "as": "bookingsWithTariff",
        "localField": "bookingCacheId",
        "foreignField": "_id",
        "unwinding": {
          "preserveNullAndEmptyArrays": false
        }
      },
      "nReturned": 367963,
      "executionTimeMillisEstimate": 45117
    },
    {
      "$group": {
        "_id": {
          "locationId": "$bookingsWithTariff.locationID._id"
        },
        "bookings": {
          "$push": "$bookingsWithTariff"
        }
      },
      "maxAccumulatorMemoryUsageBytes": {
        "bookings": 102766973
      },
      "totalOutputDataSizeBytes": {
        "$numberLong": "2218104218"
      },
      "usedDisk": true,
      "nReturned": 131365,
      "executionTimeMillisEstimate": 63719
    },
    {
      "$match": {
        "bookings": {
          "$elemMatch": {
            "$and": [
              {
                "metadata.updatedAt": {
                  "$gte": {
                    "$date": {
                      "$numberLong": "1617027218046"
                    }
                  }
                }
              },
              {
                "metadata.updatedAt": {
                  "$lte": {
                    "$date": {
                      "$numberLong": "1648566000000"
                    }
                  }
                }
              }
            ]
          }
        }
      },
      "nReturned": 660,
      "executionTimeMillisEstimate": 65703
    },
    {
      "$unionWith": {
        "coll": "deletedbookingsCache",
        "pipeline": [
          {
            "$cursor": {
              "queryPlanner": {
                "namespace": "c58d9a7a-786b-4e7f-8092-6ef9f6990f8e.deletedbookingsCache",
                "indexFilterSet": false,
                "parsedQuery": {
                  "locationID._id": {
                    "$exists": true
                  }
                },
                "queryHash": "E5759F8E",
                "planCacheKey": "00C12082",
                "maxIndexedOrSolutionsReached": false,
                "maxIndexedAndSolutionsReached": false,
                "maxScansToExplodeReached": false,
                "winningPlan": {
                  "stage": "FETCH",
                  "filter": {
                    "locationID._id": {
                      "$exists": true
                    }
                  },
                  "inputStage": {
                    "stage": "IXSCAN",
                    "keyPattern": {
                      "locationID._id": 1
                    },
                    "indexName": "locationID._id",
                    "collation": {
                      "locale": "en",
                      "caseLevel": false,
                      "caseFirst": "off",
                      "strength": 2,
                      "numericOrdering": false,
                      "alternate": "non-ignorable",
                      "maxVariable": "punct",
                      "normalization": false,
                      "backwards": false,
                      "version": "57.1"
                    },
                    "isMultiKey": false,
                    "multiKeyPaths": {
                      "locationID._id": []
                    },
                    "isUnique": false,
                    "isSparse": false,
                    "isPartial": false,
                    "indexVersion": 2,
                    "direction": "forward",
                    "indexBounds": {
                      "locationID._id": [
                        "[MinKey, MaxKey]"
                      ]
                    }
                  }
                },
                "rejectedPlans": []
              },
              "executionStats": {
                "executionSuccess": true,
                "nReturned": 0,
                "executionTimeMillis": 70076,
                "totalKeysExamined": 0,
                "totalDocsExamined": 0,
                "executionStages": {
                  "stage": "FETCH",
                  "filter": {
                    "locationID._id": {
                      "$exists": true
                    }
                  },
                  "nReturned": 0,
                  "executionTimeMillisEstimate": 0,
                  "works": 0,
                  "advanced": 0,
                  "needTime": 0,
                  "needYield": 0,
                  "saveState": 1,
                  "restoreState": 0,
                  "isEOF": 0,
                  "docsExamined": 0,
                  "alreadyHasObj": 0,
                  "inputStage": {
                    "stage": "IXSCAN",
                    "nReturned": 0,
                    "executionTimeMillisEstimate": 0,
                    "works": 0,
                    "advanced": 0,
                    "needTime": 0,
                    "needYield": 0,
                    "saveState": 1,
                    "restoreState": 0,
                    "isEOF": 0,
                    "keyPattern": {
                      "locationID._id": 1
                    },
                    "indexName": "locationID._id",
                    "collation": {
                      "locale": "en",
                      "caseLevel": false,
                      "caseFirst": "off",
                      "strength": 2,
                      "numericOrdering": false,
                      "alternate": "non-ignorable",
                      "maxVariable": "punct",
                      "normalization": false,
                      "backwards": false,
                      "version": "57.1"
                    },
                    "isMultiKey": false,
                    "multiKeyPaths": {
                      "locationID._id": []
                    },
                    "isUnique": false,
                    "isSparse": false,
                    "isPartial": false,
                    "indexVersion": 2,
                    "direction": "forward",
                    "indexBounds": {
                      "locationID._id": [
                        "[MinKey, MaxKey]"
                      ]
                    },
                    "keysExamined": 0,
                    "seeks": 0,
                    "dupsTested": 0,
                    "dupsDropped": 0
                  }
                },
                "allPlansExecution": []
              }
            },
            "nReturned": 0,
            "executionTimeMillisEstimate": 0
          },
          {
            "$group": {
              "_id": {
                "locationId": "$locationID._id"
              },
              "bookings": {
                "$push": "$$ROOT"
              }
            },
            "maxAccumulatorMemoryUsageBytes": {
              "bookings": 103598076
            },
            "totalOutputDataSizeBytes": 459907739,
            "usedDisk": true,
            "nReturned": 28829,
            "executionTimeMillisEstimate": 3775
          },
          {
            "$match": {
              "bookings": {
                "$elemMatch": {
                  "$and": [
                    {
                      "metadata.updatedAt": {
                        "$gte": {
                          "$date": {
                            "$numberLong": "1648563218046"
                          }
                        }
                      }
                    },
                    {
                      "metadata.updatedAt": {
                        "$lte": {
                          "$date": {
                            "$numberLong": "1648566000000"
                          }
                        }
                      }
                    }
                  ]
                }
              }
            },
            "nReturned": 0,
            "executionTimeMillisEstimate": 4250
          }
        ]
      },
      "nReturned": 660,
      "executionTimeMillisEstimate": 69954
    },
    {
      "$group": {
        "_id": "$_id.locationId",
        "bookings": {
          "$addToSet": "$bookings"
        }
      },
      "maxAccumulatorMemoryUsageBytes": {
        "bookings": 29217178
      },
      "totalOutputDataSizeBytes": 29370958,
      "usedDisk": false,
      "nReturned": 660,
      "executionTimeMillisEstimate": 70011
    },
    {
      "$facet": {
        "result": [
          {
            "$teeConsumer": {},
            "nReturned": 660,
            "executionTimeMillisEstimate": 70021
          },
          {
            "$group": {
              "_id": {
                "$const": null
              },
              "total": {
                "$sum": {
                  "$const": 1
                }
              }
            },
            "maxAccumulatorMemoryUsageBytes": {
              "total": 72
            },
            "totalOutputDataSizeBytes": 229,
            "usedDisk": false,
            "nReturned": 1,
            "executionTimeMillisEstimate": 70021
          },
          {
            "$project": {
              "total": true,
              "_id": false
            },
            "nReturned": 1,
            "executionTimeMillisEstimate": 70021
          }
        ],
        "data": [
          {
            "$teeConsumer": {},
            "nReturned": 660,
            "executionTimeMillisEstimate": 51
          },
          {
            "$sort": {
              "sortKey": {
                "_id": 1
              },
              "limit": 10
            },
            "totalDataSizeSortedBytesEstimate": 1100867,
            "usedDisk": false,
            "nReturned": 10,
            "executionTimeMillisEstimate": 51
          }
        ]
      },
      "nReturned": 1,
      "executionTimeMillisEstimate": 70072
    }
  ],
  "serverInfo": {
    "host": "st0cvm200117.internal-mongodb.de1.bosch-iot-cloud.com",
    "port": 30000,
    "version": "5.0.4",
    "gitVersion": "62a84ede3cc9a334e8bc82160714df71e7d3a29e"
  },
  "serverParameters": {
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
  },
  "command": {
    "aggregate": "bookingTariffKey",
    "pipeline": [
      {
        "$match": {
          "appId": "myApp"
        }
      },
      {
        "$lookup": {
          "from": "bookingCache",
          "localField": "bookingCacheId",
          "foreignField": "_id",
          "as": "bookingsWithTariff"
        }
      },
      {
        "$unwind": {
          "path": "$bookingsWithTariff"
        }
      },
      {
        "$group": {
          "_id": {
            "locationId": "$bookingsWithTariff.locationID._id"
          },
          "bookings": {
            "$push": "$bookingsWithTariff"
          }
        }
      },
      {
        "$match": {
          "bookings": {
            "$elemMatch": {
              "metadata.updatedAt": {
                "$gte": {
                  "$date": {
                    "$numberLong": "1617027218046"
                  }
                },
                "$lte": {
                  "$date": {
                    "$numberLong": "1648566000000"
                  }
                }
              }
            }
          }
        }
      },
      {
        "$unionWith": {
          "coll": "deletedbookingsCache",
          "pipeline": [
            {
              "$match": {
                "locationID._id": {
                  "$exists": true
                }
              }
            },
            {
              "$group": {
                "_id": {
                  "locationId": "$locationID._id"
                },
                "bookings": {
                  "$push": "$$ROOT"
                }
              }
            },
            {
              "$match": {
                "bookings": {
                  "$elemMatch": {
                    "metadata.updatedAt": {
                      "$gte": {
                        "$date": {
                          "$numberLong": "1648563218046"
                        }
                      },
                      "$lte": {
                        "$date": {
                          "$numberLong": "1648566000000"
                        }
                      }
                    }
                  }
                }
              }
            }
          ]
        }
      },
      {
        "$group": {
          "_id": "$_id.locationId",
          "bookings": {
            "$addToSet": "$bookings"
          }
        }
      },
      {
        "$facet": {
          "result": [
            {
              "$count": "total"
            }
          ],
          "data": [
            {
              "$sort": {
                "_id": 1
              }
            },
            {
              "$skip": 0
            },
            {
              "$limit": 10
            }
          ]
        }
      }
    ],
    "allowDiskUse": true,
    "cursor": {},
    "maxTimeMS": 60000,
    "$db": "c58d9a7a-786b-4e7f-8092-6ef9f6990f8e"
  },
  "ok": 1,
  "$clusterTime": {
    "clusterTime": {
      "$timestamp": {
        "t": 1665327827,
        "i": 8
      }
    },
    "signature": {
      "hash": {
        "$binary": {
          "base64": "EfUsMmgMURqthwu/ROmLDpz9vkg=",
          "subType": "00"
        }
      },
      "keyId": {
        "$numberLong": "7124917905849843910"
      }
    }
  },
  "operationTime": {
    "$timestamp": {
      "t": 1665327827,
      "i": 8
    }
  }
}

I know it is a lot, but I would really appreciate if you can help me improve the performance of this query.
Thank you in advance! :smile:

Hi @Alina_Bolindu, and welcome to the MongoDB Community forums! :wave:

After your $group stage, I see you’re doing a $match on a very narrow time period range. Could the query be rewritten in a way to do that $match first, even if that means you have to perform your $lookup the other way? With the amount of data being pushed through the pipeline it would be hard for us to simulate and test other options to see what might help.

3 Likes

Or maybe even move the "$match" into the first "$lookup" with a "pipeline"?

2 Likes

Hi @Alina_Bolindu and welcome to the MongoDB community!!

It would be very helpful if you could help me with a few details for better understanding

  1. A sample document for the collections which could help me replicate in my local environment.
  2. MongoDB version you are using.
  3. The indexes being used for the query.

Generally the query performance you can consider the documentation on Optimising the Query Performance

Also, for the query, as @Doug_Duncan mentions, if the query can be modified in such a way that the $match can be used only once at mostly at the first stage of the pipeline as it would filter out the collection based on the condition.

Let us know if you have any further concerns.

Best Regards
Aasawari