Can this query be more performant?

Hello :smile:

I am kind of new to mongoDB (I just implemented my first complex query) and I am stuck on a quite common problem: the performance of groupBy action (at least, this is what I think is the problem).

Let me describe a bit the context:
I have 3 collections: bookingTariffKey, bookingCache and deletedBookingsCache

bookingTariffKey contains about 2M documents, but they are not complex
bookingCache contains about 600k documents
deletedBookingsCache contains about 82k documents
bookingCache and deletedBookingsCache have the same stucture, but the documentes are quite complex (with many fields, arrays …)
I have indexes for the most-used fields.

The scope of my query is to find all the exisitng bookings (the ones from bookingCache) which have a tariff (lookup in bookingTariffKey) and respect a time condition plus all the deleted bookings (the ones from deletedBookingsCache) that respect the same time condition.

And this is how the query looks like:

[{
 $match: {
  appId: 'myApp'
 }
}, {
 $lookup: {
  from: 'bookingCache',
  localField: 'bookingCacheId',
  foreignField: '_id',
  as: 'bookingsWithTariff'
 }
}, {
 $unwind: {
  path: '$bookingsWithTariff'
 }
}, {
 $group: {
  _id: {
   locationId: '$bookingsWithTariff.locationID._id'
  },
  bookings: {
   $push: '$bookingsWithTariff'
  }}
}, {
 $match: {
  bookings: {
   $elemMatch: {
    'metadata.updatedAt': {
     $gte: ISODate('2021-03-29T14:13:38.046Z'),
     $lte: ISODate('2022-03-29T15:00:00.000Z')
    }}}}
}, {
 $unionWith: {
  coll: 'deletedbookingsCache',
  pipeline: [{
    $match: {
     'locationID._id': {
      $exists: true
     }}
   },{
    $group: {
     _id: {
      locationId: '$locationID._id'
     },
     bookings: {
      $push: '$$ROOT'
     }}
   },{
    $match: {
     bookings: {
      $elemMatch: {
       'metadata.updatedAt': {
        $gte: ISODate('2022-03-29T14:13:38.046Z'),
        $lte: ISODate('2022-03-29T15:00:00.000Z')
       }}}}}]}
}, {
 $group: {
  _id: '$_id.locationId',
  bookings: {
   $addToSet: '$bookings'
  }}
}, {
 $facet: {
  result: [{
    $count: 'total'
   }],
  data: [{
    $sort: {
     _id: 1
    }
   },{
    $skip: 0
   },{
    $limit: 10
   }]}}]

And this is the explain:

{
  "explainVersion": "1",
  "stages": [
    {
      "$cursor": {
        "queryPlanner": {
          "namespace": "c58d9a7a-786b-4e7f-8092-6ef9f6990f8e.bookingTariffKey",
          "indexFilterSet": false,
          "parsedQuery": {
            "appId": {
              "$eq": "myApp"
            }
          },
          "queryHash": "B196AC43",
          "planCacheKey": "3CEC6D3F",
          "maxIndexedOrSolutionsReached": false,
          "maxIndexedAndSolutionsReached": false,
          "maxScansToExplodeReached": false,
          "winningPlan": {
            "stage": "PROJECTION_SIMPLE",
            "transformBy": {
              "bookingCacheId": 1,
              "bookingsWithTariff": 1,
              "_id": 0
            },
            "inputStage": {
              "stage": "FETCH",
              "inputStage": {
                "stage": "IXSCAN",
                "keyPattern": {
                  "appId": 1
                },
                "indexName": "appId",
                "isMultiKey": false,
                "multiKeyPaths": {
                  "appId": []
                },
                "isUnique": false,
                "isSparse": false,
                "isPartial": false,
                "indexVersion": 2,
                "direction": "forward",
                "indexBounds": {
                  "appId": [
                    "[\"myApp\", \"myApp\"]"
                  ]
                }
              }
            }
          },
          "rejectedPlans": []
        },
        "executionStats": {
          "executionSuccess": true,
          "nReturned": 368473,
          "executionTimeMillis": 70076,
          "totalKeysExamined": 368473,
          "totalDocsExamined": 368473,
          "executionStages": {
            "stage": "PROJECTION_SIMPLE",
            "nReturned": 368473,
            "executionTimeMillisEstimate": 429,
            "works": 368474,
            "advanced": 368473,
            "needTime": 0,
            "needYield": 0,
            "saveState": 384,
            "restoreState": 384,
            "isEOF": 1,
            "transformBy": {
              "bookingCacheId": 1,
              "bookingsWithTariff": 1,
              "_id": 0
            },
            "inputStage": {
              "stage": "FETCH",
              "nReturned": 368473,
              "executionTimeMillisEstimate": 398,
              "works": 368474,
              "advanced": 368473,
              "needTime": 0,
              "needYield": 0,
              "saveState": 384,
              "restoreState": 384,
              "isEOF": 1,
              "docsExamined": 368473,
              "alreadyHasObj": 0,
              "inputStage": {
                "stage": "IXSCAN",
                "nReturned": 368473,
                "executionTimeMillisEstimate": 107,
                "works": 368474,
                "advanced": 368473,
                "needTime": 0,
                "needYield": 0,
                "saveState": 384,
                "restoreState": 384,
                "isEOF": 1,
                "keyPattern": {
                  "appId": 1
                },
                "indexName": "appId",
                "isMultiKey": false,
                "multiKeyPaths": {
                  "appId": []
                },
                "isUnique": false,
                "isSparse": false,
                "isPartial": false,
                "indexVersion": 2,
                "direction": "forward",
                "indexBounds": {
                  "appId": [
                    "[\"myApp\", \"myApp\"]"
                  ]
                },
                "keysExamined": 368473,
                "seeks": 1,
                "dupsTested": 0,
                "dupsDropped": 0
              }
            }
          },
          "allPlansExecution": []
        }
      },
      "nReturned": 368473,
      "executionTimeMillisEstimate": 1211
    },
    {
      "$lookup": {
        "from": "bookingCache",
        "as": "bookingsWithTariff",
        "localField": "bookingCacheId",
        "foreignField": "_id",
        "unwinding": {
          "preserveNullAndEmptyArrays": false
        }
      },
      "nReturned": 367963,
      "executionTimeMillisEstimate": 45117
    },
    {
      "$group": {
        "_id": {
          "locationId": "$bookingsWithTariff.locationID._id"
        },
        "bookings": {
          "$push": "$bookingsWithTariff"
        }
      },
      "maxAccumulatorMemoryUsageBytes": {
        "bookings": 102766973
      },
      "totalOutputDataSizeBytes": {
        "$numberLong": "2218104218"
      },
      "usedDisk": true,
      "nReturned": 131365,
      "executionTimeMillisEstimate": 63719
    },
    {
      "$match": {
        "bookings": {
          "$elemMatch": {
            "$and": [
              {
                "metadata.updatedAt": {
                  "$gte": {
                    "$date": {
                      "$numberLong": "1617027218046"
                    }
                  }
                }
              },
              {
                "metadata.updatedAt": {
                  "$lte": {
                    "$date": {
                      "$numberLong": "1648566000000"
                    }
                  }
                }
              }
            ]
          }
        }
      },
      "nReturned": 660,
      "executionTimeMillisEstimate": 65703
    },
    {
      "$unionWith": {
        "coll": "deletedbookingsCache",
        "pipeline": [
          {
            "$cursor": {
              "queryPlanner": {
                "namespace": "c58d9a7a-786b-4e7f-8092-6ef9f6990f8e.deletedbookingsCache",
                "indexFilterSet": false,
                "parsedQuery": {
                  "locationID._id": {
                    "$exists": true
                  }
                },
                "queryHash": "E5759F8E",
                "planCacheKey": "00C12082",
                "maxIndexedOrSolutionsReached": false,
                "maxIndexedAndSolutionsReached": false,
                "maxScansToExplodeReached": false,
                "winningPlan": {
                  "stage": "FETCH",
                  "filter": {
                    "locationID._id": {
                      "$exists": true
                    }
                  },
                  "inputStage": {
                    "stage": "IXSCAN",
                    "keyPattern": {
                      "locationID._id": 1
                    },
                    "indexName": "locationID._id",
                    "collation": {
                      "locale": "en",
                      "caseLevel": false,
                      "caseFirst": "off",
                      "strength": 2,
                      "numericOrdering": false,
                      "alternate": "non-ignorable",
                      "maxVariable": "punct",
                      "normalization": false,
                      "backwards": false,
                      "version": "57.1"
                    },
                    "isMultiKey": false,
                    "multiKeyPaths": {
                      "locationID._id": []
                    },
                    "isUnique": false,
                    "isSparse": false,
                    "isPartial": false,
                    "indexVersion": 2,
                    "direction": "forward",
                    "indexBounds": {
                      "locationID._id": [
                        "[MinKey, MaxKey]"
                      ]
                    }
                  }
                },
                "rejectedPlans": []
              },
              "executionStats": {
                "executionSuccess": true,
                "nReturned": 0,
                "executionTimeMillis": 70076,
                "totalKeysExamined": 0,
                "totalDocsExamined": 0,
                "executionStages": {
                  "stage": "FETCH",
                  "filter": {
                    "locationID._id": {
                      "$exists": true
                    }
                  },
                  "nReturned": 0,
                  "executionTimeMillisEstimate": 0,
                  "works": 0,
                  "advanced": 0,
                  "needTime": 0,
                  "needYield": 0,
                  "saveState": 1,
                  "restoreState": 0,
                  "isEOF": 0,
                  "docsExamined": 0,
                  "alreadyHasObj": 0,
                  "inputStage": {
                    "stage": "IXSCAN",
                    "nReturned": 0,
                    "executionTimeMillisEstimate": 0,
                    "works": 0,
                    "advanced": 0,
                    "needTime": 0,
                    "needYield": 0,
                    "saveState": 1,
                    "restoreState": 0,
                    "isEOF": 0,
                    "keyPattern": {
                      "locationID._id": 1
                    },
                    "indexName": "locationID._id",
                    "collation": {
                      "locale": "en",
                      "caseLevel": false,
                      "caseFirst": "off",
                      "strength": 2,
                      "numericOrdering": false,
                      "alternate": "non-ignorable",
                      "maxVariable": "punct",
                      "normalization": false,
                      "backwards": false,
                      "version": "57.1"
                    },
                    "isMultiKey": false,
                    "multiKeyPaths": {
                      "locationID._id": []
                    },
                    "isUnique": false,
                    "isSparse": false,
                    "isPartial": false,
                    "indexVersion": 2,
                    "direction": "forward",
                    "indexBounds": {
                      "locationID._id": [
                        "[MinKey, MaxKey]"
                      ]
                    },
                    "keysExamined": 0,
                    "seeks": 0,
                    "dupsTested": 0,
                    "dupsDropped": 0
                  }
                },
                "allPlansExecution": []
              }
            },
            "nReturned": 0,
            "executionTimeMillisEstimate": 0
          },
          {
            "$group": {
              "_id": {
                "locationId": "$locationID._id"
              },
              "bookings": {
                "$push": "$$ROOT"
              }
            },
            "maxAccumulatorMemoryUsageBytes": {
              "bookings": 103598076
            },
            "totalOutputDataSizeBytes": 459907739,
            "usedDisk": true,
            "nReturned": 28829,
            "executionTimeMillisEstimate": 3775
          },
          {
            "$match": {
              "bookings": {
                "$elemMatch": {
                  "$and": [
                    {
                      "metadata.updatedAt": {
                        "$gte": {
                          "$date": {
                            "$numberLong": "1648563218046"
                          }
                        }
                      }
                    },
                    {
                      "metadata.updatedAt": {
                        "$lte": {
                          "$date": {
                            "$numberLong": "1648566000000"
                          }
                        }
                      }
                    }
                  ]
                }
              }
            },
            "nReturned": 0,
            "executionTimeMillisEstimate": 4250
          }
        ]
      },
      "nReturned": 660,
      "executionTimeMillisEstimate": 69954
    },
    {
      "$group": {
        "_id": "$_id.locationId",
        "bookings": {
          "$addToSet": "$bookings"
        }
      },
      "maxAccumulatorMemoryUsageBytes": {
        "bookings": 29217178
      },
      "totalOutputDataSizeBytes": 29370958,
      "usedDisk": false,
      "nReturned": 660,
      "executionTimeMillisEstimate": 70011
    },
    {
      "$facet": {
        "result": [
          {
            "$teeConsumer": {},
            "nReturned": 660,
            "executionTimeMillisEstimate": 70021
          },
          {
            "$group": {
              "_id": {
                "$const": null
              },
              "total": {
                "$sum": {
                  "$const": 1
                }
              }
            },
            "maxAccumulatorMemoryUsageBytes": {
              "total": 72
            },
            "totalOutputDataSizeBytes": 229,
            "usedDisk": false,
            "nReturned": 1,
            "executionTimeMillisEstimate": 70021
          },
          {
            "$project": {
              "total": true,
              "_id": false
            },
            "nReturned": 1,
            "executionTimeMillisEstimate": 70021
          }
        ],
        "data": [
          {
            "$teeConsumer": {},
            "nReturned": 660,
            "executionTimeMillisEstimate": 51
          },
          {
            "$sort": {
              "sortKey": {
                "_id": 1
              },
              "limit": 10
            },
            "totalDataSizeSortedBytesEstimate": 1100867,
            "usedDisk": false,
            "nReturned": 10,
            "executionTimeMillisEstimate": 51
          }
        ]
      },
      "nReturned": 1,
      "executionTimeMillisEstimate": 70072
    }
  ],
  "serverInfo": {
    "host": "st0cvm200117.internal-mongodb.de1.bosch-iot-cloud.com",
    "port": 30000,
    "version": "5.0.4",
    "gitVersion": "62a84ede3cc9a334e8bc82160714df71e7d3a29e"
  },
  "serverParameters": {
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
  },
  "command": {
    "aggregate": "bookingTariffKey",
    "pipeline": [
      {
        "$match": {
          "appId": "myApp"
        }
      },
      {
        "$lookup": {
          "from": "bookingCache",
          "localField": "bookingCacheId",
          "foreignField": "_id",
          "as": "bookingsWithTariff"
        }
      },
      {
        "$unwind": {
          "path": "$bookingsWithTariff"
        }
      },
      {
        "$group": {
          "_id": {
            "locationId": "$bookingsWithTariff.locationID._id"
          },
          "bookings": {
            "$push": "$bookingsWithTariff"
          }
        }
      },
      {
        "$match": {
          "bookings": {
            "$elemMatch": {
              "metadata.updatedAt": {
                "$gte": {
                  "$date": {
                    "$numberLong": "1617027218046"
                  }
                },
                "$lte": {
                  "$date": {
                    "$numberLong": "1648566000000"
                  }
                }
              }
            }
          }
        }
      },
      {
        "$unionWith": {
          "coll": "deletedbookingsCache",
          "pipeline": [
            {
              "$match": {
                "locationID._id": {
                  "$exists": true
                }
              }
            },
            {
              "$group": {
                "_id": {
                  "locationId": "$locationID._id"
                },
                "bookings": {
                  "$push": "$$ROOT"
                }
              }
            },
            {
              "$match": {
                "bookings": {
                  "$elemMatch": {
                    "metadata.updatedAt": {
                      "$gte": {
                        "$date": {
                          "$numberLong": "1648563218046"
                        }
                      },
                      "$lte": {
                        "$date": {
                          "$numberLong": "1648566000000"
                        }
                      }
                    }
                  }
                }
              }
            }
          ]
        }
      },
      {
        "$group": {
          "_id": "$_id.locationId",
          "bookings": {
            "$addToSet": "$bookings"
          }
        }
      },
      {
        "$facet": {
          "result": [
            {
              "$count": "total"
            }
          ],
          "data": [
            {
              "$sort": {
                "_id": 1
              }
            },
            {
              "$skip": 0
            },
            {
              "$limit": 10
            }
          ]
        }
      }
    ],
    "allowDiskUse": true,
    "cursor": {},
    "maxTimeMS": 60000,
    "$db": "c58d9a7a-786b-4e7f-8092-6ef9f6990f8e"
  },
  "ok": 1,
  "$clusterTime": {
    "clusterTime": {
      "$timestamp": {
        "t": 1665327827,
        "i": 8
      }
    },
    "signature": {
      "hash": {
        "$binary": {
          "base64": "EfUsMmgMURqthwu/ROmLDpz9vkg=",
          "subType": "00"
        }
      },
      "keyId": {
        "$numberLong": "7124917905849843910"
      }
    }
  },
  "operationTime": {
    "$timestamp": {
      "t": 1665327827,
      "i": 8
    }
  }
}

I know it is a lot, but I would really appreciate if you can help me improve the performance of this query.
Thank you in advance! :smile:

Hi @Alina_Bolindu, and welcome to the MongoDB Community forums! :wave:

After your $group stage, I see you’re doing a $match on a very narrow time period range. Could the query be rewritten in a way to do that $match first, even if that means you have to perform your $lookup the other way? With the amount of data being pushed through the pipeline it would be hard for us to simulate and test other options to see what might help.

3 Likes

Or maybe even move the "$match" into the first "$lookup" with a "pipeline"?

2 Likes

Hi @Alina_Bolindu and welcome to the MongoDB community!!

It would be very helpful if you could help me with a few details for better understanding

  1. A sample document for the collections which could help me replicate in my local environment.
  2. MongoDB version you are using.
  3. The indexes being used for the query.

Generally the query performance you can consider the documentation on Optimising the Query Performance

Also, for the query, as @Doug_Duncan mentions, if the query can be modified in such a way that the $match can be used only once at mostly at the first stage of the pipeline as it would filter out the collection based on the condition.

Let us know if you have any further concerns.

Best Regards
Aasawari

This topic was automatically closed 60 days after the last reply. New replies are no longer allowed.