$count with $lookup performance

Hi,

I wonder if there is a way to improve the performance of a $count aggregation pipeline that also uses $lookup.
This is a simplified scenario of my case:
Query is running on the “orders” collection, I want to count all orders that belongs to users from a specific “ug” (user group).
(in the real query I have 1 more $lookup)

Thx!

[
  {
    $lookup: {
      from: "users",
      localField: "user",
      foreignField: "_id",
      let: {},
      pipeline: [
        {$project: {_id:1, ug: 1}}
      ],
      as: "user"
    }
  },
  {
    $addFields: {
      ug: { $arrayElemAt: ["$user.ug", 0] },
    }
  },
  { $match: {
    ug: 1
  }},
  { $count: "total" }
]

Here is the “explain” output if relevant:
(this is data from dev, in production there are many more docs).

{
  "explainVersion": "1",
  "stages": [
    {
      "$cursor": {
        "queryPlanner": {
          "namespace": "mydb.orders",
          "parsedQuery": {},
          "indexFilterSet": false,
          "queryHash": "B06B32BC",
          "planCacheShapeHash": "B06B32BC",
          "planCacheKey": "38AED6D4",
          "optimizationTimeMillis": 0,
          "maxIndexedOrSolutionsReached": false,
          "maxIndexedAndSolutionsReached": false,
          "maxScansToExplodeReached": false,
          "prunedSimilarIndexes": false,
          "winningPlan": {
            "isCached": false,
            "stage": "PROJECTION_SIMPLE",
            "transformBy": {
              "user": 1,
              "_id": 0
            },
            "inputStage": {
              "stage": "COLLSCAN",
              "direction": "forward"
            }
          },
          "rejectedPlans": []
        },
        "executionStats": {
          "executionSuccess": true,
          "nReturned": 10825,
          "executionTimeMillis": 1180,
          "totalKeysExamined": 0,
          "totalDocsExamined": 10825,
          "executionStages": {
            "isCached": false,
            "stage": "PROJECTION_SIMPLE",
            "nReturned": 10825,
            "executionTimeMillisEstimate": 3,
            "works": 10826,
            "advanced": 10825,
            "needTime": 0,
            "needYield": 0,
            "saveState": 15,
            "restoreState": 15,
            "isEOF": 1,
            "transformBy": {
              "user": 1,
              "_id": 0
            },
            "inputStage": {
              "stage": "COLLSCAN",
              "nReturned": 10825,
              "executionTimeMillisEstimate": 1,
              "works": 10826,
              "advanced": 10825,
              "needTime": 0,
              "needYield": 0,
              "saveState": 15,
              "restoreState": 15,
              "isEOF": 1,
              "direction": "forward",
              "docsExamined": 10825
            }
          }
        }
      },
      "nReturned": 10825,
      "executionTimeMillisEstimate": 5
    },
    {
      "$lookup": {
        "from": "users",
        "as": "user",
        "localField": "user",
        "foreignField": "_id",
        "let": {},
        "pipeline": [
          { "$project": { "_id": 1, "ug": 1 } }
        ]
      },
      "totalDocsExamined": 2912,
      "totalKeysExamined": 2912,
      "collectionScans": 0,
      "indexesUsed": ["_id_"],
      "nReturned": 10825,
      "executionTimeMillisEstimate": 1180
    },
    {
      "$addFields": {
        "ug": {
          "$arrayElemAt": [
            "$user.ug",
            { "$const": 0 }
          ]
        }
      },
      "nReturned": 10825,
      "executionTimeMillisEstimate": 1180
    },
    {
      "$match": { "ug": { "$eq": 1 } },
      "nReturned": 2909,
      "executionTimeMillisEstimate": 1180
    },
    {
      "$group": {
        "_id": { "$const": null },
        "total": { "$sum": { "$const": 1 } }
      },
      "maxAccumulatorMemoryUsageBytes": {
        "total": 128
      },
      "totalOutputDataSizeBytes": 245,
      "usedDisk": false,
      "spills": 0,
      "spilledDataStorageSize": 0,
      "numBytesSpilledEstimate": 0,
      "spilledRecords": 0,
      "nReturned": 1,
      "executionTimeMillisEstimate": 1180
    },
    {
      "$project": { "total": true, "_id": false },
      "nReturned": 1,
      "executionTimeMillisEstimate": 1180
    }
  ],
  "queryShapeHash": "8E9379FA7A90636A5C842A0B5C8A40306DFC9DD12ED99B9543A097D163325573",
  "serverInfo": {
    "host": "atlas-xxxx.ckupt.mongodb.net",
    "port": 27017,
    "version": "8.0.11",
    "gitVersion": "bed99f699da6cb2b74262aa6d473446c41476643"
  },
  "serverParameters": {
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
    "internalQueryFrameworkControl": "trySbeRestricted",
    "internalQueryPlannerIgnoreIndexWithCollationForRegex": 1
  },
  "command": {
    "aggregate": "orders",
    "pipeline": [
      {
        "$lookup": {
          "from": "users",
          "localField": "user",
          "foreignField": "_id",
          "let": {},
          "pipeline": [
            { "$project": { "_id": 1, "ug": 1 } }
          ],
          "as": "user"
        }
      },
      {
        "$addFields": {
          "ug": {
            "$arrayElemAt": ["$user.ug", 0]
          }
        }
      },
      { "$match": { "ug": 1 } },
      { "$count": "total" }
    ],
    "cursor": {},
    "maxTimeMS": 60000,
    "$db": "mydb"
  },
  "ok": 1,
  "$clusterTime": {
    "clusterTime": {
      "$timestamp": "7532205600080396301"
    },
    "signature": {
      "hash": "QJ8QDXK+OmmZJOPQw8B67jQO2yQ=",
      "keyId": {
        "low": 1,
        "high": 1745258851,
        "unsigned": false
      }
    }
  },
  "operationTime": {
    "$timestamp": "7532205600080396301"
  }
}

what is that lookup? It’s better to show the real query, as suggestions for this one may be different.

Looking at just what you shared, it may be faster if you create index on ug in users collection, index on user in orders collection. Start aggregation on user collection with $match on ug:1 at the beginning of the pipeline, then do lookup from orders.
smth like

[{$match: {ug:1}},
 {$lookup: {
   from: 'orders',
   localField: '_id',
   foreignField: 'user',
   as: 'orders'
 }},
 {$group: {
   _id: null,
   fieldN: {
     $sum: {$size: '$orders'}
   }
 }}
]
1 Like

Thanks,

I tried this revised aggregate and it won’t even finish, I get a timeout “PlanExecutor error during aggregation :: caused by :: operation exceeded time limit”

So this got me thinking that something is wrong with the DB, it was just upgraded from 6 to 7 and from 7 to 8.
I exported the data and imported it to a side db and viola, no issues there, this is quite upsetting !