MongoDB 4.2 (and 4.4) aggregate is doing $sort before $match resulting in a sort of the whole collection and poor performance

Dear all,

We are having issues with the aggregate pipeline doing optimization steps which harm the queries performance. It is choosing to do the $sort before the $match stage (my assumption is because the $sort has a better index than the $match stage) which is causing the query to sort the whole database instead of a filtered set.

I have added examples with explain executionStats below to provide more detail. The first example uses an $sort on _id which is indexed making it the first stage in the aggregate. The second example uses an $sort on header.receiverNode (own created field) which is also indexed.
The first example using _id is doing $sort first while the second example is doing the $match first.

db.records.explain("executionStats").aggregate([
   {
      "$match":{
         "$and":[
            {
               "status":"new",
               "messageDeleted":{
                  "$exists":false
               }
            },
            {
               "header.receiverNode":{
                  "$in":[
                     "224572"
                  ]
               }
            }
         ]
      }
   },
   {
      "$sort":{
         "_id":1
      }
   },
   {
      "$limit":100
   }
]");"{
   "stages":[
      {
         "$cursor":{
            "query":{
               "$and":[
                  {
                     "status":"new",
                     "messageDeleted":{
                        "$exists":false
                     }
                  },
                  {
                     "header.receiverNode":{
                        "$in":[
                           "224572"
                        ]
                     }
                  }
               ]
            },
            "sort":{
               "_id":1
            },
            "limit":NumberLong(100),
            "queryPlanner":{
               "plannerVersion":1,
               "namespace":"hdnapi.records",
               "indexFilterSet":false,
               "parsedQuery":{
                  "$and":[
                     {
                        "header.receiverNode":{
                           "$eq":"224572"
                        }
                     },
                     {
                        "status":{
                           "$eq":"new"
                        }
                     },
                     {
                        "messageDeleted":{
                           "$not":{
                              "$exists":true
                           }
                        }
                     }
                  ]
               },
               "queryHash":"6D467E3F",
               "planCacheKey":"28A44C04",
               "winningPlan":{
                  "stage":"FETCH",
                  "filter":{
                     "$and":[
                        {
                           "header.receiverNode":{
                              "$eq":"224572"
                           }
                        },
                        {
                           "status":{
                              "$eq":"new"
                           }
                        },
                        {
                           "messageDeleted":{
                              "$not":{
                                 "$exists":true
                              }
                           }
                        }
                     ]
                  },
                  "inputStage":{
                     "stage":"IXSCAN",
                     "keyPattern":{
                        "_id":1
                     },
                     "indexName":"_id_",
                     "isMultiKey":false,
                     "multiKeyPaths":{
                        "_id":[
                           
                        ]
                     },
                     "isUnique":true,
                     "isSparse":false,
                     "isPartial":false,
                     "indexVersion":2,
                     "direction":"forward",
                     "indexBounds":{
                        "_id":[
                           "[MinKey, MaxKey]"
                        ]
                     }
                  }
               },
               "rejectedPlans":[
                  
               ]
            },
            "executionStats":{
               "executionSuccess":true,
               "nReturned":100,
               "executionTimeMillis":23729,
               "totalKeysExamined":2530452,
               "totalDocsExamined":2530452,
               "executionStages":{
                  "stage":"FETCH",
                  "filter":{
                     "$and":[
                        {
                           "header.receiverNode":{
                              "$eq":"224572"
                           }
                        },
                        {
                           "status":{
                              "$eq":"new"
                           }
                        },
                        {
                           "messageDeleted":{
                              "$not":{
                                 "$exists":true
                              }
                           }
                        }
                     ]
                  },
                  "nReturned":100,
                  "executionTimeMillisEstimate":6277,
                  "works":2530452,
                  "advanced":100,
                  "needTime":2530352,
                  "needYield":0,
                  "saveState":19838,
                  "restoreState":19838,
                  "isEOF":0,
                  "docsExamined":2530452,
                  "alreadyHasObj":0,
                  "inputStage":{
                     "stage":"IXSCAN",
                     "nReturned":2530452,
                     "executionTimeMillisEstimate":481,
                     "works":2530452,
                     "advanced":2530452,
                     "needTime":0,
                     "needYield":0,
                     "saveState":19838,
                     "restoreState":19838,
                     "isEOF":0,
                     "keyPattern":{
                        "_id":1
                     },
                     "indexName":"_id_",
                     "isMultiKey":false,
                     "multiKeyPaths":{
                        "_id":[
                           
                        ]
                     },
                     "isUnique":true,
                     "isSparse":false,
                     "isPartial":false,
                     "indexVersion":2,
                     "direction":"forward",
                     "indexBounds":{
                        "_id":[
                           "[MinKey, MaxKey]"
                        ]
                     },
                     "keysExamined":2530452,
                     "seeks":1,
                     "dupsTested":0,
                     "dupsDropped":0
                  }
               }
            }
         }
      }
   ],
   "serverInfo":{
      "host":"cse04-mongo",
      "port":27017,
      "version":"4.2.20",
      "gitVersion":"15c0712952c356cb711c13a42cb3bca8617d4ebc"
   },
   "ok":1,
   "$clusterTime":{
      "clusterTime":Timestamp(1666614215,
      1),
      "signature":{
         "hash":BinData(0,
         "AAAAAAAAAAAAAAAAAAAAAAAAAAA="")",
         "keyId":NumberLong(0)
      }
   },
   "operationTime":Timestamp(1666614215,
   1)
}

Here we can see “totalKeysExamined” : 2530452 which is the same number as the amount of records in the database. And for total time “executionTimeMillis”:23729.

db.records.explain("executionStats").aggregate([
   {
      "$match":{
         "$and":[
            {
               "status":"new",
               "messageDeleted":{
                  "$exists":false
               }
            },
            {
               "header.receiverNode":{
                  "$in":[
                     "224572"
                  ]
               }
            }
         ]
      }
   },
   {
      "$sort":{
         "header.receiverNode":1
      }
   },
   {
      "$limit":100
   }
]");"{
   "stages":[
      {
         "$cursor":{
            "query":{
               "$and":[
                  {
                     "status":"new",
                     "messageDeleted":{
                        "$exists":false
                     }
                  },
                  {
                     "header.receiverNode":{
                        "$in":[
                           "224572"
                        ]
                     }
                  }
               ]
            },
            "sort":{
               "header.receiverNode":1
            },
            "limit":NumberLong(100),
            "queryPlanner":{
               "plannerVersion":1,
               "namespace":"hdnapi.records",
               "indexFilterSet":false,
               "parsedQuery":{
                  "$and":[
                     {
                        "header.receiverNode":{
                           "$eq":"224572"
                        }
                     },
                     {
                        "status":{
                           "$eq":"new"
                        }
                     },
                     {
                        "messageDeleted":{
                           "$not":{
                              "$exists":true
                           }
                        }
                     }
                  ]
               },
               "queryHash":"CBB00445",
               "planCacheKey":"8B2613FA",
               "winningPlan":{
                  "stage":"FETCH",
                  "filter":{
                     "$and":[
                        {
                           "status":{
                              "$eq":"new"
                           }
                        },
                        {
                           "messageDeleted":{
                              "$not":{
                                 "$exists":true
                              }
                           }
                        }
                     ]
                  },
                  "inputStage":{
                     "stage":"IXSCAN",
                     "keyPattern":{
                        "header.receiverNode":1
                     },
                     "indexName":"header.receiverNode_1",
                     "isMultiKey":false,
                     "multiKeyPaths":{
                        "header.receiverNode":[
                           
                        ]
                     },
                     "isUnique":false,
                     "isSparse":false,
                     "isPartial":false,
                     "indexVersion":2,
                     "direction":"forward",
                     "indexBounds":{
                        "header.receiverNode":[
                           "[\"224572\", \"224572\"]"
                        ]
                     }
                  }
               },
               "rejectedPlans":[
                  
               ]
            },
            "executionStats":{
               "executionSuccess":true,
               "nReturned":100,
               "executionTimeMillis":21,
               "totalKeysExamined":1316,
               "totalDocsExamined":1316,
               "executionStages":{
                  "stage":"FETCH",
                  "filter":{
                     "$and":[
                        {
                           "status":{
                              "$eq":"new"
                           }
                        },
                        {
                           "messageDeleted":{
                              "$not":{
                                 "$exists":true
                              }
                           }
                        }
                     ]
                  },
                  "nReturned":100,
                  "executionTimeMillisEstimate":9,
                  "works":1316,
                  "advanced":100,
                  "needTime":1216,
                  "needYield":0,
                  "saveState":11,
                  "restoreState":11,
                  "isEOF":0,
                  "docsExamined":1316,
                  "alreadyHasObj":0,
                  "inputStage":{
                     "stage":"IXSCAN",
                     "nReturned":1316,
                     "executionTimeMillisEstimate":0,
                     "works":1316,
                     "advanced":1316,
                     "needTime":0,
                     "needYield":0,
                     "saveState":11,
                     "restoreState":11,
                     "isEOF":0,
                     "keyPattern":{
                        "header.receiverNode":1
                     },
                     "indexName":"header.receiverNode_1",
                     "isMultiKey":false,
                     "multiKeyPaths":{
                        "header.receiverNode":[
                           
                        ]
                     },
                     "isUnique":false,
                     "isSparse":false,
                     "isPartial":false,
                     "indexVersion":2,
                     "direction":"forward",
                     "indexBounds":{
                        "header.receiverNode":[
                           "[\"224572\", \"224572\"]"
                        ]
                     },
                     "keysExamined":1316,
                     "seeks":1,
                     "dupsTested":0,
                     "dupsDropped":0
                  }
               }
            }
         }
      }
   ],
   "serverInfo":{
      "host":"cse04-mongo",
      "port":27017,
      "version":"4.2.20",
      "gitVersion":"15c0712952c356cb711c13a42cb3bca8617d4ebc"
   },
   "ok":1,
   "$clusterTime":{
      "clusterTime":Timestamp(1666616755,
      1),
      "signature":{
         "hash":BinData(0,
         "AAAAAAAAAAAAAAAAAAAAAAAAAAA="")",
         "keyId":NumberLong(0)
      }
   },
   "operationTime":Timestamp(1666616755,
   1)
}

Here we can see “totalKeysExamined”: 1316 which is the matched subset. And for total time “executionTimeMillis”:21.

So the same query is behaving different with a sort on _id or “header.receiverNode”. The query with the sort on _id is doing the sort first which leads to teriible performance and the query with “header.receiverNode” is working just fine.

What is causing the different behaviour between these queries? Why is the first query doing a sort before the match?

Hello,

The query sorting on “header.receiverNode” is using the index on “header.receiverNode” for both filtering and sorting which is ideal.

On the other hand when you filter with “header.receiverNode” and sort on “_id” ( which is not part of the query filters ) this means that the optimizer needs to use a separate index for sorting other than the one filtering the data in the $match stage.

When the optimizer needs to use more than one index, it’s called index intersection, However, it doesn’t work when the sort operation requires an index completely separate from the query predicate. and that would explain why when you sort with “_id” the performance degrades as it’s not using the selective index on “header.receiverNode” which in your case matched only 1316 keys.

I can see that this behaviour is consistent and reproducible as I did in my testing environment even with 2 records inserted:

Atlas atlas-whhaeg-shard-0 [primary] test>  db.records.insertOne({"status":"new","header":{"receiverNode":"224572"}})
{
  acknowledged: true,
  insertedId: ObjectId("6356a8a24481e6bdd5090f75")
}
Atlas atlas-whhaeg-shard-0 [primary] test>  db.records.insertOne({"status":"new","header":{"receiverNode":"224572"}})
{
  acknowledged: true,
  insertedId: ObjectId("6356a8a54481e6bdd5090f76")
}
Atlas atlas-whhaeg-shard-0 [primary] test>  db.records.createIndex({"header.receiverNode":1})
header.receiverNode_1
Atlas atlas-whhaeg-shard-0 [primary] test>  db.records.explain("executionStats").aggregate([ { "$match": { "$and": [ { "status": "new", "messageDeleted": { "$exists": false } }, { "header.receiverNode": { "$in": [ "224572"] } }] } }, { "$sort": { "_id": 1 } }, { "$limit": 100 }])
{
  explainVersion: '1',
  queryPlanner: {
    namespace: 'test.records',
    indexFilterSet: false,
    parsedQuery: {
      '$and': [
        { 'header.receiverNode': { '$eq': '224572' } },
        { status: { '$eq': 'new' } },
        { messageDeleted: { '$not': { '$exists': true } } }
      ]
    },
    optimizedPipeline: true,
    maxIndexedOrSolutionsReached: false,
    maxIndexedAndSolutionsReached: false,
    maxScansToExplodeReached: false,
    winningPlan: {
      stage: 'LIMIT',
      limitAmount: 100,
      inputStage: {
        stage: 'FETCH',
        filter: {
          '$and': [
            { 'header.receiverNode': { '$eq': '224572' } },
            { status: { '$eq': 'new' } },
            { messageDeleted: { '$not': { '$exists': true } } }
          ]
        },
        inputStage: {
          stage: 'IXSCAN',
          keyPattern: { _id: 1 },
          indexName: '_id_',
          isMultiKey: false,
          multiKeyPaths: { _id: [] },
          isUnique: true,
          isSparse: false,
          isPartial: false,
          indexVersion: 2,
          direction: 'forward',
          indexBounds: { _id: [ '[MinKey, MaxKey]' ] }
        }
      }
    },
    rejectedPlans: [
      {
        stage: 'SORT',
        sortPattern: { _id: 1 },
        memLimit: 33554432,
        limitAmount: 100,
        type: 'simple',
        inputStage: {
          stage: 'FETCH',
          filter: {
            '$and': [
              { messageDeleted: { '$not': { '$exists': true } } },
              { status: { '$eq': 'new' } }
            ]
          },
          inputStage: {
            stage: 'IXSCAN',
            keyPattern: { 'header.receiverNode': 1 },
            indexName: 'header.receiverNode_1',
            isMultiKey: false,
            multiKeyPaths: { 'header.receiverNode': [] },
            isUnique: false,
            isSparse: false,
            isPartial: false,
            indexVersion: 2,
            direction: 'forward',
            indexBounds: { 'header.receiverNode': [ '["224572", "224572"]' ] }
          }
        }
      }
    ]
  },
  executionStats: {
    executionSuccess: true,
    nReturned: 2,
    executionTimeMillis: 1,
    totalKeysExamined: 2,
    totalDocsExamined: 2,
    executionStages: {
      stage: 'LIMIT',
      nReturned: 2,
      executionTimeMillisEstimate: 0,
      works: 4,
      advanced: 2,
      needTime: 0,
      needYield: 0,
      saveState: 0,
      restoreState: 0,
      isEOF: 1,
      limitAmount: 100,
      inputStage: {
        stage: 'FETCH',
        filter: {
          '$and': [
            { 'header.receiverNode': { '$eq': '224572' } },
            { status: { '$eq': 'new' } },
            { messageDeleted: { '$not': { '$exists': true } } }
          ]
        },
        nReturned: 2,
        executionTimeMillisEstimate: 0,
        works: 4,
        advanced: 2,
        needTime: 0,
        needYield: 0,
        saveState: 0,
        restoreState: 0,
        isEOF: 1,
        docsExamined: 2,
        alreadyHasObj: 0,
        inputStage: {
          stage: 'IXSCAN',
          nReturned: 2,
          executionTimeMillisEstimate: 0,
          works: 3,
          advanced: 2,
          needTime: 0,
          needYield: 0,
          saveState: 0,
          restoreState: 0,
          isEOF: 1,
          keyPattern: { _id: 1 },
          indexName: '_id_',
          isMultiKey: false,
          multiKeyPaths: { _id: [] },
          isUnique: true,
          isSparse: false,
          isPartial: false,
          indexVersion: 2,
          direction: 'forward',
          indexBounds: { _id: [ '[MinKey, MaxKey]' ] },
          keysExamined: 2,
          seeks: 1,
          dupsTested: 0,
          dupsDropped: 0
        }
      }
    }
  },
  command: {
    aggregate: 'records',
    pipeline: [
      {
        '$match': {
          '$and': [
            { status: 'new', messageDeleted: { '$exists': false } },
            { 'header.receiverNode': { '$in': [ '224572' ] } }
          ]
        }
      },
      { '$sort': { _id: 1 } },
      { '$limit': 100 }
    ],
    cursor: {},
    '$db': 'test'
  },
  serverInfo: {
    host: 'ac-mqumzc7-shard-00-02.yzcbsla.mongodb.net',
    port: 27017,
    version: '5.0.13',
    gitVersion: 'cfb7690563a3144d3d1175b3a20c2ec81b662a8f'
  },
  serverParameters: {
    internalQueryFacetBufferSizeBytes: 104857600,
    internalQueryFacetMaxOutputDocSizeBytes: 104857600,
    internalLookupStageIntermediateDocumentMaxSizeBytes: 16793600,
    internalDocumentSourceGroupMaxMemoryBytes: 104857600,
    internalQueryMaxBlockingSortMemoryUsageBytes: 33554432,
    internalQueryProhibitBlockingMergeOnMongoS: 0,
    internalQueryMaxAddToSetBytes: 104857600,
    internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
  },
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1666623668, i: 70 }),
    signature: {
      hash: Binary(Buffer.from("f655b6234ec133c083f5e3ea932602643aad6dd3", "hex"), 0),
      keyId: Long("7108711770176356355")
    }
  },
  operationTime: Timestamp({ t: 1666623668, i: 70 })
}

As you can see it still used the index for “_id” as it considers it better for the sort operation and put the plan using “header.receiverNode” index in the rejected plans.

If your query requires sorting with a field that is not part of the selective index used, you can consider including it in the index, in the below example I created an index that uses both “header.receiverNode” and “_id” and I can see the optimizer is picking it up accordingly.

Atlas atlas-whhaeg-shard-0 [primary] test>  db.records.createIndex({"header.receiverNode":1,"_id":1})
header.receiverNode_1__id_1

Atlas atlas-whhaeg-shard-0 [primary] test>  db.records.explain("executionStats").aggregate([ { "$match": { "$and": [ { "status": "new", "messageDeleted": { "$exists": false } }, { "header.receiverNode": { "$in": [ "224572"] } }] } }, { "$sort": { "_id": 1 } }, { "$limit": 100 }])
{
  explainVersion: '1',
  queryPlanner: {
    namespace: 'test.records',
    indexFilterSet: false,
    parsedQuery: {
      '$and': [
        { 'header.receiverNode': { '$eq': '224572' } },
        { status: { '$eq': 'new' } },
        { messageDeleted: { '$not': { '$exists': true } } }
      ]
    },
    optimizedPipeline: true,
    maxIndexedOrSolutionsReached: false,
    maxIndexedAndSolutionsReached: false,
    maxScansToExplodeReached: false,
    winningPlan: {
      stage: 'LIMIT',
      limitAmount: 100,
      inputStage: {
        stage: 'FETCH',
        filter: {
          '$and': [
            { messageDeleted: { '$not': { '$exists': true } } },
            { status: { '$eq': 'new' } }
          ]
        },
        inputStage: {
          stage: 'IXSCAN',
          keyPattern: { 'header.receiverNode': 1, _id: 1 },
          indexName: 'header.receiverNode_1__id_1',
          isMultiKey: false,
          multiKeyPaths: { 'header.receiverNode': [], _id: [] },
          isUnique: false,
          isSparse: false,
          isPartial: false,
          indexVersion: 2,
          direction: 'forward',
          indexBounds: {
            'header.receiverNode': [ '["224572", "224572"]' ],
            _id: [ '[MinKey, MaxKey]' ]
          }
        }
      }
    },
    rejectedPlans: [
      {
        stage: 'SORT',
        sortPattern: { _id: 1 },
        memLimit: 33554432,
        limitAmount: 100,
        type: 'simple',
        inputStage: {
          stage: 'FETCH',
          filter: {
            '$and': [
              { messageDeleted: { '$not': { '$exists': true } } },
              { status: { '$eq': 'new' } }
            ]
          },
          inputStage: {
            stage: 'IXSCAN',
            keyPattern: { 'header.receiverNode': 1 },
            indexName: 'header.receiverNode_1',
            isMultiKey: false,
            multiKeyPaths: { 'header.receiverNode': [] },
            isUnique: false,
            isSparse: false,
            isPartial: false,
            indexVersion: 2,
            direction: 'forward',
            indexBounds: { 'header.receiverNode': [ '["224572", "224572"]' ] }
          }
        }
      }
    ]
  },
  executionStats: {
    executionSuccess: true,
    nReturned: 2,
    executionTimeMillis: 1,
    totalKeysExamined: 2,
    totalDocsExamined: 2,
    executionStages: {
      stage: 'LIMIT',
      nReturned: 2,
      executionTimeMillisEstimate: 1,
      works: 4,
      advanced: 2,
      needTime: 0,
      needYield: 0,
      saveState: 0,
      restoreState: 0,
      isEOF: 1,
      limitAmount: 100,
      inputStage: {
        stage: 'FETCH',
        filter: {
          '$and': [
            { messageDeleted: { '$not': { '$exists': true } } },
            { status: { '$eq': 'new' } }
          ]
        },
        nReturned: 2,
        executionTimeMillisEstimate: 1,
        works: 4,
        advanced: 2,
        needTime: 0,
        needYield: 0,
        saveState: 0,
        restoreState: 0,
        isEOF: 1,
        docsExamined: 2,
        alreadyHasObj: 0,
        inputStage: {
          stage: 'IXSCAN',
          nReturned: 2,
          executionTimeMillisEstimate: 1,
          works: 3,
          advanced: 2,
          needTime: 0,
          needYield: 0,
          saveState: 0,
          restoreState: 0,
          isEOF: 1,
          keyPattern: { 'header.receiverNode': 1, _id: 1 },
          indexName: 'header.receiverNode_1__id_1',
          isMultiKey: false,
          multiKeyPaths: { 'header.receiverNode': [], _id: [] },
          isUnique: false,
          isSparse: false,
          isPartial: false,
          indexVersion: 2,
          direction: 'forward',
          indexBounds: {
            'header.receiverNode': [ '["224572", "224572"]' ],
            _id: [ '[MinKey, MaxKey]' ]
          },
          keysExamined: 2,
          seeks: 1,
          dupsTested: 0,
          dupsDropped: 0
        }
      }
    }
  },
  command: {
    aggregate: 'records',
    pipeline: [
      {
        '$match': {
          '$and': [
            { status: 'new', messageDeleted: { '$exists': false } },
            { 'header.receiverNode': { '$in': [ '224572' ] } }
          ]
        }
      },
      { '$sort': { _id: 1 } },
      { '$limit': 100 }
    ],
    cursor: {},
    '$db': 'test'
  },
  serverInfo: {
    host: 'ac-mqumzc7-shard-00-02.yzcbsla.mongodb.net',
    port: 27017,
    version: '5.0.13',
    gitVersion: 'cfb7690563a3144d3d1175b3a20c2ec81b662a8f'
  },
  serverParameters: {
    internalQueryFacetBufferSizeBytes: 104857600,
    internalQueryFacetMaxOutputDocSizeBytes: 104857600,
    internalLookupStageIntermediateDocumentMaxSizeBytes: 16793600,
    internalDocumentSourceGroupMaxMemoryBytes: 104857600,
    internalQueryMaxBlockingSortMemoryUsageBytes: 33554432,
    internalQueryProhibitBlockingMergeOnMongoS: 0,
    internalQueryMaxAddToSetBytes: 104857600,
    internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
  },
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1666624083, i: 5 }),
    signature: {
      hash: Binary(Buffer.from("89789d6903a1435fc259eb87b0091aad24e7d129", "hex"), 0),
      keyId: Long("7108711770176356355")
    }
  },
  operationTime: Timestamp({ t: 1666624083, i: 5 })
}

I hope you find this helpful.

2 Likes

Thanks for the answer and extensive explanation. It is now more clear what is causing the optimizer to have this behaviour and why it is choosing a different index at times.

Is this behaviour from the optimizer expected where it chooses to sort first because there are two completely seperate indexes? Is there a way to enforce the aggregate to do the match stage first?
Does this mean the documentation on Aggregation Pipeline Optimization — MongoDB Manual is not always correct?

Extending the index is a good idea but we want to provide multiple different sort options on different fields than _id. The query with _id is an example but there are similar ones causing the same issue.
Does this mean we need to create an index for all of them or a big index including them all?

The query performance would be fine if it would always match first before the sort but because of the index intersection with completely seperate indexes the query becomes unworkable, sorting the entire database before matching seems not optimized ;). We cant remove the other indexes because they are used for other frequent used queries.

I have tried to add a hint ($hint — MongoDB Manual) to the query to force the “header.receiverNode” index and this works because of the hint it does the $match stage first again.

db.records.explain('executionStats').aggregate([{"$match" : { "$and" : [                                                                 {                                                                         "status" : "new",                                                                         "messageDeleted" : {                                                                                 "$exists" : false                                                                         }                                                                 },                                                                 {                                                                         "header.receiverNode" : {                                                                                 "$in" : [                                                                                         "224572"                                                                                 ]                                                                         }                                                                 }                                                         ]                                                 }                                         },                                         {                                                 "$sort" : {                                                         "_id" : 1                                                 }                                         },                                         {                                                 "$limit" : 100                                         } ], {hint: "header.receiverNode_1"});
{
        "stages" : [
                {
                        "$cursor" : {
                                "query" : {
                                        "$and" : [
                                                {
                                                        "status" : "new",
                                                        "messageDeleted" : {
                                                                "$exists" : false
                                                        }
                                                },
                                                {
                                                        "header.receiverNode" : {
                                                                "$in" : [
                                                                        "224572"
                                                                ]
                                                        }
                                                }
                                        ]
                                },
                                "queryPlanner" : {
                                        "plannerVersion" : 1,
                                        "namespace" : "hdnapi.records",
                                        "indexFilterSet" : false,
                                        "parsedQuery" : {
                                                "$and" : [
                                                        {
                                                                "header.receiverNode" : {
                                                                        "$eq" : "224572"
                                                                }
                                                        },
                                                        {
                                                                "status" : {
                                                                        "$eq" : "new"
                                                                }
                                                        },
                                                        {
                                                                "messageDeleted" : {
                                                                        "$not" : {
                                                                                "$exists" : true
                                                                        }
                                                                }
                                                        }
                                                ]
                                        },
                                        "queryHash" : "9E2A8714",
                                        "planCacheKey" : "FEBB4A4C",
                                        "winningPlan" : {
                                                "stage" : "FETCH",
                                                "filter" : {
                                                        "$and" : [
                                                                {
                                                                        "status" : {
                                                                                "$eq" : "new"
                                                                        }
                                                                },
                                                                {
                                                                        "messageDeleted" : {
                                                                                "$not" : {
                                                                                        "$exists" : true
                                                                                }
                                                                        }
                                                                }
                                                        ]
                                                },
                                                "inputStage" : {
                                                        "stage" : "IXSCAN",
                                                        "keyPattern" : {
                                                                "header.receiverNode" : 1
                                                        },
                                                        "indexName" : "header.receiverNode_1",
                                                        "isMultiKey" : false,
                                                        "multiKeyPaths" : {
                                                                "header.receiverNode" : [ ]
                                                        },
                                                        "isUnique" : false,
                                                        "isSparse" : false,
                                                        "isPartial" : false,
                                                        "indexVersion" : 2,
                                                        "direction" : "forward",
                                                        "indexBounds" : {
                                                                "header.receiverNode" : [
                                                                        "[\"224572\", \"224572\"]"
                                                                ]
                                                        }
                                                }
                                        },
                                        "rejectedPlans" : [ ]
                                },
                                "executionStats" : {
                                        "executionSuccess" : true,
                                        "nReturned" : 5778,
                                        "executionTimeMillis" : 130,
                                        "totalKeysExamined" : 12429,
                                        "totalDocsExamined" : 12429,
                                        "executionStages" : {
                                                "stage" : "FETCH",
                                                "filter" : {
                                                        "$and" : [
                                                                {
                                                                        "status" : {
                                                                                "$eq" : "new"
                                                                        }
                                                                },
                                                                {
                                                                        "messageDeleted" : {
                                                                                "$not" : {
                                                                                        "$exists" : true
                                                                                }
                                                                        }
                                                                }
                                                        ]
                                                },
                                                "nReturned" : 5778,
                                                "executionTimeMillisEstimate" : 9,
                                                "works" : 12430,
                                                "advanced" : 5778,
                                                "needTime" : 6651,
                                                "needYield" : 0,
                                                "saveState" : 101,
                                                "restoreState" : 101,
                                                "isEOF" : 1,
                                                "docsExamined" : 12429,
                                                "alreadyHasObj" : 0,
                                                "inputStage" : {
                                                        "stage" : "IXSCAN",
                                                        "nReturned" : 12429,
                                                        "executionTimeMillisEstimate" : 6,
                                                        "works" : 12430,
                                                        "advanced" : 12429,
                                                        "needTime" : 0,
                                                        "needYield" : 0,
                                                        "saveState" : 101,
                                                        "restoreState" : 101,
                                                        "isEOF" : 1,
                                                        "keyPattern" : {
                                                                "header.receiverNode" : 1
                                                        },
                                                        "indexName" : "header.receiverNode_1",
                                                        "isMultiKey" : false,
                                                        "multiKeyPaths" : {
                                                                "header.receiverNode" : [ ]
                                                        },
                                                        "isUnique" : false,
                                                        "isSparse" : false,
                                                        "isPartial" : false,
                                                        "indexVersion" : 2,
                                                        "direction" : "forward",
                                                        "indexBounds" : {
                                                                "header.receiverNode" : [
                                                                        "[\"224572\", \"224572\"]"
                                                                ]
                                                        },
                                                        "keysExamined" : 12429,
                                                        "seeks" : 1,
                                                        "dupsTested" : 0,
                                                        "dupsDropped" : 0
                                                }
                                        }
                                }
                        }
                },
                {
                        "$sort" : {
                                "sortKey" : {
                                        "_id" : 1
                                },
                                "limit" : NumberLong(100)
                        }
                }
        ],
        "serverInfo" : {
                "host" : "cse04-mongo",
                "port" : 27017,
                "version" : "4.2.20",
                "gitVersion" : "15c0712952c356cb711c13a42cb3bca8617d4ebc"
        },
        "ok" : 1,
        "$clusterTime" : {
                "clusterTime" : Timestamp(1666688198, 1),
                "signature" : {
                        "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
                        "keyId" : NumberLong(0)
                }
        },
        "operationTime" : Timestamp(1666688198, 1)
}

This query shows “totalKeysExamined” : 12429 and “executionTimeMillis” : 130 showing it has properly matched first before sorting.

The problem is that our filter does not always contain the field “header.receiverNode” so hinting this index will only work in some cases. Is there a different way than using a $hint or creating multiple/massive indexes to support multiple $sort operations?
I was hoping for a way/option to enforce the $match operation always go first before the $sort operation.

Hello,

Checking the plan when it used “_id” index for sort, it still does the match first before sort, but it doesn’t use an index for filtering, it uses the “_id” index to access the whole collection looking for the matching documents as it will need it again for sorting.

The problem is that part of the aggregation optimisation is deciding if the aggregation would benefit from indexes or not, both $match and $sort are eligible to benefit from indexes, this is ok if they are using the same index, but because they are using different ones, the optimiser chooses one over the other, it’s possible that sorting using indexes takes priority ( this could need further testing ).

If this is not the only filter or sort field you use, then I see how creating more compound indexes or even using hints would not be practical.

Since $sort in aggregation cannot benefit from indexes if it’s preceded with a $project as mentioned here, I tried to introduce a dummy $project stage that excludes a non-existing field just to prevent $sort from using an index and it worked in my environment as per the below example:

{
  explainVersion: '1',
  stages: [
    {
      '$cursor': {
        queryPlanner: {
          namespace: '633c1839ccc493400fe69b83_test.records',
          indexFilterSet: false,
          parsedQuery: {
            '$and': [
              { 'header.receiverNode': { '$eq': '224572' } },
              { status: { '$eq': 'new' } },
              { messageDeleted: { '$not': { '$exists': true } } }
            ]
          },
          queryHash: '9E2A8714',
          planCacheKey: '08D61773',
          maxIndexedOrSolutionsReached: false,
          maxIndexedAndSolutionsReached: false,
          maxScansToExplodeReached: false,
          winningPlan: {
            stage: 'FETCH',
            filter: {
              '$and': [
                { messageDeleted: { '$not': { '$exists': true } } },
                { status: { '$eq': 'new' } }
              ]
            },
            inputStage: {
              stage: 'IXSCAN',
              keyPattern: { 'header.receiverNode': 1 },
              indexName: 'header.receiverNode_1',
              isMultiKey: false,
              multiKeyPaths: { 'header.receiverNode': [] },
              isUnique: false,
              isSparse: false,
              isPartial: false,
              indexVersion: 2,
              direction: 'forward',
              indexBounds: { 'header.receiverNode': [ '["224572", "224572"]' ] }
            }
          },
          rejectedPlans: []
        },
        executionStats: {
          executionSuccess: true,
          nReturned: 1,
          executionTimeMillis: 0,
          totalKeysExamined: 1,
          totalDocsExamined: 1,
          executionStages: {
            stage: 'FETCH',
            filter: {
              '$and': [
                { messageDeleted: { '$not': { '$exists': true } } },
                { status: { '$eq': 'new' } }
              ]
            },
            nReturned: 1,
            executionTimeMillisEstimate: 0,
            works: 2,
            advanced: 1,
            needTime: 0,
            needYield: 0,
            saveState: 1,
            restoreState: 1,
            isEOF: 1,
            docsExamined: 1,
            alreadyHasObj: 0,
            inputStage: {
              stage: 'IXSCAN',
              nReturned: 1,
              executionTimeMillisEstimate: 0,
              works: 2,
              advanced: 1,
              needTime: 0,
              needYield: 0,
              saveState: 1,
              restoreState: 1,
              isEOF: 1,
              keyPattern: { 'header.receiverNode': 1 },
              indexName: 'header.receiverNode_1',
              isMultiKey: false,
              multiKeyPaths: { 'header.receiverNode': [] },
              isUnique: false,
              isSparse: false,
              isPartial: false,
              indexVersion: 2,
              direction: 'forward',
              indexBounds: { 'header.receiverNode': [ '["224572", "224572"]' ] },
              keysExamined: 1,
              seeks: 1,
              dupsTested: 0,
              dupsDropped: 0
            }
          }
        }
      },
      nReturned: Long("1"),
      executionTimeMillisEstimate: Long("0")
    },
    {
      '$project': { non_existing_field: false, _id: true },
      nReturned: Long("1"),
      executionTimeMillisEstimate: Long("0")
    },
    {
      '$sort': { sortKey: { _id: 1 }, limit: Long("100") },
      totalDataSizeSortedBytesEstimate: Long("188"),
      usedDisk: false,
      nReturned: Long("1"),
      executionTimeMillisEstimate: Long("0")
    }
  ],
  serverInfo: {
    host: 'ac-mqumzc7-shard-00-02.yzcbsla.mongodb.net',
    port: 27017,
    version: '5.0.13',
    gitVersion: 'cfb7690563a3144d3d1175b3a20c2ec81b662a8f'
  },
  serverParameters: {
    internalQueryFacetBufferSizeBytes: 104857600,
    internalQueryFacetMaxOutputDocSizeBytes: 104857600,
    internalLookupStageIntermediateDocumentMaxSizeBytes: 16793600,
    internalDocumentSourceGroupMaxMemoryBytes: 104857600,
    internalQueryMaxBlockingSortMemoryUsageBytes: 33554432,
    internalQueryProhibitBlockingMergeOnMongoS: 0,
    internalQueryMaxAddToSetBytes: 104857600,
    internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
  },
  command: {
    aggregate: 'records',
    pipeline: [
      {
        '$match': {
          '$and': [
            { status: 'new', messageDeleted: { '$exists': false } },
            { 'header.receiverNode': { '$in': [ '224572' ] } }
          ]
        }
      },
      { '$project': { non_existing_field: 0 } },
      { '$sort': { _id: 1 } },
      { '$limit': 100 }
    ],
    cursor: {},
    '$db': 'test'
  },
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1666782156, i: 23 }),
    signature: {
      hash: Binary(Buffer.from("b91a1dcd407bd6ab812af23b3778ba556957b620", "hex"), 0),
      keyId: Long("7108711770176356355")
    }
  },
  operationTime: Timestamp({ t: 1666782156, i: 23 })
}

While this may not be ideal and will require you to tweak the aggregation itself, it will guarantee that you avoid this index intersection issue altogether no matter what sort field you use.

1 Like

Thanks again for another detailed answer. The workaround with an non-existing project field is working and it forces the optimizer to use the index for the match stage instead of the sort index.

We will be using this workaround and looking into restricting some sort/filter options so we can use covered indexes to make the queries even faster and remove the need for an empty project field.

Ill mark your answer as the solution.

Cheers

This topic was automatically closed 5 days after the last reply. New replies are no longer allowed.