Index misses a document?

I have a pretty large DB for 180M documents and some indexes.

One such index is on the field block_height. This field is present in every document.

Now, when I query for a particular block_height, I get the following execution plan (see below).

The query returns 6 documents. Problem is: there are 7 documents in the DB! I can find the missing document using its _id no problem, but it will not show up in the query on block_height.

How is this even possible? Any remedies?

Output of the Explain Plan:

{
  "explainVersion": "1",
  "queryPlanner": {
    "namespace": "db.impacted_addresses",
    "parsedQuery": {
      "block_height": { "$eq": 5521120 }
    },
    "indexFilterSet": false,
    "queryHash": "4D149BD8",
    "planCacheKey": "F3CEF760",
    "optimizationTimeMillis": 0,
    "maxIndexedOrSolutionsReached": false,
    "maxIndexedAndSolutionsReached": false,
    "maxScansToExplodeReached": false,
    "prunedSimilarIndexes": false,
    "winningPlan": {
      "isCached": false,
      "stage": "FETCH",
      "inputStage": {
        "stage": "IXSCAN",
        "keyPattern": { "block_height": 1 },
        "indexName": "block_height_1",
        "isMultiKey": false,
        "multiKeyPaths": { "block_height": [] },
        "isUnique": false,
        "isSparse": false,
        "isPartial": false,
        "indexVersion": 2,
        "direction": "forward",
        "indexBounds": {
          "block_height": ["[5521120, 5521120]"]
        }
      }
    },
    "rejectedPlans": [
      {
        "isCached": false,
        "stage": "FETCH",
        "inputStage": {
          "stage": "IXSCAN",
          "keyPattern": { "block_height": -1 },
          "indexName": "block_height_-1",
          "isMultiKey": false,
          "multiKeyPaths": { "block_height": [] },
          "isUnique": false,
          "isSparse": false,
          "isPartial": false,
          "indexVersion": 2,
          "direction": "forward",
          "indexBounds": {
            "block_height": ["[5521120, 5521120]"]
          }
        }
      }
    ]
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 6,
    "executionTimeMillis": 55,
    "totalKeysExamined": 6,
    "totalDocsExamined": 6,
    "executionStages": {
      "isCached": false,
      "stage": "FETCH",
      "nReturned": 6,
      "executionTimeMillisEstimate": 21,
      "works": 8,
      "advanced": 6,
      "needTime": 0,
      "needYield": 0,
      "saveState": 2,
      "restoreState": 2,
      "isEOF": 1,
      "docsExamined": 6,
      "alreadyHasObj": 0,
      "inputStage": {
        "stage": "IXSCAN",
        "nReturned": 6,
        "executionTimeMillisEstimate": 2,
        "works": 7,
        "advanced": 6,
        "needTime": 0,
        "needYield": 0,
        "saveState": 2,
        "restoreState": 2,
        "isEOF": 1,
        "keyPattern": { "block_height": 1 },
        "indexName": "block_height_1",
        "isMultiKey": false,
        "multiKeyPaths": { "block_height": [] },
        "isUnique": false,
        "isSparse": false,
        "isPartial": false,
        "indexVersion": 2,
        "direction": "forward",
        "indexBounds": {
          "block_height": ["[5521120, 5521120]"]
        },
        "keysExamined": 6,
        "seeks": 1,
        "dupsTested": 0,
        "dupsDropped": 0
      }
    }
  },
  "command": {
    "find": "impacted_addresses",
    "filter": { "block_height": 5521120 },
    "sort": { "block_height": 1 },
    "skip": 0,
    "limit": 0,
    "maxTimeMS": 60000,
    "$db": "db"
  },
  "serverInfo": {
    "host": "e08b0e958b78",
    "port": 27017,
    "version": "8.0.0",
    "gitVersion": "d7cd03b239ac39a3c7d63f7145e91aca36f93db6"
  },
  "serverParameters": {
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
    "internalQueryFrameworkControl": "trySbeRestricted",
    "internalQueryPlannerIgnoreIndexWithCollationForRegex": 1
  },
  "ok": 1,
  "$clusterTime": {
    "clusterTime": {
      "$timestamp": "7425699293131964435"
    },
    "signature": {
      "hash": "G6BwkviSJ+sPWfj6CT1MCtERInQ=",
      "keyId": {
        "low": 2,
        "high": 1720210990,
        "unsigned": false
      }
    }
  },
  "operationTime": {
    "$timestamp": "7425699293131964435"
  }
}`

Share the document

You might as well share all 7 documents, it could help.

I have an idea of the issue but I want to make sure first.

You could project on _id and block_height. It should be sufficient.

Thank you for your reply.
These are the 6 documents that are found:

[{
  "_id": "2d967ca608a075fb6d38bb175d97950623e51d5a959df86e7e28dbe2afd2c53d-3CLkmo5gaKbxNXjPdDyU3mtGobB5dvLphGySxbEALQGMH2fzm4-transfer_event",
  "block_height": 5521120
},
{
  "_id": "2d967ca608a075fb6d38bb175d97950623e51d5a959df86e7e28dbe2afd2c53d-3e5F13P33VA6taxX2FyB8KaSQRFM64x8ypXe3gVnx22c7RskNZ-transfer_event",
  "block_height": 5521120
},
{
  "_id": "2d967ca608a075fb6d38bb175d97950623e51d5a959df86e7e28dbe2afd2c53d-3yhsVb1gkDNr1souvty573pqDLk9vybCWpG4bLoHJL7X9wtEEp-transfer_event",
  "block_height": 5521120
},
{
  "_id": "2d967ca608a075fb6d38bb175d97950623e51d5a959df86e7e28dbe2afd2c53d-<8587,0>",
  "block_height": 5521120
},
{
  "_id": "b6c40334f46b53c1f437c55d1a41bb00a6cc27229b67e0233c696ca29d003f7b-35CJPZohio6Ztii2zy1AYzJKvuxbG",
  "block_height": 5521120
},
{
  "_id": "2d967ca608a075fb6d38bb175d97950623e51d5a959df86e7e28dbe2afd2c53d-3CLkmo5gaKbxNXjPdDyU3mtGobB5d",
  "block_height": 5521120
}]

This is the document that is missing:

[{
  "_id": "2d967ca608a075fb6d38bb175d97950623e51d5a959df86e7e28dbe2afd2c53d-3CLkmo5gaKbxNXjPdDyU3mtGobB5d",
  "block_height": 5521120
}]

I’d be super interested to learn how the _id influences whether it’s found or not? I’m pretty sure the _ids are unique, as they must be.

Again, thank you!

Text diff shows that your missing document already a part of 6 documents returned. Can you double check?

1 Like

I did and found to my surprise that the ids I’m generating aren’t unique. I know that sounds weird, but I’m storing a log of which ids I’m expecting. Turns out that 2 of the 7 documents have the same generated id, hence I think I’m upserting 7 documents, but in reality I’m only upserting 6…

My faith in indexing is restored. Thank you.

3 Likes