$in query on a time series collection takes > 10 seconds

Sample data:

    {
        "metadata": {
            "id": "abcd-12",
            "type": "type1"
        },
        "timestamp": "2023-06-27T03:46:49.786099495Z",
        "data": {
            "ids": [
                "abcd-12",
                "efdg-33"
            ],
            "attribute_1": "abc"
        }
    }

Query:

db.ts.find({"metadata.type":"type1","data.ids":{"$in":["efdg-33"]}}).sort({"_id":-1}).limit(1)

Log from MongoDB:

{
    "t": {
        "$date": "2023-06-27T19:10:04.850+00:00"
    },
    "s": "I",
    "c": "COMMAND",
    "id": 51803,
    "ctx": "conn9",
    "msg": "Slow query",
    "attr": {
        "type": "command",
        "ns": "sampledb.ts",
        "command": {
            "find": "ts",
            "filter": {
                "metadata.type": "type1",
                "data.ids": {
                    "$in": [
                        "efdg-33"
                    ]
                }
            },
            "limit": 1,
            "projection": {
                "data": 0
            },
            "singleBatch": true,
            "sort": {
                "_id": -1
            },
            "lsid": {
                "id": {
                    "$uuid": "e55dfa5f-d438-4767-862a-d4912efc03e3"
                }
            },
            "$db": "sampledb"
        },
        "planSummary": "COLLSCAN",
        "resolvedViews": [
            {
                "viewNamespace": "sampledb.ts",
                "dependencyChain": [
                    "ts",
                    "system.buckets.ts"
                ],
                "resolvedPipeline": [
                    {
                        "$_internalUnpackBucket": {
                            "timeField": "timestamp",
                            "metaField": "metadata",
                            "bucketMaxSpanSeconds": 86400
                        }
                    }
                ]
            }
        ],
        "keysExamined": 0,
        "docsExamined": 2606504,
        "hasSortStage": true,
        "cursorExhausted": true,
        "numYields": 2703,
        "nreturned": 1,
        "queryHash": "130CB8DF",
        "planCacheKey": "130CB8DF",
        "queryFramework": "classic",
        "reslen": 194,
        "locks": {
            "FeatureCompatibilityVersion": {
                "acquireCount": {
                    "r": 2819
                }
            },
            "Global": {
                "acquireCount": {
                    "r": 2819
                }
            },
            "Mutex": {
                "acquireCount": {
                    "r": 116
                }
            }
        },
        "storage": {
            "data": {
                "bytesRead": 7279476780,
                "timeReadingMicros": 7278730
            },
            "timeWaitingMicros": {
                "cache": 2822
            }
        },
        "remote": "129.0.168.1:60736",
        "protocol": "op_msg",
        "durationMillis": 14869
    }
}

It becomes worse as the number of documents grows (about 70 documents per metadata.id, roughly 4M documents in total).
Indexing data.ids should improve performance, but it looks like MongoDB time series collections don't support indexes on array fields.

sampldb> db.ts.createIndex( { "data.ids": 1 } )
MongoServerError: Index build failed: b38519f2-ca8f-4244-91fe-845f87f19137: Collection sampldb.system.buckets.ts ( a17c3bff-c74f-4a3d-b18f-aee3bca3404b ) :: caused by :: Indexed measurement field contains an array value

MongoDB version: 6.0.6

Please let me know if there is a solution. Thanks.

Can you show the output of explain()?

Sure.

sampledb> db.ts.find({"metadata.type":"type1","data.ids":{"$in":["efdg-33"]}}).sort({"_id":-1}).limit(1).explain()
{
  explainVersion: '1',
  stages: [
    {
      '$cursor': {
        queryPlanner: {
          namespace: 'sampledb.system.buckets.ts',
          indexFilterSet: false,
          parsedQuery: {
            '$and': [
              { 'meta.type': { '$eq': 'type1' } },
              {
                'control.max.data.ids': { '$_internalExprGte': 'efdg-33' }
              },
              {
                'control.min.data.ids': { '$_internalExprLte': 'efdg-33' }
              }
            ]
          },
          queryHash: '130CB8DF',
          planCacheKey: '130CB8DF',
          maxIndexedOrSolutionsReached: false,
          maxIndexedAndSolutionsReached: false,
          maxScansToExplodeReached: false,
          winningPlan: {
            stage: 'COLLSCAN',
            filter: {
              '$and': [
                { 'meta.type': { '$eq': 'type1' } },
                {
                  'control.max.data.ids': { '$_internalExprGte': 'efdg-33' }
                },
                {
                  'control.min.data.ids': { '$_internalExprLte': 'efdg-33' }
                }
              ]
            },
            direction: 'forward'
          },
          rejectedPlans: []
        },
        executionStats: {
          executionSuccess: true,
          nReturned: 75371,
          executionTimeMillis: 8123,
          totalKeysExamined: 0,
          totalDocsExamined: 2709723,
          executionStages: {
            stage: 'COLLSCAN',
            filter: {
              '$and': [
                { 'meta.type': { '$eq': 'type1' } },
                {
                  'control.max.data.ids': { '$_internalExprGte': 'efdg-33' }
                },
                {
                  'control.min.data.ids': { '$_internalExprLte': 'efdg-33' }
                }
              ]
            },
            nReturned: 75371,
            executionTimeMillisEstimate: 3849,
            works: 2709725,
            advanced: 75371,
            needTime: 2634353,
            needYield: 0,
            saveState: 2861,
            restoreState: 2861,
            isEOF: 1,
            direction: 'forward',
            docsExamined: 2709723
          },
          allPlansExecution: []
        }
      },
      nReturned: Long("75371"),
      executionTimeMillisEstimate: Long("7420")
    },
    {
      '$_internalUnpackBucket': {
        exclude: [],
        timeField: 'timestamp',
        metaField: 'metadata',
        bucketMaxSpanSeconds: 86400,
        assumeNoMixedSchemaData: true,
        usesExtendedRange: true
      },
      nReturned: Long("509841"),
      executionTimeMillisEstimate: Long("7768")
    },
    {
      '$match': { 'data.ids': { '$eq': 'efdg-33' } },
      nReturned: Long("75"),
      executionTimeMillisEstimate: Long("8121")
    },
    {
      '$sort': { sortKey: { _id: -1 }, limit: Long("1") },
      totalDataSizeSortedBytesEstimate: Long("0"),
      usedDisk: false,
      spills: Long("0"),
      nReturned: Long("1"),
      executionTimeMillisEstimate: Long("8121")
    }
  ],
  serverInfo: {
    host: 'mongodb-7556b474f-mgmb7',
    port: 27017,
    version: '6.0.6',
    gitVersion: '26b4851a412cc8b9b4a18cdb6cd0f9f642e06aa7'
  },
  serverParameters: {
    internalQueryFacetBufferSizeBytes: 104857600,
    internalQueryFacetMaxOutputDocSizeBytes: 104857600,
    internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
    internalDocumentSourceGroupMaxMemoryBytes: 104857600,
    internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
    internalQueryProhibitBlockingMergeOnMongoS: 0,
    internalQueryMaxAddToSetBytes: 104857600,
    internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
  },
  command: {
    aggregate: 'system.buckets.ts',
    pipeline: [
      {
        '$_internalUnpackBucket': {
          timeField: 'timestamp',
          metaField: 'metadata',
          bucketMaxSpanSeconds: 86400,
          assumeNoMixedSchemaData: true,
          usesExtendedRange: true
        }
      },
      {
        '$match': {
          'metadata.type': 'type1',
          'data.ids': { '$in': [ 'efdg-33' ] }
        }
      },
      { '$sort': { _id: -1 } },
      { '$limit': Long("1") }
    ],
    cursor: {},
    collation: {}
  },
  ok: 1
}

So the planner is using a collection scan; that's why it's slow. You need to create an index that covers your query filter (a generic sketch follows below). But I'm not familiar with time series collections and array indexes, so I can't help further.
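For a regular (non-time-series) collection, an index covering that filter would look something like this sketch; the field names are taken from the sample document above, and whether this is allowed on a time series collection is addressed in the next reply:

    // Sketch for an ordinary collection: compound index on the equality
    // field plus the array field queried with $in (multikey on data.ids).
    db.ts.createIndex({ "metadata.type": 1, "data.ids": 1 })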

Thanks. I tried creating the index on data.ids, but it looks like an index on an array field is not allowed for time series collections.

Workaround: move ids into metadata (the metaField); then it can be indexed, as in the sketch below.
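A minimal sketch of that workaround, assuming the documents are rewritten so that ids lives under the metaField (other field names as in the sample document, and the same collection name ts is reused):

    // Restructured document: "ids" moved from data into the metaField.
    {
        "metadata": {
            "id": "abcd-12",
            "type": "type1",
            "ids": ["abcd-12", "efdg-33"]
        },
        "timestamp": "2023-06-27T03:46:49.786099495Z",
        "data": {
            "attribute_1": "abc"
        }
    }

    // Index on the metaField subfield; the "Indexed measurement field
    // contains an array value" restriction applies to measurement fields,
    // not to the metaField.
    db.ts.createIndex({ "metadata.ids": 1 })

    // Query rewritten against the relocated field.
    db.ts.find({ "metadata.type": "type1", "metadata.ids": { "$in": ["efdg-33"] } })
         .sort({ "_id": -1 })
         .limit(1)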