Hi everyone,
I have 36.5 million documents in my collection. They look like this:
{
    "_id" : ObjectId("60619f84dc46fad865680ab8"),
    "uuid" : "47aad0e5-3e41-4951-8881-3ae70ae85bcd",
    "id_tb" : ObjectId("60619f84dc46fad865680225"),
    "code" : "",
    "ser" : "",
    "mat" : "",
    "state" : "",
    "ts_start" : ISODate("2018-01-01T18:46:56.000Z"),
    "ts_end" : ISODate("2018-01-01T18:47:20.000Z"),
    "ts_restart" : ISODate("2018-01-01T18:46:56.000Z"),
    "values" : [
        {
            "dtype" : "i",
            "v_int" : NumberLong(20030606),
            "key" : "x"
        },
        {
            "dtype" : "i",
            "v_int" : NumberLong(1208551142),
            "key" : "y"
        },
        ...
    ]
}
Every document has an array values with 730 subdocuments. (The average size of a run document is 53.2 KB.)
Now I want to query those documents like this:
db.runs.find({ "ts_start" : { "$gte" : ISODate("2018-01-01T23:00:00Z") }, "ts_end" : { "$lte" : ISODate("2018-01-01T23:59:59Z") } }).limit(80)
This query finishes in less than a second. But if I change limit(80) to limit(81), or anything greater than 80, it takes extremely long: the output below shows millis of 1418283, i.e. roughly 23 minutes.
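In case it helps, this is how the two cases can be compared side by side in the shell (note that explain("executionStats") actually executes the query, so the limit(81) variant takes as long as the slow query itself):

// Same filter as the query above, only the limit differs.
var filter = {
    "ts_start" : { "$gte" : ISODate("2018-01-01T23:00:00Z") },
    "ts_end" : { "$lte" : ISODate("2018-01-01T23:59:59Z") }
};

var fast = db.runs.find(filter).limit(80).explain("executionStats");
var slow = db.runs.find(filter).limit(81).explain("executionStats");

// Compare which plan the optimizer picked in each case.
printjson(fast.queryPlanner.winningPlan);
printjson(slow.queryPlanner.winningPlan);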
I already have indices on the fields I will filter on: _id_, ser, mat, ts_start, ts_end, code, state, and a compound index for ts_start and ts_end: {ts_start: 1, ts_end: 1}.
The total size of these indices is 2.5 GB.
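For reference, these indices correspond to createIndex() calls along these lines (exact options and index names are assumptions on my part):

// Single-field indices on the filterable fields (the _id index exists by default).
db.runs.createIndex({ ser : 1 });
db.runs.createIndex({ mat : 1 });
db.runs.createIndex({ ts_start : 1 });
db.runs.createIndex({ ts_end : 1 });
db.runs.createIndex({ code : 1 });
db.runs.createIndex({ state : 1 });

// Compound index covering both range bounds of the query.
db.runs.createIndex({ ts_start : 1, ts_end : 1 });

// The 2.5 GB figure can be checked via totalIndexSize (reported in bytes).
db.runs.stats().totalIndexSize;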
I am using MongoDB 4.2 on a Windows Server 2012 machine with 32 GB of RAM.
Here is the profiler entry for a slow run of the query; its execStats section is what explain("executionStats") would show:
{
    "op" : "query",
    "ns" : "db.runs",
    "command" : {
        "find" : "runs",
        "filter" : {
            "ts_start" : {
                "$gte" : ISODate("2018-01-01T23:00:00.000Z")
            },
            "ts_end" : {
                "$lte" : ISODate("2018-01-01T23:59:59.000Z")
            }
        },
        "limit" : 81,
        "comment" : "MyQuery",
        "$db" : "db",
        "lsid" : {
            "id" : UUID("562115f5-059d-441a-96ad-e9da6702c962")
        }
    },
    "keysExamined" : 248196,
    "docsExamined" : 248196,
    "fromMultiPlanner" : true,
    "replanned" : true,
    "replanReason" : "cached plan was less efficient than expected: expected trial execution to take 81 works but it took at least 810 works",
    "cursorExhausted" : true,
    "numYield" : 73223,
    "nreturned" : 80,
    "queryHash" : "8927FA09",
    "planCacheKey" : "EFB63FF1",
    "locks" : {
        "ParallelBatchWriterMode" : {
            "acquireCount" : {
                "r" : NumberLong(1)
            }
        },
        "ReplicationStateTransition" : {
            "acquireCount" : {
                "w" : NumberLong(73225)
            }
        },
        "Global" : {
            "acquireCount" : {
                "r" : NumberLong(73225)
            }
        },
        "Database" : {
            "acquireCount" : {
                "r" : NumberLong(73224)
            }
        },
        "Collection" : {
            "acquireCount" : {
                "r" : NumberLong(73224)
            }
        },
        "Mutex" : {
            "acquireCount" : {
                "r" : NumberLong(1)
            }
        }
    },
    "flowControl" : {},
    "storage" : {
        "data" : {
            "bytesRead" : NumberLong(37794055022),
            "timeReadingMicros" : NumberLong(1394728348)
        }
    },
    "responseLength" : 4283172,
    "protocol" : "op_msg",
    "millis" : 1418283,
    "planSummary" : "IXSCAN { ts_end: 1 }",
    "execStats" : {
        "stage" : "LIMIT",
        "nReturned" : 80,
        "executionTimeMillisEstimate" : 500618,
        "works" : 248197,
        "advanced" : 80,
        "needTime" : 248116,
        "needYield" : 0,
        "saveState" : 73217,
        "restoreState" : 73217,
        "isEOF" : 1,
        "limitAmount" : 81,
        "inputStage" : {
            "stage" : "FETCH",
            "filter" : {
                "ts_start" : {
                    "$gte" : ISODate("2018-01-01T23:00:00.000Z")
                }
            },
            "nReturned" : 80,
            "executionTimeMillisEstimate" : 500574,
            "works" : 248197,
            "advanced" : 80,
            "needTime" : 248116,
            "needYield" : 0,
            "saveState" : 73217,
            "restoreState" : 73217,
            "isEOF" : 1,
            "docsExamined" : 248196,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 248196,
                "executionTimeMillisEstimate" : 707,
                "works" : 248197,
                "advanced" : 248196,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 73217,
                "restoreState" : 73217,
                "isEOF" : 1,
                "keyPattern" : {
                    "ts_end" : 1
                },
                "indexName" : "ts_end",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "ts_end" : []
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "ts_end" : [
                        "(true, new Date(1514851199000)]"
                    ]
                },
                "keysExamined" : 248196,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0
            }
        }
    },
    "ts" : ISODate("2021-05-04T19:13:02.342Z"),
    "client" : "127.0.0.1",
    "allUsers" : [],
    "user" : ""
}
What I can see is that it uses the single-field index {ts_end: 1}, and the IXSCAN stage itself is quite fast (executionTimeMillisEstimate of 707), but the FETCH and LIMIT stages are extremely slow (around 500,000 ms each).
While running a query like this, I could see in Windows Resource Monitor that mongod.exe starts reading heavily from disk, which is consistent with the roughly 37.8 GB of bytesRead in the storage section above.
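For completeness, one workaround I have not verified yet would be to pin the query to the compound index with hint(), so the planner cannot fall back to { ts_end: 1 }:

// Sketch only: force the compound index instead of the planner's choice.
db.runs.find({
    "ts_start" : { "$gte" : ISODate("2018-01-01T23:00:00Z") },
    "ts_end" : { "$lte" : ISODate("2018-01-01T23:59:59Z") }
}).hint({ ts_start : 1, ts_end : 1 }).limit(81)

// Since the output shows replanned: true, clearing the cached plans for
// this collection might also be worth a try.
db.runs.getPlanCache().clear()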
Does anyone have an idea on how to make this faster? Thank you in advance.
Edit: @Stennie_X Thanks for adding tags, I could not find that option when creating this topic.