Hi everyone,
I have 36.5 million documents in my collection. They look like this:
{
    "_id" : ObjectId("60619f84dc46fad865680ab8"),
    "uuid" : "47aad0e5-3e41-4951-8881-3ae70ae85bcd",
    "id_tb" : ObjectId("60619f84dc46fad865680225"),
    "code" : "",
    "ser" : "",
    "mat" : "",
    "state" : "",
    "ts_start" : ISODate("2018-01-01T18:46:56.000Z"),
    "ts_end" : ISODate("2018-01-01T18:47:20.000Z"),
    "ts_restart" : ISODate("2018-01-01T18:46:56.000Z"),
    "values" : [
        {
            "dtype" : "i",
            "v_int" : NumberLong(20030606),
            "key" : "x"
        },
        {
            "dtype" : "i",
            "v_int" : NumberLong(1208551142),
            "key" : "y"
        },
        ...
    ]
}
Every document has an array values with 730 subdocuments. (The average size of a run document is 53.2 KB.)
Now I want to query those documents like this:
db.runs.find({ "ts_start" : { "$gte" : ISODate("2018-01-01T23:00:00Z") }, "ts_end" : { "$lte" : ISODate("2018-01-01T23:59:59Z") } }).limit(80)
This query finishes in less than a second. But if I change limit(80) to limit(81), or anything greater than 80, it takes extremely long: the output below shows millis of 1418283, i.e. roughly 23 minutes.
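In case it helps, this is how the two cases can be compared side by side in the shell (note that explain("executionStats") actually executes the query, so the limit(81) variant takes as long as the slow query itself):

// Same filter as the query above, only the limit differs.
var filter = {
    "ts_start" : { "$gte" : ISODate("2018-01-01T23:00:00Z") },
    "ts_end" : { "$lte" : ISODate("2018-01-01T23:59:59Z") }
};

var fast = db.runs.find(filter).limit(80).explain("executionStats");
var slow = db.runs.find(filter).limit(81).explain("executionStats");

// Compare which plan the optimizer picked in each case.
printjson(fast.queryPlanner.winningPlan);
printjson(slow.queryPlanner.winningPlan);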
I already have indices on the fields I will filter on: _id_, ser, mat, ts_start, ts_end, code, state, and a compound index for ts_start and ts_end: {ts_start: 1, ts_end: 1}.
The total size of these indices is 2.5 GB.
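For reference, these indices correspond to createIndex() calls along these lines (exact options and index names are assumptions on my part):

// Single-field indices on the filterable fields (the _id index exists by default).
db.runs.createIndex({ ser : 1 });
db.runs.createIndex({ mat : 1 });
db.runs.createIndex({ ts_start : 1 });
db.runs.createIndex({ ts_end : 1 });
db.runs.createIndex({ code : 1 });
db.runs.createIndex({ state : 1 });

// Compound index covering both range bounds of the query.
db.runs.createIndex({ ts_start : 1, ts_end : 1 });

// The 2.5 GB figure can be checked via totalIndexSize (reported in bytes).
db.runs.stats().totalIndexSize;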
I am using MongoDB 4.2 on a Windows Server 2012 machine with 32 GB of RAM.
Here is the profiler entry for a slow run of the query; its execStats section is what explain("executionStats") would show:
{
    "op" : "query",
    "ns" : "db.runs",
    "command" : {
        "find" : "runs",
        "filter" : {
            "ts_start" : {
                "$gte" : ISODate("2018-01-01T23:00:00.000Z")
            },
            "ts_end" : {
                "$lte" : ISODate("2018-01-01T23:59:59.000Z")
            }
        },
        "limit" : 81,
        "comment" : "MyQuery",
        "$db" : "db",
        "lsid" : {
            "id" : UUID("562115f5-059d-441a-96ad-e9da6702c962")
        }
    },
    "keysExamined" : 248196,
    "docsExamined" : 248196,
    "fromMultiPlanner" : true,
    "replanned" : true,
    "replanReason" : "cached plan was less efficient than expected: expected trial execution to take 81 works but it took at least 810 works",
    "cursorExhausted" : true,
    "numYield" : 73223,
    "nreturned" : 80,
    "queryHash" : "8927FA09",
    "planCacheKey" : "EFB63FF1",
    "locks" : {
        "ParallelBatchWriterMode" : {
            "acquireCount" : {
                "r" : NumberLong(1)
            }
        },
        "ReplicationStateTransition" : {
            "acquireCount" : {
                "w" : NumberLong(73225)
            }
        },
        "Global" : {
            "acquireCount" : {
                "r" : NumberLong(73225)
            }
        },
        "Database" : {
            "acquireCount" : {
                "r" : NumberLong(73224)
            }
        },
        "Collection" : {
            "acquireCount" : {
                "r" : NumberLong(73224)
            }
        },
        "Mutex" : {
            "acquireCount" : {
                "r" : NumberLong(1)
            }
        }
    },
    "flowControl" : {},
    "storage" : {
        "data" : {
            "bytesRead" : NumberLong(37794055022),
            "timeReadingMicros" : NumberLong(1394728348)
        }
    },
    "responseLength" : 4283172,
    "protocol" : "op_msg",
    "millis" : 1418283,
    "planSummary" : "IXSCAN { ts_end: 1 }",
    "execStats" : {
        "stage" : "LIMIT",
        "nReturned" : 80,
        "executionTimeMillisEstimate" : 500618,
        "works" : 248197,
        "advanced" : 80,
        "needTime" : 248116,
        "needYield" : 0,
        "saveState" : 73217,
        "restoreState" : 73217,
        "isEOF" : 1,
        "limitAmount" : 81,
        "inputStage" : {
            "stage" : "FETCH",
            "filter" : {
                "ts_start" : {
                    "$gte" : ISODate("2018-01-01T23:00:00.000Z")
                }
            },
            "nReturned" : 80,
            "executionTimeMillisEstimate" : 500574,
            "works" : 248197,
            "advanced" : 80,
            "needTime" : 248116,
            "needYield" : 0,
            "saveState" : 73217,
            "restoreState" : 73217,
            "isEOF" : 1,
            "docsExamined" : 248196,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 248196,
                "executionTimeMillisEstimate" : 707,
                "works" : 248197,
                "advanced" : 248196,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 73217,
                "restoreState" : 73217,
                "isEOF" : 1,
                "keyPattern" : {
                    "ts_end" : 1
                },
                "indexName" : "ts_end",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "ts_end" : []
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "ts_end" : [
                        "(true, new Date(1514851199000)]"
                    ]
                },
                "keysExamined" : 248196,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0
            }
        }
    },
    "ts" : ISODate("2021-05-04T19:13:02.342Z"),
    "client" : "127.0.0.1",
    "allUsers" : [],
    "user" : ""
}
What I can see is that it uses the single-field index {ts_end: 1}, and the IXSCAN stage itself is quite fast (executionTimeMillisEstimate of 707), but the FETCH and LIMIT stages are extremely slow (around 500,000 ms each).
While running a query like this, I could see in Windows Resource Monitor that mongod.exe starts reading heavily from disk, which is consistent with the roughly 37.8 GB of bytesRead in the storage section above.
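For completeness, one workaround I have not verified yet would be to pin the query to the compound index with hint(), so the planner cannot fall back to { ts_end: 1 }:

// Sketch only: force the compound index instead of the planner's choice.
db.runs.find({
    "ts_start" : { "$gte" : ISODate("2018-01-01T23:00:00Z") },
    "ts_end" : { "$lte" : ISODate("2018-01-01T23:59:59Z") }
}).hint({ ts_start : 1, ts_end : 1 }).limit(81)

// Since the output shows replanned: true, clearing the cached plans for
// this collection might also be worth a try.
db.runs.getPlanCache().clear()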
Does anyone have an idea on how to make this faster? Thank you in advance.
Edit: @Stennie_X Thanks for adding tags, I could not find that option when creating this topic.