Hi MongoDB community,
I’m using a time series collection to store IoT data and would like to get the most recent document in the collection to find out the current state of the IoT device that’s pushing the data.
My collection has about 300,000 documents, uses a timestamp
field as the timeField
, and has an index on timestamp
, as suggested in the documentation.
I assumed that the following query would be efficient:
db.collection("iot-time-series")
.find({}, { sort: { timestamp: -1 }, limit: 1 })
.toArray()
However, it takes about two seconds to execute on an M10 cluster. If I interpret the explain output for this query correctly, MongoDB doesn’t use the index on timestamp
and loads all 300,000 documents into memory to find the one with the most recent timestamp.
Explain output
{
"explainVersion": "1",
"stages": [
{
"$cursor": {
"queryPlanner": {
"namespace": "REDACTED.system.buckets.iot-time-series",
"indexFilterSet": false,
"parsedQuery": {},
"queryHash": "8B3D4AB8",
"planCacheKey": "D542626C",
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": { "stage": "COLLSCAN", "direction": "forward" },
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 825,
"executionTimeMillis": 1232,
"totalKeysExamined": 0,
"totalDocsExamined": 825,
"executionStages": {
"stage": "COLLSCAN",
"nReturned": 825,
"executionTimeMillisEstimate": 0,
"works": 827,
"advanced": 825,
"needTime": 1,
"needYield": 0,
"saveState": 41,
"restoreState": 41,
"isEOF": 1,
"direction": "forward",
"docsExamined": 825
},
"allPlansExecution": []
}
},
"nReturned": 825,
"executionTimeMillisEstimate": 90
},
{
"$_internalUnpackBucket": {
"exclude": [],
"timeField": "timestamp",
"metaField": "metadata",
"bucketMaxSpanSeconds": 3600
},
"nReturned": 294628,
"executionTimeMillisEstimate": 1060
},
{
"$sort": { "sortKey": { "timestamp": -1 }, "limit": 1 },
"totalDataSizeSortedBytesEstimate": 0,
"usedDisk": false,
"nReturned": 1,
"executionTimeMillisEstimate": 1227
}
],
"serverInfo": {
"host": "REDACTED",
"port": 27017,
"version": "5.0.6",
"gitVersion": "212a8dbb47f07427dae194a9c75baec1d81d9259"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600
},
"command": {
"aggregate": "system.buckets.iot-time-series",
"pipeline": [
{
"$_internalUnpackBucket": {
"timeField": "timestamp",
"metaField": "metadata",
"bucketMaxSpanSeconds": 3600,
"exclude": []
}
},
{ "$sort": { "timestamp": -1 } },
{ "$limit": 1 }
],
"cursor": {},
"maxTimeMS": 60000,
"collation": { "locale": "simple" }
},
"ok": 1,
"$clusterTime": {
"clusterTime": { "$timestamp": "7068709110501343233" },
"signature": {
"hash": "/scbPt2f8gVBc/Jpq0GEGEc6Ze4=",
"keyId": { "low": 2, "high": 1633075851, "unsigned": false }
}
},
"operationTime": { "$timestamp": "7068709110501343233" }
}
Is there another, more efficient way to get the latest document in a time series collection? Does MongoDB make any guarantees regarding the order of time series documents?