I’m working on a collection with around 17 million documents. Each document has a structure like this:
{
_id: ...
learnerId: <string>
type: <string>
organizationId: <string>
timestamp: ISODate
payload: {
learningPackage: {
learningPackageId: <string>
}
}
}
So the document represent a list of events over time for different users (called learners). My requirement is to count all the events of a certain type, for s specific organisationId for a list of learnerIds.
my query then looks like this:
db.getCollection("event").aggregate(
[
{
"$match": {
"organizationId": "OrgId",
"type": "Type",
"learnerId": {
"$in": [
"LearnerId1",
"LearnerId2",
"LearnerId3",
"LearnerId4",
"LearnerId5"
]
},
"payload.learningPackage.learningPackageId": "LearningPackageId"
}
},
{
"$group": {
"_id": "$learnerId",
"n": {
"$sum": 1
}
}
}
]
)
I’ve added an Index for organizationId, type, learnerId and payload.learningPackage.learningPackageId as a compound index. Otherwise the query is not usable at all
This works fine as long as I have just a few LearnerId’s. But usually I end up having 1000 or more learners I want to query the amount of events for.
At this point the query starts to become slow. Currently around 4-5 seconds. Which is acceptable, but knowing that in the future we will have way more documents in the collection and also the amount of learners can easy be even larger, I want to find a proper solution.
Is there any performant way this requirement can be solved using a query or do I have to restructure my data (e.g. always writing the total amount to the last event).
Here some parts of the explain output (cannot post everything due to data privacy issues):
"serverInfo" : {
"host" : "78b4f7e41a7b",
"port" : 27017.0,
"version" : "6.0.3",
"gitVersion" : "f803681c3ae19817d31958965850193de067c516"
},
"serverParameters" : {
"internalQueryFacetBufferSizeBytes" : 104857600.0,
"internalQueryFacetMaxOutputDocSizeBytes" : 104857600.0,
"internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600.0,
"internalDocumentSourceGroupMaxMemoryBytes" : 104857600.0,
"internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600.0,
"internalQueryProhibitBlockingMergeOnMongoS" : 0.0,
"internalQueryMaxAddToSetBytes" : 104857600.0,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600.0
},
Question on that: Do you think upgrading to the latest version would already change anything terms of performance?
Winning plan:
"queryPlan": {
"stage": "GROUP",
"planNodeId": 3.0,
"inputStage": {
"stage": "PROJECTION_COVERED",
"planNodeId": 2.0,
"transformBy": {
"learnerId": true,
"_id": false
},
"inputStage": {
"stage": "IXSCAN",
"planNodeId": 1.0,
"keyPattern": {
"type": 1.0,
"payload.learningPackage.learningPackageId": 1.0,
"organizationId": 1.0,
"learnerId": 1.0
},
"indexName": "test_speed",
"isMultiKey": false,
"multiKeyPaths": {
"type": [
],
"payload.learningPackage.learningPackageId": [
],
"organizationId": [
],
"learnerId": [
]
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2.0,
"direction": "forward",
"indexBounds": {
"type": [
"[\"\", \"\"]"
],
"payload.learningPackage.learningPackageId": [
"[\"\", \"\"]"
],
"organizationId": [
"[\"\", \"\"]"
],
"learnerId": ["", ""
]
}
}
}
},
"slotBasedPlan": {
"slots": "$$RESULT=s19 env: { s2 = Nothing (SEARCH_META), s3 = 1737454159256 (NOW), s1 = TimeZoneDatabase(Asia/Krasnoyarsk...Etc/GMT-6) (timeZoneDB) }",
"stages": "[3] mkbson s19 [_id = s16, n = s18] true false \n[3] project [s18 = doubleDoubleSumFinalize (s17)] \n[3] group [s16] [s17 = aggDoubleDoubleSum (1)] \n[3] project [s16 = fillEmpty (s15, null)] \n[3] project [s15 = getField (s14, \"learnerId\")] \n[2] mkbson s14 [learnerId = s4] true false \n[1] filter {isRecordId (s5)} \n[1] lspool sp1 [s5, s4] {! isRecordId (s5)} \n[1] union [s5, s4] [\n [s7, s6] [1] project [s6 = Nothing, s7 = KS(3C5175697A526573756C74003C363236613932336332333537323738393236313834386466003C363136373034666630626132343530326538373434396135003C363136383163343830626132343530326538373434396534000104)] \n [1] limit 1 \n [1] coscan , \n [s13, s8] [1] nlj [] [s11] \n left \n [1] sspool sp1 [s11] \n right \n [1] chkbounds s9 s10 s13 \n [1] nlj [] [s12] \n left \n [1] project [s12 = s11] \n [1] limit 1 \n [1] coscan \n right \n [1] ixseek s12 none s9 s10 none [s8 = 3] @\"d00b56b3-9b3b-4372-8d22-b28d05430971\" @\"test_speed\" true \n \n \n \n \n] "
}
},
Execution stats:
"executionStats": {
"executionSuccess": true,
"nReturned": 1565.0,
"executionTimeMillis": 4423.0,
"totalKeysExamined": 1703250.0,
"totalDocsExamined": 0.0,
"executionStages": {
"stage": "mkbson",
"planNodeId": 3.0,
"nReturned": 1565.0,
"executionTimeMillisEstimate": 4338.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"objSlot": 19.0,
"fields": [
],
"projectFields": [
"_id",
"n"
],
"projectSlots": [
Long(
"16"
),
Long(
"18"
)
],
"forceNewObject": true,
"returnOldObject": false,
"inputStage": {
"stage": "project",
"planNodeId": 3.0,
"nReturned": 1565.0,
"executionTimeMillisEstimate": 4338.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"projections": {
"18": "doubleDoubleSumFinalize (s17) "
},
"inputStage": {
"stage": "group",
"planNodeId": 3.0,
"nReturned": 1565.0,
"executionTimeMillisEstimate": 4338.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"groupBySlots": [
Long(
"16"
)
],
"expressions": {
"17": "aggDoubleDoubleSum (1) "
},
"usedDisk": false,
"spilledRecords": 0.0,
"spilledBytesApprox": 0.0,
"inputStage": {
"stage": "project",
"planNodeId": 3.0,
"nReturned": 1702845.0,
"executionTimeMillisEstimate": 4231.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"projections": {
"16": "fillEmpty (s15, null) "
},
"inputStage": {
"stage": "project",
"planNodeId": 3.0,
"nReturned": 1702845.0,
"executionTimeMillisEstimate": 4192.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"projections": {
"15": "getField (s14, \"learnerId\") "
},
"inputStage": {
"stage": "mkbson",
"planNodeId": 2.0,
"nReturned": 1702845.0,
"executionTimeMillisEstimate": 4138.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"objSlot": 14.0,
"fields": [
],
"projectFields": [
"learnerId"
],
"projectSlots": [
Long(
"4"
)
],
"forceNewObject": true,
"returnOldObject": false,
"inputStage": {
"stage": "filter",
"planNodeId": 1.0,
"nReturned": 1702845.0,
"executionTimeMillisEstimate": 4097.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"numTested": 1703250.0,
"filter": "isRecordId (s5) ",
"inputStage": {
"stage": "lspool",
"planNodeId": 1.0,
"nReturned": 1703250.0,
"executionTimeMillisEstimate": 4069.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"spoolId": 1.0,
"outputSlots": [
Long(
"5"
),
Long(
"4"
)
],
"filter": "! isRecordId (s5) ",
"inputStage": {
"stage": "union",
"planNodeId": 1.0,
"nReturned": 1703250.0,
"executionTimeMillisEstimate": 3998.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"inputSlots": [
Long(
"7"
),
Long(
"6"
),
Long(
"13"
),
Long(
"8"
)
],
"outputSlots": [
Long(
"5"
),
Long(
"4"
)
],
"inputStages": [
{
"stage": "project",
"planNodeId": 1.0,
"nReturned": 1.0,
"executionTimeMillisEstimate": 0.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"projections": {
"6": "Nothing ",
"7": "KS(3C5175697A526573756C74003C363236613932336332333537323738393236313834386466003C363136373034666630626132343530326538373434396135003C363136383163343830626132343530326538373434396534000104) "
},
"inputStage": {
"stage": "limit",
"planNodeId": 1.0,
"nReturned": 1.0,
"executionTimeMillisEstimate": 0.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"limit": 1.0,
"inputStage": {
"stage": "coscan",
"planNodeId": 1.0,
"nReturned": 1.0,
"executionTimeMillisEstimate": 0.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 0.0
}
}
},
{
"stage": "nlj",
"planNodeId": 1.0,
"nReturned": 1703249.0,
"executionTimeMillisEstimate": 3998.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"totalDocsExamined": 0.0,
"totalKeysExamined": 1703250.0,
"collectionScans": 0.0,
"collectionSeeks": 0.0,
"indexScans": 0.0,
"indexSeeks": 405.0,
"indexesUsed": [
"test_speed"
],
"innerOpens": 405.0,
"innerCloses": 1.0,
"outerProjects": [
],
"outerCorrelated": [
Long(
"11"
)
],
"outerStage": {
"stage": "sspool",
"planNodeId": 1.0,
"nReturned": 405.0,
"executionTimeMillisEstimate": 0.0,
"opens": 1.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"spoolId": 1.0,
"outputSlots": [
Long(
"11"
)
]
},
"innerStage": {
"stage": "chkbounds",
"planNodeId": 1.0,
"nReturned": 1703249.0,
"executionTimeMillisEstimate": 3970.0,
"opens": 405.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 1.0,
"seeks": 404.0,
"inKeySlot": 9.0,
"inRecordIdSlot": 10.0,
"outSlot": 13.0,
"inputStage": {
"stage": "nlj",
"planNodeId": 1.0,
"nReturned": 1703250.0,
"executionTimeMillisEstimate": 3752.0,
"opens": 405.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 0.0,
"totalDocsExamined": 0.0,
"totalKeysExamined": 1703250.0,
"collectionScans": 0.0,
"collectionSeeks": 0.0,
"indexScans": 0.0,
"indexSeeks": 405.0,
"indexesUsed": [
"test_speed"
],
"innerOpens": 405.0,
"innerCloses": 1.0,
"outerProjects": [
],
"outerCorrelated": [
Long(
"12"
)
],
"outerStage": {
"stage": "project",
"planNodeId": 1.0,
"nReturned": 405.0,
"executionTimeMillisEstimate": 0.0,
"opens": 405.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 0.0,
"projections": {
"12": "s11 "
},
"inputStage": {
"stage": "limit",
"planNodeId": 1.0,
"nReturned": 405.0,
"executionTimeMillisEstimate": 0.0,
"opens": 405.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 0.0,
"limit": 1.0,
"inputStage": {
"stage": "coscan",
"planNodeId": 1.0,
"nReturned": 405.0,
"executionTimeMillisEstimate": 0.0,
"opens": 405.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 0.0
}
}
},
"innerStage": {
"stage": "ixseek",
"planNodeId": 1.0,
"nReturned": 1703250.0,
"executionTimeMillisEstimate": 3744.0,
"opens": 405.0,
"closes": 1.0,
"saveState": 1703.0,
"restoreState": 1703.0,
"isEOF": 0.0,
"indexName": "test_speed",
"keysExamined": 1703250.0,
"seeks": 405.0,
"numReads": 1703250.0,
"recordSlot": 9.0,
"recordIdSlot": 10.0,
"seekKeySlotLow": 12.0,
"outputSlots": [
Long(
"8"
)
],
"indexKeysToInclude": "00000000000000000000000000001000"
}
}
}
}
]
}
}
}
}
}
}
}
}
}
},