Morning all,
Been recently trying to troubleshoot a big slowdown in my aggregation pipeline stage, when FETCH is performed. A sneak peak into what documents look like before going into further details:
{
"_id" : ObjectId("..."),
"date" : ISODate("..."),
"unique_id" : "field_a=value_a+field_b=value_b...+field_n=value_n", // Unique index using this field together with date
"is_valid": false,
"name": "My new Document",
"extra_attributes" : [
{
"attribute": "", // Used to return all set using the index - if there's a better way to do this please let me know!
"value": ""
},
{
"attribute" : "field_a",
"value" : "value_a"
},
{
"attribute" : "field_b",
"value" : "value_b"
},
...
],
"count_a": NumberLong("0"),
"count_b": NumberLong("0"),
...
"count_n": NumberLong("0")
}
In order to have a smaller working set I’ve locally replicated some of the data, approx 1M documents with average size per document of 1.2KB and with an overall collection size of 200GB.
A couple of indexes are found in this collection:
- date: 1 + name: 1 + unique_id: 1, is_valid: 1 [unique index used for inserts] | Size: 100MB
- date: 1 + extra_attributes.attribute: 1 + extra_attributes.value: 1 + is_valid: 1 + name: 1 [read index] | Size: 90MB
Local testing shows the following times for the executionStats
only having:
IXSCAN (1.4s) → FETCH (7s) → …
Fetching is really bringing down my aggregation times, and the same goes when the $group is added. To return the amount of documents mentioned the times go way above 30s.
Would it be just a matter of resources or am I doing something wrong at data model level?
Here is the executionStats
output:
{
"explainVersion" : "1",
"queryPlanner" : {
"namespace" : "my_db.my_collection",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"extra_attributes.attribute" : {
"$eq" : ""
}
},
{
"extra_attributes.value" : {
"$eq" : ""
}
},
{
"date" : {
"$lt" : ISODate("...")
}
},
{
"date" : {
"$gte" : ISODate("...")
}
},
{
"is_valid" : {
"$in" : [
false,
true
]
}
}
]
},
"optimizedPipeline" : true,
"maxIndexedOrSolutionsReached" : false,
"maxIndexedAndSolutionsReached" : false,
"maxScansToExplodeReached" : false,
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"extra_attributes.attribute" : {
"$eq" : ""
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"date" : 1,
"extra_attributes.attribute" : 1,
"extra_attributes.value" : 1,
"is_valid" : 1,
"name" : 1
},
"indexName" : "read_index_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"date" : [ ],
"extra_attributes.attribute" : [ "extra_attributes" ],
"extra_attributes.value" : [ "extra_attributes" ],
"is_valid" : [ ],
"name" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"date" : [ "[new Date(...), new Date(...))" ],
"extra_attributes.attribute" : [ "[\"\", \"\"]" ],
"extra_attributes.value" : [ "[MinKey, MaxKey]" ],
"is_valid" : [ "[false, false]", "[true, true]" ],
"name" : [ "[MinKey, MaxKey]" ]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1037256,
"executionTimeMillis" : 12311,
"totalKeysExamined" : 1037265,
"totalDocsExamined" : 1037256,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"extra_attributes.attribute" : {
"$eq" : ""
}
},
"nReturned" : 1037256,
"executionTimeMillisEstimate" : 7658,
"works" : 1037266,
"advanced" : 1037256,
"needTime" : 9,
"needYield" : 0,
"saveState" : 1220,
"restoreState" : 1220,
"isEOF" : 1,
"docsExamined" : 1037256,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1037256,
"executionTimeMillisEstimate" : 1388,
"works" : 1037266,
"advanced" : 1037256,
"needTime" : 9,
"needYield" : 0,
"saveState" : 1220,
"restoreState" : 1220,
"isEOF" : 1,
"keyPattern" : {
"date" : 1,
"extra_attributes.attribute" : 1,
"extra_attributes.value" : 1,
"is_valid" : 1,
"name" : 1
},
"indexName" : "read_index_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"date" : [ ],
"extra_attributes.attribute" : [ "extra_attributes" ],
"extra_attributes.value" : [ "extra_attributes" ],
"is_valid" : [ ],
"name" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"date" : [ "[new Date(...), new Date(...))" ],
"extra_attributes.attribute" : [ "[\"\", \"\"]" ],
"extra_attributes.value" : [ "[MinKey, MaxKey]" ],
"is_valid" : [ "[false, false]", "[true, true]" ],
"name" : [ "[MinKey, MaxKey]" ]
},
"keysExamined" : 1037265,
"seeks" : 10,
"dupsTested" : 1037256,
"dupsDropped" : 0
}
}
},
"command" : {
"aggregate" : "my_collection",
"pipeline" : [
{
"$match" : {
"is_valid" : {
"$in" : [
true,
false
]
},
"date" : {
"$gte" : ISODate("..."),
"$lt" : ISODate("...")
},
"extra_attributes.attribute" : "",
"extra_attributes.value" : ""
}
}
],
"cursor" : {
},
"$db" : "my_db"
},
"serverInfo" : {
"port" : 27017,
"version" : "5.0.6"
},
"serverParameters" : {
"internalQueryFacetBufferSizeBytes" : 104857600,
"internalQueryFacetMaxOutputDocSizeBytes" : 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600,
"internalDocumentSourceGroupMaxMemoryBytes" : 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600,
"internalQueryProhibitBlockingMergeOnMongoS" : 0,
"internalQueryMaxAddToSetBytes" : 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600
},
"ok" : 1
}
Thanks in advance!