Aggregation group reaching Disk Util% up to 100%

Hello,

i’m working on a M30 cluster where one of the aggregation request make the “Disk Util%” rising up to 100%.

it seems to be the Group part that is the problem :


db.getCollection('users').aggregate([
        {$match: {date: {$gt: ISODate("2023-05-24 08:01:08.604Z")}} },
    {$lookup: { from: 'courses', localField: '_id', foreignField: 'uid', as: 'courses'  } },
    {$unwind: '$courses' },
    {$match: { 'courses.distance': {$gt: 0.1 } } },
     {$group: {_id: '$courses.uid',
      nb: {$sum: 1},
      totalDuration: {$sum: '$courses.duration'},
      totalDistance: {$sum: '$courses.distance' },
      v: {$sum: '$courses.info.v'},
      s: {$sum: '$courses.info.s'},
      d: {$sum: '$courses.info.d'},
      b: {$sum: '$courses.info.b'},
      u: {$sum: '$courses.info.u'},
    }}
    ])

here is the Explain :

{
    "explainVersion" : "1",
    "stages" : [ 
        {
            "$cursor" : {
                "queryPlanner" : {
                    "namespace" : "redacted.users",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "date" : {
                            "$gt" : ISODate("2023-05-24T08:01:08.604Z")
                        }
                    },
                    "queryHash" : "9890BE05",
                    "planCacheKey" : "23BBE46F",
                    "maxIndexedOrSolutionsReached" : false,
                    "maxIndexedAndSolutionsReached" : false,
                    "maxScansToExplodeReached" : false,
                    "winningPlan" : {
                        "stage" : "PROJECTION_DEFAULT",
                        "transformBy" : {
                            "_id" : 1,
                            "courses.duration" : 1,
                            "courses.info.v" : 1,
                            "courses.info.s" : 1,
                            "courses.info.d" : 1,
                            "courses.info.b" : 1,
                            "courses.info.u" : 1,
                            "courses.distance" : 1,
                            "courses.uid" : 1
                        },
                        "inputStage" : {
                            "stage" : "FETCH",
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "keyPattern" : {
                                    "date" : -1
                                },
                                "indexName" : "date_-1",
                                "isMultiKey" : false,
                                "multiKeyPaths" : {
                                    "date" : []
                                },
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "forward",
                                "indexBounds" : {
                                    "date" : [ 
                                        "[new Date(9223372036854775807), new Date(1684915268604))"
                                    ]
                                }
                            }
                        }
                    },
                    "rejectedPlans" : []
                },
                "executionStats" : {
                    "executionSuccess" : true,
                    "nReturned" : 5790,
                    "executionTimeMillis" : 160018,
                    "totalKeysExamined" : 5790,
                    "totalDocsExamined" : 5790,
                    "executionStages" : {
                        "stage" : "PROJECTION_DEFAULT",
                        "nReturned" : 5790,
                        "executionTimeMillisEstimate" : 218,
                        "works" : 5791,
                        "advanced" : 5790,
                        "needTime" : 0,
                        "needYield" : 0,
                        "saveState" : 15,
                        "restoreState" : 15,
                        "isEOF" : 1,
                        "transformBy" : {
                            "_id" : 1,
                            "courses.duration" : 1,
                            "courses.info.v" : 1,
                            "courses.info.s" : 1,
                            "courses.info.d" : 1,
                            "courses.info.b" : 1,
                            "courses.info.u" : 1,
                            "courses.distance" : 1,
                            "courses.uid" : 1
                        },
                        "inputStage" : {
                            "stage" : "FETCH",
                            "nReturned" : 5790,
                            "executionTimeMillisEstimate" : 217,
                            "works" : 5791,
                            "advanced" : 5790,
                            "needTime" : 0,
                            "needYield" : 0,
                            "saveState" : 15,
                            "restoreState" : 15,
                            "isEOF" : 1,
                            "docsExamined" : 5790,
                            "alreadyHasObj" : 0,
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "nReturned" : 5790,
                                "executionTimeMillisEstimate" : 6,
                                "works" : 5791,
                                "advanced" : 5790,
                                "needTime" : 0,
                                "needYield" : 0,
                                "saveState" : 15,
                                "restoreState" : 15,
                                "isEOF" : 1,
                                "keyPattern" : {
                                    "date" : -1
                                },
                                "indexName" : "date_-1",
                                "isMultiKey" : false,
                                "multiKeyPaths" : {
                                    "date" : []
                                },
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "forward",
                                "indexBounds" : {
                                    "date" : [ 
                                        "[new Date(9223372036854775807), new Date(1684915268604))"
                                    ]
                                },
                                "keysExamined" : 5790,
                                "seeks" : 1,
                                "dupsTested" : 0,
                                "dupsDropped" : 0
                            }
                        }
                    },
                    "allPlansExecution" : []
                }
            },
            "nReturned" : NumberLong(5790),
            "executionTimeMillisEstimate" : NumberLong(221)
        }, 
        {
            "$lookup" : {
                "from" : "courses",
                "as" : "courses",
                "localField" : "_id",
                "foreignField" : "uid",
                "let" : {},
                "pipeline" : [ 
                    {
                        "$match" : {
                            "distance" : {
                                "$gt" : 0.1
                            }
                        }
                    }
                ],
                "unwinding" : {
                    "preserveNullAndEmptyArrays" : false
                }
            },
            "totalDocsExamined" : NumberLong(441038),
            "totalKeysExamined" : NumberLong(441038),
            "collectionScans" : NumberLong(0),
            "indexesUsed" : [ 
                "uid_1"
            ],
            "nReturned" : NumberLong(433350),
            "executionTimeMillisEstimate" : NumberLong(158406)
        }, 
        {
            "$group" : {
                "_id" : "$courses.userId",
                "nb" : {
                    "$sum" : {
                        "$const" : 1.0
                    }
                },
                "totalDuration" : {
                    "$sum" : "$courses.duration"
                },
                "totalDistance" : {
                    "$sum" : "$courses.distance"
                },
                "v" : {
                    "$sum" : "$courses.info.v"
                },
                "s" : {
                    "$sum" : "$courses.info.s"
                },
                "d" : {
                    "$sum" : "$courses.info.d"
                },
                "b" : {
                    "$sum" : "$courses.info.b"
                },
                "u" : {
                    "$sum" : "$courses.info.u"
                }
            },
            "maxAccumulatorMemoryUsageBytes" : {
                "nb" : NumberLong(456640),
                "totalDuration" : NumberLong(456640),
                "totalDistance" : NumberLong(456640),
                "v" : NumberLong(456640),
                "s" : NumberLong(456640),
                "d" : NumberLong(456640),
                "b" : NumberLong(456640),
                "u" : NumberLong(456640)
            },
            "totalOutputDataSizeBytes" : NumberLong(3772988),
            "usedDisk" : false,
            "spills" : NumberLong(0),
            "nReturned" : NumberLong(5708),
            "executionTimeMillisEstimate" : NumberLong(160000)
        }
    ],
    "serverInfo" : {
        "host" : "redacted",
        "port" : redacted,
        "version" : "6.0.6",
        "gitVersion" : "redacted"
    },
    "serverParameters" : {
        "internalQueryFacetBufferSizeBytes" : 104857600,
        "internalQueryFacetMaxOutputDocSizeBytes" : 104857600,
        "internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600,
        "internalDocumentSourceGroupMaxMemoryBytes" : 104857600,
        "internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600,
        "internalQueryProhibitBlockingMergeOnMongoS" : 0,
        "internalQueryMaxAddToSetBytes" : 104857600,
        "internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600
    },
    "command" : {
        "aggregate" : "users",
        "pipeline" : [ 
            {
                "$match" : {
                    "date" : {
                        "$gt" : ISODate("2023-05-24T08:01:08.604Z")
                    }
                }
            }, 
            {
                "$lookup" : {
                    "from" : "courses",
                    "localField" : "_id",
                    "foreignField" : "uid",
                    "as" : "courses"
                }
            }, 
            {
                "$unwind" : "$courses"
            }, 
            {
                "$match" : {
                    "courses.distance" : {
                        "$gt" : 0.1
                    }
                }
            }, 
            {
                "$group" : {
                    "_id" : "$courses.uid",
                    "nb" : {
                        "$sum" : 1.0
                    },
                    "totalDuration" : {
                        "$sum" : "$courses.duration"
                    },
                    "totalDistance" : {
                        "$sum" : "$courses.distance"
                    },
                    "v" : {
                        "$sum" : "$courses.info.v"
                    },
                    "s" : {
                        "$sum" : "$courses.info.s"
                    },
                    "d" : {
                        "$sum" : "$courses.info.d"
                    },
                    "b" : {
                        "$sum" : "$courses.info.b"
                    },
                    "u" : {
                        "$sum" : "$courses.info.u"
                    }
                }
            }
        ],
        "cursor" : {
            "batchSize" : 1.0
        },
        "$db" : "redacted"
    },
    "ok" : 1.0,
    "$clusterTime" : {
        "clusterTime" : Timestamp(1685022863, 6),
        "signature" : {
            "hash" : { "$binary" : "zeFocRuBcH7U2lePwakQsnU8yCg=", "$type" : "00" },
            "keyId" : NumberLong(7193055453171941378)
        }
    },
    "operationTime" : Timestamp(1685022863, 6)
}

am i doing something wrong in this query ? or is this normal ?

Sincerly,

Yann.

A $group stage is blocking in a sense that all incoming documents are processed before one outgoing document is produced.

I always try to $group on a smaller subset.

In your case, I think you could move your $group inside a pipeline in your $lookup. This way the $group of each user can be produced right away. This way you may avoid a much bigger $group at the end, you would also avoid an $unwind. This $lookup would look like:

{ "$lookup" : {
    "from" : "courses" ,
    "localField" : "_id" ,
    "foreignField" : "uid" ,
    "as" : "courses" ,
    "pipeline" : [
        { "$match" : { "distance" : { "$gt" : 0.1 } } } ,
        { "$group" : {
            "_id" : null ,
            "nb" : { "$sum" : 1 } ,
            "totalDuration" : { "$sum" : "$duration" } ,
            "totalDistance" : { "$sum" : "$distance" } ,
            "v" : { "$sum" : "$info.v" } ,
            /* ... */
        } }
    ]
} }
1 Like

Thanks a lot for the answer .
it work very very well.

Thanks again.

Yann.

1 Like