Making a $group with multiple $sum more efficient

Hi all,

I’m trying to figure out how to make this query more efficient. I have indices set up on each of the fields I use in any of the $match parts of the query. The $group seems to bear the heavy load by looking at a full explain. Any ideas would be very welcome.

db.getCollection("mycollection").aggregate([
      {
        "$match": {
          "person.isVerified": true
        }
      },
      {
        "$match": {
          "skipped": false
        }
      },
      {
        "$match": {
          "result.secondsToComplete": {
            "$gt": 0
          }
        }
      },
      {
        "$match": {
          "creationDate": {
            "$gt": ISODate("2022-01-02T00:00:00Z"),
            "$lt": ISODate("2022-09-01T23:59:59.999Z")
            
          }
        }
      },
      {
        "$group": {
          "_id": "$person.id",
          "sessionDate": {
            "$max": "$creationDate"
          },
          "completions": {
            "$sum": {
              "$ifNull": [
                "$completions",
                1
              ]
            }
          },
          "testsMade": {
            "$sum": "$result.makes"
          },
          "testsTaken": {
            "$sum": "$result.attempts"
          },
          "ftMade": {
            "$sum": "$result.ftMade"
          },
          "ftTaken": {
            "$sum": "$result.ftTaken"
          },
          "fgTaken": {
            "$sum": "$result.fgTaken"
          },
          "fgMade": {
            "$sum": "$result.fgMade"
          },
          "threeMade": {
            "$sum": "$result.threeMade"
          },
          "threeTaken": {
            "$sum": "$result.threeTaken"
          },
          "twoMade": {
            "$sum": "$result.twoMade"
          },
          "twoTaken": {
            "$sum": "$result.twoTaken"
          },
          "longesttestsTestStreak": {
            "$max": "$result.testsTestStreak"
          },
          "firstDate": {
            "$min": "$result.firstDate"
          },
          "lastDate": {
            "$max": "$result.lastDate"
          },
          "secondsToCompleteTest": {
            "$min": "$result.secondsToComplete"
          },
          "firstName": {
            "$max": "$person.snapShot.firstName"
          },
          "lastName": {
            "$max": "$person.snapShot.lastName"
          },
          "personName": {
            "$max": "$person.snapShot.personName"
          },
          "personNameLastFirst": {
            "$max": "$person.snapShot.personNameLastFirst"
          },
          "metaTag": {
            "$max": "$person.snapShot.metaTag"
          },
          "membership": {
            "$max": "$person.snapShot.membership"
          }
        }
      },
      {
        "$addFields": {
          "hasFt": {
            "$cmp": [
              {
                "$ifNull": [
                  "$ftTaken",
                  0
                ]
              },
              0
            ]
          },
          "hasFg": {
            "$cmp": [
              {
                "$ifNull": [
                  "$fgTaken",
                  0
                ]
              },
              0
            ]
          },
          "hasTwo": {
            "$cmp": [
              {
                "$ifNull": [
                  "$twoTaken",
                  0
                ]
              },
              0
            ]
          },
          "hasThree": {
            "$cmp": [
              {
                "$ifNull": [
                  "$threeTaken",
                  0
                ]
              },
              0
            ]
          },
          "hasAttempts": {
            "$cmp": [
              {
                "$ifNull": [
                  "$testsTaken",
                  0
                ]
              },
              0
            ]
          }
        }
      },
      {
        "$addFields": {
          "freeThrowPercentage": {
            "$cond": [
              "$hasFt",
              {
                "$multiply": [
                  {
                    "$divide": [
                      "$ftMade",
                      "$ftTaken"
                    ]
                  },
                  100
                ]
              },
              null
            ]
          },
          "fieldGoalPercentage": {
            "$cond": [
              "$hasFg",
              {
                "$multiply": [
                  {
                    "$divide": [
                      "$fgMade",
                      "$fgTaken"
                    ]
                  },
                  100
                ]
              },
              null
            ]
          },
          "fourPointPercentage": {
            "$cond": [
              "$hasTwo",
              {
                "$multiply": [
                  {
                    "$divide": [
                      "$twoMade",
                      "$twoTaken"
                    ]
                  },
                  100
                ]
              },
              null
            ]
          },
          "fivePointPercentage": {
            "$cond": [
              "$hasThree",
              {
                "$multiply": [
                  {
                    "$divide": [
                      "$threeMade",
                      "$threeTaken"
                    ]
                  },
                  100
                ]
              },
              null
            ]
          },
          "overallPercentage": {
            "$cond": [
              "$hasAttempts",
              {
                "$multiply": [
                  {
                    "$divide": [
                      "$testsMade",
                      "$testsTaken"
                    ]
                  },
                  100
                ]
              },
              null
            ]
          }
        }
      },
      {
        "$sort": {
          "testsTaken": -1
        }
      }
    ], {"allowDiskUse": true})


Consolidate your $match into a single one. May be mongod do this by itself but it could reduce the server work load.

A $sort after a $group cannot use the index.

In your first $addFields, you compute flags in order to use them in the following $addFields. You could forgo the first $addFields by using the expressions directly in the second. In principal, the memory used to compute an expression is released as soon as the expression is evaluated. The memory used by your new fields exists until the document is out of the pipeline.

1 Like