Mongo DB aggregation optimization

I have an application in which I am using MongoDB. I have 1.1M documents in a single collection and I am trying to do some aggregations, my document structure looks like this:

image

My aggregation query looks like this:

db.spinHistory.explain().aggregate([
  {
    "$match": {
      "gameRef": "6047a10c58ed573e490b8f54"
    }
  },
  {
    "$project": {
      "platformRef": 1,
      "gameRef": 1,
      "currency": 1,
      "win": 1,
      "bet": 1,
      "bonusWin": "$data.bonusWin",
      "_id": 0
    }
  },
  {
    "$group": {
      "_id": {
        "platformRef": "$platformRef",
        "gameRef": "$gameRef",
        "currency": "$currency"
      },
      "bet": {
        "$sum": "$bet"
      },
      "win": {
        "$sum": "$win"
      },
      "bonus": {
        "$sum": "$data.bonusWin"
      },
      "count": {
        "$sum": 1
      }
    }
  },
  {
    "$project": {
      "platformRef": "$_id.platformRef",
      "gameRef": "$_id.gameRef",
      "currency": "$_id.currency",
      "bet": 1,
      "win": 1,
      "bonus": 1,
      "count": 1
    }
  }
])

This query takes 5 seconds to execute (in 1.1M documents). I wonder if there is any way to optimize it?

I have this index set up:

{
  "v": 2,
  "key": {
    "gameRef": 1,
    "platformRef": 1,
    "currency": 1,
    "bet": 1,
    "win": 1,
    "data.bonusWin": 1
  },
  "name": "idx_spin_history_main_fields",
  "background": false
}

and the explain plan gives me this:

{
  "stages": [
    {
      "$cursor": {
        "queryPlanner": {
          "plannerVersion": 1,
          "namespace": "oak9e_rgs_temp.spinHistory",
          "indexFilterSet": false,
          "parsedQuery": {
            "gameRef": {
              "$eq": "6047a10c58ed573e490b8f54"
            }
          },
          "queryHash": "27C08187",
          "planCacheKey": "E204EC8C",
          "winningPlan": {
            "stage": "PROJECTION_DEFAULT",
            "transformBy": {
              "bet": true,
              "platformRef": true,
              "win": true,
              "currency": true,
              "gameRef": true,
              "bonusWin": "$data.bonusWin",
              "_id": false
            },
            "inputStage": {
              "stage": "IXSCAN",
              "keyPattern": {
                "gameRef": 1,
                "platformRef": 1,
                "currency": 1,
                "bet": 1,
                "win": 1,
                "data.bonusWin": 1
              },
              "indexName": "idx_spin_history_main_fields",
              "isMultiKey": false,
              "multiKeyPaths": {
                "gameRef": [],
                "platformRef": [],
                "currency": [],
                "bet": [],
                "win": [],
                "data.bonusWin": []
              },
              "isUnique": false,
              "isSparse": false,
              "isPartial": false,
              "indexVersion": 2,
              "direction": "forward",
              "indexBounds": {
                "gameRef": [
                  "[\"6047a10c58ed573e490b8f54\", \"6047a10c58ed573e490b8f54\"]"
                ],
                "platformRef": [
                  "[MinKey, MaxKey]"
                ],
                "currency": [
                  "[MinKey, MaxKey]"
                ],
                "bet": [
                  "[MinKey, MaxKey]"
                ],
                "win": [
                  "[MinKey, MaxKey]"
                ],
                "data.bonusWin": [
                  "[MinKey, MaxKey]"
                ]
              }
            }
          },
          "rejectedPlans": []
        }
      }
    },
    {
      "$group": {
        "_id": {
          "platformRef": "$platformRef",
          "gameRef": "$gameRef",
          "currency": "$currency"
        },
        "bet": {
          "$sum": "$bet"
        },
        "win": {
          "$sum": "$win"
        },
        "bonus": {
          "$sum": "$data.bonusWin"
        },
        "count": {
          "$sum": {
            "$const": 1
          }
        }
      }
    },
    {
      "$project": {
        "_id": true,
        "bet": true,
        "bonus": true,
        "count": true,
        "win": true,
        "platformRef": "$_id.platformRef",
        "gameRef": "$_id.gameRef",
        "currency": "$_id.currency"
      }
    }
  ],
  "serverInfo": {
    "host": "DESKTOP-V3NTFPM",
    "port": 27017,
    "version": "4.4.3",
    "gitVersion": "913d6b62acfbb344dde1b116f4161360acd8fd13"
  },
  "ok": 1
}

Please can you let me know if there is anything I can do to make this work faster?

Hi @Paulius_Matulionis and welcome in the MongoDB community :muscle: !

Firstly, your explain plan looks pretty good. There is no “FETCH” stage which means that only the index is used to answer your query which is pretty neat. We call this a “covered query”.

Secondly, I found an error in your pipeline and also the first $project stage is useless.

Errors/optimizations:

  • In the first project you are doing, you are transforming data.bonusWin into bonusWin. Then in the $group stage, you are using again data.bonusWin which no longer exists. It’s not a big deal as we are removing this useless stage.
  • You can remove the _id in the last $project stage as you are already projecting these fields to surface them.

Here is what your final pipeline should look like:

[
  {
    '$match': {
      'gameRef': '8bdbef7c0ad31c5805d258ca'
    }
  }, {
    '$group': {
      '_id': {
        'platformRef': '$platformRef', 
        'gameRef': '$gameRef', 
        'currency': '$currency'
      }, 
      'bet': {
        '$sum': '$bet'
      }, 
      'win': {
        '$sum': '$win'
      }, 
      'bonus': {
        '$sum': '$data.bonusWin'
      }, 
      'count': {
        '$sum': 1
      }
    }
  }, {
    '$project': {
      'platformRef': '$_id.platformRef', 
      'gameRef': '$_id.gameRef', 
      'currency': '$_id.currency', 
      'bet': 1, 
      'win': 1, 
      'bonus': 1, 
      'count': 1, 
      '_id': 0
    }
  }
]

Here is my index:

{
  "v": 2,
  "key": {
    "gameRef": 1,
    "platformRef": 1,
    "currency": 1,
    "bet": 1,
    "win": 1,
    "data.bonusWin": 1
  },
  "name": "gameRef_1_platformRef_1_currency_1_bet_1_win_1_data.bonusWin_1"
}

And my winning plan:

{
  "winningPlan": {
    "stage": "PROJECTION_DEFAULT",
    "transformBy": {
      "bet": 1,
      "currency": 1,
      "data.bonusWin": 1,
      "gameRef": 1,
      "platformRef": 1,
      "win": 1,
      "_id": 0
    },
    "inputStage": {
      "stage": "IXSCAN",
      "keyPattern": {
        "gameRef": 1,
        "platformRef": 1,
        "currency": 1,
        "bet": 1,
        "win": 1,
        "data.bonusWin": 1
      },
      "indexName": "gameRef_1_platformRef_1_currency_1_bet_1_win_1_data.bonusWin_1",
      "isMultiKey": false,
      "multiKeyPaths": {
        "gameRef": [],
        "platformRef": [],
        "currency": [],
        "bet": [],
        "win": [],
        "data.bonusWin": []
      },
      "isUnique": false,
      "isSparse": false,
      "isPartial": false,
      "indexVersion": 2,
      "direction": "forward",
      "indexBounds": {
        "gameRef": [
          "[\"8bdbef7c0ad31c5805d258ca\", \"8bdbef7c0ad31c5805d258ca\"]"
        ],
        "platformRef": [
          "[MinKey, MaxKey]"
        ],
        "currency": [
          "[MinKey, MaxKey]"
        ],
        "bet": [
          "[MinKey, MaxKey]"
        ],
        "win": [
          "[MinKey, MaxKey]"
        ],
        "data.bonusWin": [
          "[MinKey, MaxKey]"
        ]
      }
    }
  }
}

We have a way to verify that our query is covered. If I remove the bet field from my index, here is my winning plan now:

{
  "winningPlan": {
    "stage": "PROJECTION_DEFAULT",
    "transformBy": {
      "bet": 1,
      "currency": 1,
      "data.bonusWin": 1,
      "gameRef": 1,
      "platformRef": 1,
      "win": 1,
      "_id": 0
    },
    "inputStage": {
      "stage": "FETCH",
      "inputStage": {
        "stage": "IXSCAN",
        "keyPattern": {
          "gameRef": 1,
          "platformRef": 1,
          "currency": 1,
          "win": 1,
          "data.bonusWin": 1
        },
        "indexName": "gameRef_1_platformRef_1_currency_1_win_1_data.bonusWin_1",
        "isMultiKey": false,
        "multiKeyPaths": {
          "gameRef": [],
          "platformRef": [],
          "currency": [],
          "win": [],
          "data.bonusWin": []
        },
        "isUnique": false,
        "isSparse": false,
        "isPartial": false,
        "indexVersion": 2,
        "direction": "forward",
        "indexBounds": {
          "gameRef": [
            "[\"8bdbef7c0ad31c5805d258ca\", \"8bdbef7c0ad31c5805d258ca\"]"
          ],
          "platformRef": [
            "[MinKey, MaxKey]"
          ],
          "currency": [
            "[MinKey, MaxKey]"
          ],
          "win": [
            "[MinKey, MaxKey]"
          ],
          "data.bonusWin": [
            "[MinKey, MaxKey]"
          ]
        }
      }
    }
  }
}

As you can see, I have an additional stage FETCH now that is required because MongoDB needs to fetch the entire document on disk to retrieve this bet field that we are now missing.

The pipeline I shared above along with the index is the optimal way to run this query.
The only way left we could use to save a few milliseconds is to remove the final project stage that is just making our life more convenient in the application layer to map the results.

Make sure that ALL your indexes fit in RAM & that you have enough RAM left to run this query and it should be lightning fast. The only limitation at this point is the hardware & the configuration of your cluster & OS.

Cheers,
Maxime.

1 Like