Calculating percentiles

Hey everyone, I’m working on a side project where I display statistics for football players, and I currently have a database with all their statistics and numbers. I planned to create a spider chart on which different players can be compared. However, I ran into a scaling issue: distance run and shots/90 don’t fit well on the same spider chart. I figured I could make the spider chart percentile-based, which would put every stat on the same scale (1 to 100). I started working on calculating the percentile for the requested player, which has gone fine, but I was wondering if there is a way to find the percentile for all fields at once instead of having to do them individually. Thanks for your time.

Do you have your current query and sample documents? This makes it easier for people to play with a query using your data.
You can use the “Preformatted Text” button in the editor to style your code and document fragments so they can be easily copied.

Thanks for the reply, John. Here’s what one of my documents looks like.

{
  "_id": {
    "$oid": "645fffcf1119f5fafce46a24"
  },
  "name": "Kieran Tierney",
  "club": "Arsenal",
  "season": "2021-22",
  "position": "FB",
  "standard_stats": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "goals90": 0.05,
    "assists90": 0.14,
    "goalsAndAssits90": 0.19,
    "nonPenGoals90": 0.05,
    "penScored": 0,
    "pensTaken": 0,
    "yellow90": 0,
    "red90": 0,
    "xG90": 0.03,
    "nonPenXG90": 0.03,
    "xAG": 0.08,
    "nonPenXGAG90": 0.11,
    "progCarries90": 2.91,
    "progressivePass90": 4.93,
    "progPassRec90": 6.02
  },
  "shooting_stats": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "shots90": 0.71,
    "shotOnTarget90": 0.24,
    "shotOnTargetPercent90": "33.3%",
    "goalPerShot90": 0.07,
    "goalPerShotOnTarget90": 0.2,
    "avgShotDistance": 20.8,
    "shotsFreeKicks": 0,
    "nonPenXGPerShot90": 0.04,
    "goalsMinusXG": "+0.02",
    "nonPenGoalsMinusXG": "+0.02"
  },
  "passing_stats": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "passesCompleted90": 42.3,
    "passesAttempted90": 54.75,
    "passCompletionPercent": 77.3,
    "totalPassingDistance90": 765.4,
    "progressivePassingDistance90": 244.72,
    "shortPassesCompleted90": 17.81,
    "shortPassesAttempted90": 20.07,
    "shortPassesCompletionPercent": "88.8%",
    "mediumPassesCompleted90": 20.26,
    "mediumPassesAttempted90": 24.11,
    "mediumPassesCompletionPercent": "84.0%",
    "longPassesCompleted90": 3.85,
    "longPassesAttempted90": 8.18,
    "longPassesCompletionPercent": "47.1%",
    "xA90": 0.08,
    "keyPasses90": 0.94,
    "passesFinalThird90": 3.81,
    "passesPenaltyArea90": 1.13,
    "crossesPenaltyArea90": 0.56
  },
  "pass_types": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "liveBallPasses90": 45.82,
    "deadBallPasses90": 8.6,
    "freeKickPasses": 0.33,
    "throughBalls90": 0,
    "switches90": 0.61,
    "crosses90": 3.71,
    "throwIns90": 8.22,
    "cornerKicks90": 0.05,
    "inswingingCorners90": 0,
    "outswingingCorners90": 0,
    "straightCorners90": 0,
    "passesOffside90": 0.33,
    "passesBlockedByOpp90": 1.03
  },
  "shot_goalCreation": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "shotCreateAct90": 2.07,
    "liveBallShotCreateAct90": 1.69,
    "deadBallShotCreateAct90": 0.24,
    "takeOnShotCreateAct90": 0.05,
    "shotsLeadingToNewShot90": 0.05,
    "foulsLeadingToShot90": 0.05,
    "defendingActionsLeadingToShot90": 0,
    "goalCreateAct90": 0.09,
    "liveBallGoalCreateAct90": 0.09,
    "deadBallGoalCreateAct90": 0,
    "takeOnGoalCreateAct90": 0,
    "shotsLeadingToGoal90": 0,
    "foulsLeadingToGoal90": 0,
    "defendingActionsLeadingToGoal90": 0
  },
  "defensive_stats": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "tacklesAttempted90": 0.85,
    "tacklesWon90": 0.66,
    "tacklesInDef3rd90": 0.52,
    "tacklesInMiddle3rd90": 0.33,
    "tacklesInAttack3rd90": 0,
    "dribblersTackled90": 0.47,
    "dribblersChallenged90": 0.52,
    "percentDribblersTackled90": "90.9%",
    "challengesLost90": 0.05,
    "blocks90": 1.08,
    "shotsBlocked90": 0.24,
    "passesBlocke90": 0.85,
    "interceptions90": 0.71,
    "tacklesAndInterceptions90": 1.55,
    "clearances90": 2.54,
    "errors90": 0
  },
  "possession_stats": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "touches90": 63.31,
    "touchesInDefPenArea90": 2.91,
    "touchesInDef3rd90": 15.89,
    "touchesInMiddle3rd90": 27.31,
    "touchesInAttack3rd90": 20.49,
    "touchesInAttackPenArea90": 1.64,
    "liveBallTouches90": 63.31,
    "takeOnsAttempted90": 1.08,
    "successfulTakeOns90": 0.42,
    "successfulTakeOnPercent": "39.1%",
    "timesTackledInTakeOn90": 0.66,
    "tackledInTakeOnPercent": "60.9%",
    "totalCarryingDistance90": 176.33,
    "progressiveCarryingDistance90": 102.78,
    "carriesIntoFinal3rd90": 1.5,
    "carriesIntoPenArea90": 0.33,
    "carries90": 34.31,
    "miscontrols90": 0.85,
    "dispossessed90": 0.38,
    "passesReceived90": 39.1
  },
  "other_stats": {
    "name": "Kieran Tierney",
    "club": "Arsenal",
    "season": "2021-22",
    "position": "FB",
    "league": "Premier League",
    "secondYellow90": 0,
    "foulsCommitted90": 0.24,
    "foulsDrawn90": 0.75,
    "offsides90": 0.05,
    "penaltiesWon90": 0,
    "penaltiesGivenAway90": 0,
    "ownGoals90": 0,
    "ballRecoveries90": 3.99,
    "aerialsWon90": 0.56,
    "aerialsLost90": 1.08,
    "percentAerialsWon": "34.3%"
  }
}

And here’s what the query in the backend looks like.

// Gather every player's shots90 value into a single array (documents are
// sorted ascending by that stat first) together with the number of players.
const result = await db
  .collection('PremierLeague2022-21Big6')
  .aggregate([
    // Order the documents by shots per 90, lowest first.
    { $sort: { "shooting_stats.shots90": 1 } },
    // `_id: null` folds the whole collection into one group.
    {
      $group: {
        _id: null,
        data: { $push: "$shooting_stats.shots90" },
        count: { $sum: 1 },
      },
    },
  ])
  .toArray();

I did have a play and got some $map running and was thinking about $facet but I’m not sure I understand the calcs you want to do exactly.

So for the example aggregate you shared, for each player, you want to calculate shots90 / total number of players? Or do you want to calculate shots90 for a player compared to the average of all players?

The calc I’m trying to do is find which percentile a player’s specific stats fall into, based on my database. For example, what percentile is his assists90 in? I have figured out how to do this for individual statistics, but I was wondering if I could do this for all of the statistics. My current implementation returns a sorted list of all the numbers for a specific stat (goals90), and from there I can calculate the percentile. I’m trying to see if I could replicate this for all the other statistics without having to use the same lines of code over and over.

What version of Mongo are you running? I just saw this…

What’s the current query you’re running to calculate the percentile?

Thank you for this. I’ll look into it further, but it seems to require me to specify the percentile, rather than letting me give it a number and having it tell me which percentile that number is in.

I’m currently finding the percentile in the frontend. So right now I have it returning the array t

Depending on how many players there are, it may be worth keeping a collection of percentile ranges for each stat and just recalculating it as players update, or, if you do a bulk update once a day / week, refreshing it then. You could then join/lookup the collection onto the player(s) as you get them and work out which of the percentiles the player’s stat falls into.

I figured as much. Do you have any ideas on how I could make a collection of the percentiles quickly?

Also, I’m running MongoDB version 5.7.0 and Mongoose version 7.4.3. I just saw that I needed to be on version 7. When you say Mongo, are you referring to MongoDB, or is Mongo a separate thing I need to install?

I found a SO article that has a few options:

Depending on what version you’re running you have a few options.

Thanks for these. I’ll test them out and post what works