Updating nested array fields with values from another field in the nested document

I reached a dead end with the (not so) simple and slow solution.

I have no clue on how to use $group to reconstruct the 2 level of arrays.

So here is something that seems to work:

pipeline = []

/* same lookup as above but without $unwind */
lookup_riders = {
    '$lookup': {
      from: 'riders',
      localField: 'sessions.entries.rd_id',
      foreignField: 'rd_id',
      as: '_tmp.riders'
    }
  }

pipeline.push( lookup_riders )

/* The meat of the solution in a big stew */

set_sessions = { "$set": {
    "_tmp.sessions": { "$map": {
    "input": "$sessions",
    "as": "session",
    "in": { "$mergeObjects": [
        "$$session" ,
        { "entries" : { "$map" : {
            "input" : "$$session.entries" ,
            "as" : "entry" ,
            "in" : { "$mergeObjects" : [
                "$$entry" ,
                { "_id" : { "$reduce" : {
                    "input" : "$_tmp.riders" ,
                    "initialValue" : null ,
                    "in" : { "$cond" : [
                        { "$eq" : [ "$$entry.rd_id" , "$$this.rd_id" ] } ,
                        "$$this._id" ,
                        "$$value"
                    ] }
                } } }
            ] }
        } } }
     ] }
  } }
} }

pipeline.push( set_sessions )

Note, I put the results in _tmp because it is easier to debug when you do not overwrite right away the original values. So what remains to do is to replace the original sessions with _tmp.sessions with

{ "$set" : { "sessions" : "$_tmp.sessions" } }

and the clean up with

{ "$unset" : [ "_tmp" , "sessions.entries.rd_id" ] }

For those who hate big code stew my real code uses functions to divide the code in more manageable units.

function pipeline()  {
    return [
        lookup_riders() ,
        set_tmp_sessions() ,
        replace_sessions() ,
        cleanup()
    ] ;
}

function lookup_riders()  {
   return { "$lookup": {
      "from": 'riders',
      "localField": "sessions.entries.rd_id",
      "foreignField": "rd_id",
      "as": "_tmp.riders"
  }  } ;
}

function set_tmp_sessions() {
    return { "$set" : {
        "_tmp.sessions"  : map_sessions()
    } }
}

function map_sessions() {
   return { "$map" : {
      "input" : "$sessions" ,
      "as" : "session" ,
      "in" : { "$mergeObjects" : [
         "$$session" ,
         { "entries" : map_entries() }
      ] }
   } } ;
}

function map_entries() {
   return { "$map" : {
      "input" : "$$session.entries" ,
      "as" : "entry" ,
      "in" : { "$mergeObjects" : [
         "$$entry" ,
         { "_id" : reduce_riders() }
      ] }
   } } ;
}

function reduce_riders() {
   return { "$reduce" : {
      "input" : "$_tmp.riders" ,
      "initialValue" : null ,
      "in" : { "$cond" : [
         { "$eq" : [ "$$entry.rd_id" , "$$this.rd_id" ] } ,
         "$$this._id" ,
         "$$value"
      ] }
   } } ;
}

function replace_sessions() {
    return { "$set" : { "sessions" : "$_tmp.sessions" } } ;
}

function cleanup() {
    return { "$unset" : [ "_tmp" , "sessions.entries.rd_id" ] } ;
}

db.races.aggregate( pipeline() ) 

This way curly braces are easier to matched. When I make a mistake in one function, I simply edit a smaller part of code rather than the big stew.

I recommend that you $out into a temporary collection, check the result and then $merge into the original.

AT YOUR OWN RISK

2 Likes