Resolve and replace in-place references (almost working but complex query inside)

Hi,

I would like to create in-place generic references. These references would be able to exist either as an object or as an array of these references. A reference is generic in the sense that it is only identified by a property referenceId being present.

(The example from below available here: https://mongoplayground.net/p/CQJJ6q9F9k8 )

An example data-set:

[
  { "id": "1", "name": "First" },
  { 
    "id": "2", "name": "Second", 
    // in-place as object
    "ref1": { "referenceId": "3" },
    // or an array of these references
    "ref2": [ 
      { "referenceId": "1" },
      { "referenceId": "3" }
    ],
    "ref3": { "referenceId": "4" }
  },
  { "name": "Third", "id": "3" },
  { 
    "name": "Fourth", "id": "4",
    // reference here for more than one level of resolving id(2).ref3 -> id(4).ref3 -> id(3)
    // this will or will not be resolved depending on the order of results
    "ref3": [
      { "referenceId": "3" }
    ]
  }
]

Now, I have (more or less successfully) created a rather long and complex query resolving the references. However, I have two issues:

  • one: am I making things too complex? I am not able to decide what the run-time and memory complexity of this query looks like. I understand that a lot of times things can be modeled simpler or moved out of the db layer to the backend (or even front-end) resolving the references lazily.
  • two: how do I deal with the issue of the query being dependent on the order of results coming back? In my example, the second doc is referencing the fourth, which in turn references the third. While the reference in doc 4 will always be resolved, the reference in doc 4 (when inserted into doc 2) will only be resolved, if the referencing doc has been processed already.

The query follows at the end of this post (for the sake of readability). When invoked, it returns my (almost) correct result:

[
  {
    "id": "4", "name": "Fourth", 
    // referenceId 3 resolved
    "ref3": [ { "id": "3", "name": "Third" } ]
  },
  {
    "id": "2", "name": "Second",
    // referenceId 3 resolved
    "ref1": { "id": "3", "name": "Third" },
    // referenceId 1 and 3 resolved
    "ref2": [
      { "id": "1", "name": "First" },
      { "id": "3", "name": "Third" }
    ],
    "ref3": { 
      "id": "4", "name": "Fourth", 
      // unresolved
      "ref3": [ { "referenceId": "3" } ]
    }
  },
  { "id": "1", "name": "First" },
  { "id": "3", "name": "Third" }
]

As promised, here is the query. Again, I understand it is rather complex and long, but apart from the issue with “circular” references it actually does work :wink:

So, my question here is: am I fighting the system and should abandon the idea of having these generic references in-place or is there maybe a trick I could use to make it more efficient / working? Obivously,
without introducing more passes.

Any ideas would be very appreciated.

Cheers,

Chris

P.S.: I also came up with a simplified version of this but at the price of “freedom” when storing the data.

db.collection.aggregate([
  // filter out objects with referenceId inside
  {
    $addFields: {
      "references": {
        $map: {
          input: {
            "$objectToArray": "$$ROOT"
          },
          as: "item",
          in: {
            $cond: {
              if: {
                $ne: [
                  "$$item.v.referenceId",
                  undefined
                ]
              },
              then: [
                "$$item.k",
                [
                  "$$item.v.referenceId",
                  {
                    "$cond": [
                      {
                        "$ne": [
                          "$$item.v.referenceId.0",
                          undefined
                        ]
                      },
                      {
                        $size: "$$item.v.referenceId"
                      },
                      0
                    ]
                  }
                ]
              ],
              else: null
            }
          }
        }
      }
    }
  },
  // filter out potential null values from above
  {
    "$addFields": {
      "references": {
        $filter: {
          input: "$references",
          cond: {
            $ne: [
              "$$this",
              null
            ]
          }
        }
      }
    }
  },
  // remove the referenced objects from root. this needs to be
  // done because otherwise there will be a clash. when reducing
  // the data at the end
  {
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [
          {
            "references": "$references"
          },
          {
            "$arrayToObject": {
              $map: {
                input: {
                  $objectToArray: "$$ROOT"
                },
                as: "item",
                in: {
                  $cond: {
                    if: {
                      $eq: [
                        "$$item.v.referenceId",
                        undefined
                      ]
                    },
                    then: [
                      "$$item.k",
                      "$$item.v"
                    ],
                    else: [
                      "_",
                      "_"
                    ]
                  }
                }
              }
            }
          }
        ]
      }
    }
  },
  // prepare lookup via references array with k, v for static
  // key lookup via $references.v
  {
    "$addFields": {
      "references": {
        $map: {
          input: "$references",
          as: "item",
          in: {
            k: {
              "$arrayElemAt": [
                "$$item",
                0
              ]
            },
            v: {
              "$arrayElemAt": [
                "$$item",
                1
              ]
            }
          }
        }
      }
    }
  },
  {
    $unwind: {
      path: "$references",
      preserveNullAndEmptyArrays: true
    }
  },
  // perform lookup into field resolved
  {
    $lookup: {
      "from": "collection",
      "localField": "references.v.0",
      "foreignField": "id",
      "as": "resolved"
    }
  },
  {
    $unwind: {
      path: "$resolved",
      preserveNullAndEmptyArrays: true
    }
  },
  // prepare grouping via key and value
  {
    "$addFields": {
      "k": "$references.k",
      "v": "$resolved",
      type: {
        "$arrayElemAt": [
          "$references.v",
          1
        ]
      }
    }
  },
  // remove temp variables references and resolved
  {
    $project: {
      resolved: 0,
      references: 0,
      
    }
  },
  // group by key as id
  {
    $group: {
      _id: [
        "$k",
        "$_id"
      ],
      resolved: {
        $push: "$v"
      },
      root: {
        $first: "$$ROOT"
      }
    }
  },
  // create a ref field for grouping later
  // make 1-sized arrays into fields - this is not optimal,
  // since we don't know what it was before. optimal would be
  // storing the size from the input document
  {
    $addFields: {
      ref: {
        $cond: [
          {
            $eq: [
              "$root.k",
              undefined
            ]
          },
          {},
          {
            $arrayToObject: [
              [
                {
                  k: "$root.k",
                  v: {
                    $cond: [
                      {
                        $and: [
                          {
                            $eq: [
                              {
                                $size: "$resolved"
                              },
                              1
                            ]
                          },
                          {
                            $lt: [
                              "$root.type",
                              1
                            ]
                          }
                        ]
                      },
                      {
                        $arrayElemAt: [
                          "$resolved",
                          0
                        ]
                      },
                      "$resolved"
                    ]
                  }
                }
              ]
            ]
          }
        ]
      }
    }
  },
  // remove temp fields which were nested before
  // group by id, merge by ref, merge with root for reduce
  {
    $project: {
      "root.k": 0,
      "root.v": 0,
      "root._": 0,
      "root.type": 0
    }
  },
  // group by id, merge by ref, merge with root for reduce
  {
    $group: {
      _id: "$root.id",
      ref: {
        $push: {
          $mergeObjects: [
            "$root",
            "$ref"
          ]
        }
      },
      root: {
        $first: "$$ROOT"
      }
    }
  },
  // merge by ref, merge with root for reduce
  // reduce the ref array into final result
  {
    $project: {
      result: {
        $reduce: {
          input: "$ref",
          initialValue: {},
          in: {
            $mergeObjects: [
              "$$this",
              "$$value"
            ]
          }
        }
      }
    }
  },
  // replace root with result
  {
    $replaceRoot: {
      newRoot: "$result"
    }
  }
])