Hello.
I’m trying to do a lookup stage but its really slow when using a pipeline in the lookup stage compared to a regular one with localField/foreignField.
The structure below is simplified but have the structure with arrays.
Artist collection (100 artists)
{title: ‘the Beatles’, songs: [ObejctId(1), ObejctId(2), ObejctId(2)]}
….
Songs collection (1000 songs)
{_id: 1: title: ‘Help1’}
{_id: 1: title: ‘Come together’}
{_id: 1: title: ‘Something’}
…
I want to join all the song to the current artist by the objectID from the song array to the ObjectId of the song in the song collection.
This pipeline takes about 60ms with localField/foreignField.
{ $match: { songs: { $exists: true, $ne: [] } } },
{
$lookup: {
from: 'songs',
localField: 'songs',
foreignField: '_id',
as: 'songs',
},
},
{
$project: {
slug: 1,
title: 1,
songs: 1,
user: 1,
name: 1,
profileImage: 1,
songCount: {
$size: '$songs',
},
},
},
This pipeline takes about 2.5 seconds with the same data.
{ $match: { songs: { $exists: true, $ne: [] } } },
{
$lookup: {
from: 'songs',
let: {
songIds: '$songs',
},
pipeline: [
{
$match: {
$expr: {
$in: ['$_id', '$$songIds'],
},
},
},
],
as: 'songs',
},
},
{
$project: {
slug: 1,
title: 1,
songs: 1,
user: 1,
name: 1,
profileImage: 1,
songCount: {
$size: '$songs',
},
},
},
I’ve looked into indexes and created one on the artist collection on the songs array like this
db.artists.createIndex({songs:1});
(tried both asc/desc)
But it doesn’t seem to do any difference in the time.
When running the aggregation with .explain('executionStats')
with the above created index I get
"winningPlan" : {
"stage" : “IXSCAN",
......
}
everywhere on both queries. And If I drop the index I get
"winningPlan" : {
"stage" : "COLLSCAN",
......
}
everywhere on both queries.
But there is no difference on the execution time. Both takes about 2 seconds (pipeline) seconds respective 60ms (foreignField).
When index is added I get "totalKeysExamined" : 865,
on both queries and without index "totalKeysExamined" : 0.
When comparing the two explained result the only difference between the stats are executionTimeMillis and the query itself.
So are the indexes really used or am I doing something wrong or have I misunderstood indexes?
Mongo 4.2.8
Using Robo 3T
Full explain from pipeline with index ~2.5 seconds
/* 1 */
{
"stages" : [
{
"$cursor" : {
"query" : {
"songs" : {
"$exists" : true,
"$ne" : []
}
},
"fields" : {
"name" : 1,
"profileImage" : 1,
"slug" : 1,
"songs" : 1,
"title" : 1,
"user" : 1,
"_id" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "freshsound.artists",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"songs" : {
"$exists" : true
}
},
{
"songs" : {
"$not" : {
"$eq" : []
}
}
}
]
},
"queryHash" : "F267DA83",
"planCacheKey" : "F7BD81F7",
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"songs" : {
"$exists" : true
}
},
{
"songs" : {
"$not" : {
"$eq" : []
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"songs" : 1.0
},
"indexName" : "songs_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"songs" : [
"songs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"songs" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 99,
"executionTimeMillis" : 2161,
"totalKeysExamined" : 865,
"totalDocsExamined" : 117,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"songs" : {
"$exists" : true
}
},
{
"songs" : {
"$not" : {
"$eq" : []
}
}
}
]
},
"nReturned" : 99,
"executionTimeMillisEstimate" : 0,
"works" : 866,
"advanced" : 99,
"needTime" : 766,
"needYield" : 0,
"saveState" : 7,
"restoreState" : 7,
"isEOF" : 1,
"docsExamined" : 117,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 117,
"executionTimeMillisEstimate" : 0,
"works" : 866,
"advanced" : 117,
"needTime" : 748,
"needYield" : 0,
"saveState" : 7,
"restoreState" : 7,
"isEOF" : 1,
"keyPattern" : {
"songs" : 1.0
},
"indexName" : "songs_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"songs" : [
"songs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"songs" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 865,
"seeks" : 1,
"dupsTested" : 865,
"dupsDropped" : 748
}
}
}
}
},
{
"$lookup" : {
"from" : "songs",
"as" : "songs",
"let" : {
"songIds" : "$songs"
},
"pipeline" : [
{
"$match" : {
"$expr" : {
"$in" : [
"$_id",
"$$songIds"
]
}
}
}
]
}
},
{
"$project" : {
"_id" : true,
"title" : true,
"slug" : true,
"profileImage" : true,
"songs" : true,
"user" : true,
"name" : true,
"songCount" : {
"$size" : [
"$songs"
]
}
}
}
],
"serverInfo" : {
"host" : "MacBook-Pro.local",
"port" : 27017,
"version" : "4.2.8",
"gitVersion" : "43d25964249164d76d5e04dd6cf38f6111e21f5f"
},
"ok" : 1.0
}
Explain with foreignFiel/localField ~60ms
/* 1 */
{
"stages" : [
{
"$cursor" : {
"query" : {
"songs" : {
"$exists" : true,
"$ne" : []
}
},
"fields" : {
"name" : 1,
"profileImage" : 1,
"slug" : 1,
"songs" : 1,
"title" : 1,
"user" : 1,
"_id" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "freshsound.artists",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"songs" : {
"$exists" : true
}
},
{
"songs" : {
"$not" : {
"$eq" : []
}
}
}
]
},
"queryHash" : "F267DA83",
"planCacheKey" : "F7BD81F7",
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"songs" : {
"$exists" : true
}
},
{
"songs" : {
"$not" : {
"$eq" : []
}
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"songs" : 1.0
},
"indexName" : "songs_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"songs" : [
"songs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"songs" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 99,
"executionTimeMillis" : 73,
"totalKeysExamined" : 865,
"totalDocsExamined" : 117,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"songs" : {
"$exists" : true
}
},
{
"songs" : {
"$not" : {
"$eq" : []
}
}
}
]
},
"nReturned" : 99,
"executionTimeMillisEstimate" : 0,
"works" : 866,
"advanced" : 99,
"needTime" : 766,
"needYield" : 0,
"saveState" : 7,
"restoreState" : 7,
"isEOF" : 1,
"docsExamined" : 117,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 117,
"executionTimeMillisEstimate" : 0,
"works" : 866,
"advanced" : 117,
"needTime" : 748,
"needYield" : 0,
"saveState" : 7,
"restoreState" : 7,
"isEOF" : 1,
"keyPattern" : {
"songs" : 1.0
},
"indexName" : "songs_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"songs" : [
"songs"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"songs" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 865,
"seeks" : 1,
"dupsTested" : 865,
"dupsDropped" : 748
}
}
}
}
},
{
"$lookup" : {
"from" : "songs",
"as" : "songs",
"localField" : "songs",
"foreignField" : "_id"
}
},
{
"$project" : {
"_id" : true,
"songs" : true,
"user" : true,
"name" : true,
"profileImage" : true,
"title" : true,
"slug" : true,
"songCount" : {
"$size" : [
"$songs"
]
}
}
},
{
"$project" : {
"_id" : true,
"songs" : true,
"user" : true,
"name" : true,
"profileImage" : true,
"title" : true,
"slug" : true,
"songCount" : {
"$size" : [
"$songs"
]
}
}
},
{
"$match" : {
"songCount" : {
"$not" : {
"$eq" : 0.0
}
}
}
}
],
"serverInfo" : {
"host" : "MacBook-Pro.local",
"port" : 27017,
"version" : "4.2.8",
"gitVersion" : "43d25964249164d76d5e04dd6cf38f6111e21f5f"
},
"ok" : 1.0
}
Thanks!