Hi
I have small schema like this
{
"_id": ObjectId('65110ce0d7c21aaca1d1sf33'),
bookingId: "1234",
pgId: ObjectId('65110ce0d7c21aaca1d1db33'),
scheduledDate: ISODate("2023-09-16T05:30:00.000Z")
}
I have index for
a. pgId
My collection could have huge amount of documents.
- I want to search by pgId and sort subset of data based on scheduledDate
What is the best way to do this ?
I have tried two ways
a. SORT stage being used in RAM memory
find( { pgId: ObjectId(‘65110ce0d7c21aaca1d1db33’) } ).sort({ scheduledDate : -1 }).explain(“executionStats”)
Here is the output
db.bookingOrders.find( { pgId: ObjectId('65110ce0d7c21aaca1d1db33') } ).sort({ scheduledDate : -1 }).explain("executionStats")
{
explainVersion: '1',
queryPlanner: {
namespace: 'foody.bookingOrders',
indexFilterSet: false,
parsedQuery: { pgId: { '$eq': ObjectId("65110ce0d7c21aaca1d1db33") } },
queryHash: '0D151AAD',
planCacheKey: 'B4247D13',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'SORT',
sortPattern: { scheduledDate: -1 },
memLimit: 104857600,
type: 'simple',
inputStage: {
stage: 'FETCH',
inputStage: {
stage: 'IXSCAN',
keyPattern: { pgId: 1 },
indexName: 'pgId_1',
isMultiKey: false,
multiKeyPaths: { pgId: [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
pgId: [
"[ObjectId('65110ce0d7c21aaca1d1db33'), ObjectId('65110ce0d7c21aaca1d1db33')]"
]
}
}
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 8910,
executionTimeMillis: 46,
totalKeysExamined: 8910,
totalDocsExamined: 8910,
executionStages: {
stage: 'SORT',
nReturned: 8910,
executionTimeMillisEstimate: 9,
works: 17822,
advanced: 8910,
needTime: 8911,
needYield: 0,
saveState: 17,
restoreState: 17,
isEOF: 1,
sortPattern: { scheduledDate: -1 },
memLimit: 104857600,
type: 'simple',
totalDataSizeSorted: 4535264,
usedDisk: false,
spills: 0,
inputStage: {
stage: 'FETCH',
nReturned: 8910,
executionTimeMillisEstimate: 9,
works: 8911,
advanced: 8910,
needTime: 0,
needYield: 0,
saveState: 17,
restoreState: 17,
isEOF: 1,
docsExamined: 8910,
alreadyHasObj: 0,
inputStage: {
stage: 'IXSCAN',
nReturned: 8910,
executionTimeMillisEstimate: 0,
works: 8911,
advanced: 8910,
needTime: 0,
needYield: 0,
saveState: 17,
restoreState: 17,
isEOF: 1,
keyPattern: { pgId: 1 },
indexName: 'pgId_1',
isMultiKey: false,
multiKeyPaths: { pgId: [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
pgId: [
"[ObjectId('65110ce0d7c21aaca1d1db33'), ObjectId('65110ce0d7c21aaca1d1db33')]"
]
},
keysExamined: 8910,
seeks: 1,
dupsTested: 0,
dupsDropped: 0
}
}
}
},
b. No SORT operation in RAM by using sort on index field. In this case scheduledDate ( For this i created index for scheduledDate)
find( { pgId: ObjectId(‘65110ce0d7c21aaca1d1db33’) } ).sort({ scheduledDate : -1 }).explain(“executionStats”)
Here is the output
db.bookingOrders.find( { pgId: ObjectId('65110ce0d7c21aaca1d1db33') } ).sort({ scheduledDate : -1 }).explain("executionStats")
{
explainVersion: '1',
queryPlanner: {
namespace: 'foody.bookingOrders',
indexFilterSet: false,
parsedQuery: { pgId: { '$eq': ObjectId("65110ce0d7c21aaca1d1db33") } },
queryHash: '0D151AAD',
planCacheKey: 'B4247D13',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'FETCH',
filter: { pgId: { '$eq': ObjectId("65110ce0d7c21aaca1d1db33") } },
inputStage: {
stage: 'IXSCAN',
keyPattern: { scheduledDate: -1 },
indexName: 'scheduledDate_-1',
isMultiKey: false,
multiKeyPaths: { scheduledDate: [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: { scheduledDate: [ '[MaxKey, MinKey]' ] }
}
},
rejectedPlans: [
{
stage: 'SORT',
sortPattern: { scheduledDate: -1 },
memLimit: 104857600,
type: 'simple',
inputStage: {
stage: 'FETCH',
inputStage: {
stage: 'IXSCAN',
keyPattern: { pgId: 1 },
indexName: 'pgId_1',
isMultiKey: false,
multiKeyPaths: { pgId: [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
pgId: [
"[ObjectId('65110ce0d7c21aaca1d1db33'), ObjectId('65110ce0d7c21aaca1d1db33')]"
]
}
}
}
}
]
},
executionStats: {
executionSuccess: true,
nReturned: 8910,
executionTimeMillis: 418,
totalKeysExamined: 80000,
totalDocsExamined: 80000,
executionStages: {
stage: 'FETCH',
filter: { pgId: { '$eq': ObjectId("65110ce0d7c21aaca1d1db33") } },
nReturned: 8910,
executionTimeMillisEstimate: 135,
works: 80001,
advanced: 8910,
needTime: 71090,
needYield: 0,
saveState: 80,
restoreState: 80,
isEOF: 1,
docsExamined: 80000,
alreadyHasObj: 0,
inputStage: {
stage: 'IXSCAN',
nReturned: 80000,
executionTimeMillisEstimate: 55,
works: 80001,
advanced: 80000,
needTime: 0,
needYield: 0,
saveState: 80,
restoreState: 80,
isEOF: 1,
keyPattern: { scheduledDate: -1 },
indexName: 'scheduledDate_-1',
isMultiKey: false,
multiKeyPaths: { scheduledDate: [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: { scheduledDate: [ '[MaxKey, MinKey]' ] },
keysExamined: 80000,
seeks: 1,
dupsTested: 0,
dupsDropped: 0
}
}
},
command: {
find: 'bookingOrders',
filter: { pgId: ObjectId("65110ce0d7c21aaca1d1db33") },
sort: { scheduledDate: -1 },
'$db': 'foody'
},
serverInfo: {
host: 'DESKTOP-O4GVMQ6',
port: 27017,
version: '6.0.8',
gitVersion: '3d84c0dd4e5d99be0d69003652313e7eaf4cdd74'
},
serverParameters: {
internalQueryFacetBufferSizeBytes: 104857600,
internalQueryFacetMaxOutputDocSizeBytes: 104857600,
internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
internalDocumentSourceGroupMaxMemoryBytes: 104857600,
internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
internalQueryProhibitBlockingMergeOnMongoS: 0,
internalQueryMaxAddToSetBytes: 104857600,
internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
},
ok: 1
}
Even though approach “a” has given less time, in future as data grows i am afraid SORT being done in RAM is not that great idea.
Can someone explain why approach “b” is taking more time, can we not expect do sort only for find based result ? Meaning sort only subset of doc matching pgId ?
Please suggest