I have data structure like below,
{
"metadata": {
"id": "node1000",
"type": "node"
},
"timestamp": "2023-07-07T12:35:41.901Z",
"data": {
"attribute1": "value1",
"attribute2": ["v1","v2"],
....
}
}
metadata field is set as the time series’ meta field . but overtime, with over 18M documents , it’s very slow to find distinct matadata.id even with indexes.
db.ts.createIndex( { “metadata.type”:1 ,“metadata.id”: 1 } )
size of the index:
‘metadata.type_1_metadata.id_1’: 182472704
for a distinct query:
mydb> db.ts.explain("executionStats").distinct("metadata.id",{ "metadata.type": "node" })
{
explainVersion: '1',
stages: [
{
'$cursor': {
queryPlanner: {
namespace: 'rhdb.system.buckets.ts',
indexFilterSet: false,
parsedQuery: { 'meta.type': { '$eq': 'node' } },
queryHash: '11855857',
planCacheKey: '33F0AFDF',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'FETCH',
inputStage: {
stage: 'IXSCAN',
keyPattern: { 'meta.type': 1, 'meta.id': 1 },
indexName: 'metadata.type_1_metadata.id_1',
isMultiKey: false,
multiKeyPaths: { 'meta.type': [], 'meta.id': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
'meta.type': [ '["node", "node"]' ],
'meta.id': [ '[MinKey, MaxKey]' ]
}
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 4081225,
executionTimeMillis: 631401,
totalKeysExamined: 4081225,
totalDocsExamined: 4081225,
executionStages: {
stage: 'FETCH',
nReturned: 4081225,
executionTimeMillisEstimate: 565025,
works: 4081226,
advanced: 4081225,
needTime: 0,
needYield: 0,
saveState: 38756,
restoreState: 38756,
isEOF: 1,
docsExamined: 4081225,
alreadyHasObj: 0,
inputStage: {
stage: 'IXSCAN',
nReturned: 4081225,
executionTimeMillisEstimate: 7614,
works: 4081226,
advanced: 4081225,
needTime: 0,
needYield: 0,
saveState: 38756,
restoreState: 38756,
isEOF: 1,
keyPattern: { 'meta.type': 1, 'meta.id': 1 },
indexName: 'metadata.type_1_metadata.id_1',
isMultiKey: false,
multiKeyPaths: { 'meta.type': [], 'meta.id': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
'meta.type': [ '["node", "node"]' ],
'meta.id': [ '[MinKey, MaxKey]' ]
},
keysExamined: 4081225,
seeks: 1,
dupsTested: 0,
dupsDropped: 0
}
}
}
},
nReturned: Long("4081225"),
executionTimeMillisEstimate: Long("600669")
},
{
'$_internalUnpackBucket': {
include: [ 'metadata' ],
timeField: 'timestamp',
metaField: 'metadata',
bucketMaxSpanSeconds: 86400,
assumeNoMixedSchemaData: true
},
nReturned: Long("16537570"),
executionTimeMillisEstimate: Long("610630")
},
{
'$replaceRoot': {
newRoot: {
_internalUnwoundArray: {
'$_internalFindAllValuesAtPath': [ { '$const': 'metadata.id' } ]
}
}
},
nReturned: Long("16537570"),
executionTimeMillisEstimate: Long("624739")
},
{
'$unwind': {
path: '$_internalUnwoundArray',
preserveNullAndEmptyArrays: true
},
nReturned: Long("16537570"),
executionTimeMillisEstimate: Long("627371")
},
{
'$group': {
_id: { '$const': null },
distinct: { '$addToSet': '$_internalUnwoundArray' }
},
maxAccumulatorMemoryUsageBytes: { distinct: Long("2295937") },
totalOutputDataSizeBytes: Long("2296118"),
usedDisk: false,
spills: Long("0"),
nReturned: Long("1"),
executionTimeMillisEstimate: Long("631384")
}
],
serverInfo: {
host: 'mongodb-5b784c88b-sbwpd',
port: 27017,
version: '6.0.6',
gitVersion: '26b4851a412cc8b9b4a18cdb6cd0f9f642e06aa7'
},
serverParameters: {
internalQueryFacetBufferSizeBytes: 104857600,
internalQueryFacetMaxOutputDocSizeBytes: 104857600,
internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
internalDocumentSourceGroupMaxMemoryBytes: 104857600,
internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
internalQueryProhibitBlockingMergeOnMongoS: 0,
internalQueryMaxAddToSetBytes: 104857600,
internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
},
command: {
aggregate: 'system.buckets.ts',
pipeline: [
{
'$_internalUnpackBucket': {
timeField: 'timestamp',
metaField: 'metadata',
bucketMaxSpanSeconds: 86400,
assumeNoMixedSchemaData: true,
usesExtendedRange: false
}
},
{ '$match': { 'metadata.type': 'node' } },
{
'$replaceRoot': {
newRoot: {
_internalUnwoundArray: { '$_internalFindAllValuesAtPath': 'metadata.id' }
}
}
},
{
'$unwind': {
path: '$_internalUnwoundArray',
preserveNullAndEmptyArrays: true
}
},
{
'$group': {
_id: null,
distinct: { '$addToSet': '$_internalUnwoundArray' }
}
}
],
cursor: {},
collation: {}
},
ok: 1
}
I thought covered queries maybe better . but it looks like it still needs to unpack documents, not just used information from index. ( there will be further process to find the unique value out of mongodb)
mydb> db.ts.explain("executionStats").find({ "metadata.type": "node" }, { "metadata.id": 1, "_id": 0 })
{
explainVersion: '1',
stages: [
{
'$cursor': {
queryPlanner: {
namespace: 'rhdb.system.buckets.ts',
indexFilterSet: false,
parsedQuery: { 'meta.type': { '$eq': 'node' } },
queryHash: '11855857',
planCacheKey: '33F0AFDF',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'FETCH',
inputStage: {
stage: 'IXSCAN',
keyPattern: { 'meta.type': 1, 'meta.id': 1 },
indexName: 'metadata.type_1_metadata.id_1',
isMultiKey: false,
multiKeyPaths: { 'meta.type': [], 'meta.id': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
'meta.type': [ '["node", "node"]' ],
'meta.id': [ '[MinKey, MaxKey]' ]
}
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 4081225,
executionTimeMillis: 674615,
totalKeysExamined: 4081225,
totalDocsExamined: 4081225,
executionStages: {
stage: 'FETCH',
nReturned: 4081225,
executionTimeMillisEstimate: 616918,
works: 4081226,
advanced: 4081225,
needTime: 0,
needYield: 0,
saveState: 41221,
restoreState: 41221,
isEOF: 1,
docsExamined: 4081225,
alreadyHasObj: 0,
inputStage: {
stage: 'IXSCAN',
nReturned: 4081225,
executionTimeMillisEstimate: 8354,
works: 4081226,
advanced: 4081225,
needTime: 0,
needYield: 0,
saveState: 41221,
restoreState: 41221,
isEOF: 1,
keyPattern: { 'meta.type': 1, 'meta.id': 1 },
indexName: 'metadata.type_1_metadata.id_1',
isMultiKey: false,
multiKeyPaths: { 'meta.type': [], 'meta.id': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
'meta.type': [ '["node", "node"]' ],
'meta.id': [ '[MinKey, MaxKey]' ]
},
keysExamined: 4081225,
seeks: 1,
dupsTested: 0,
dupsDropped: 0
}
}
}
},
nReturned: Long("4081225"),
executionTimeMillisEstimate: Long("653606")
},
{
'$_internalUnpackBucket': {
include: [ 'metadata' ],
timeField: 'timestamp',
metaField: 'metadata',
bucketMaxSpanSeconds: 86400,
assumeNoMixedSchemaData: true
},
nReturned: Long("16537570"),
executionTimeMillisEstimate: Long("662804")
},
{
'$project': { metadata: { id: true }, _id: false },
nReturned: Long("16537570"),
executionTimeMillisEstimate: Long("669607")
}
],
serverInfo: {
host: 'mongodb-5b784c88b-sbwpd',
port: 27017,
version: '6.0.6',
gitVersion: '26b4851a412cc8b9b4a18cdb6cd0f9f642e06aa7'
},
serverParameters: {
internalQueryFacetBufferSizeBytes: 104857600,
internalQueryFacetMaxOutputDocSizeBytes: 104857600,
internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
internalDocumentSourceGroupMaxMemoryBytes: 104857600,
internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
internalQueryProhibitBlockingMergeOnMongoS: 0,
internalQueryMaxAddToSetBytes: 104857600,
internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
},
command: {
aggregate: 'system.buckets.ts',
pipeline: [
{
'$_internalUnpackBucket': {
timeField: 'timestamp',
metaField: 'metadata',
bucketMaxSpanSeconds: 86400,
assumeNoMixedSchemaData: true,
usesExtendedRange: false
}
},
{ '$match': { 'metadata.type': 'node' } },
{ '$project': { 'metadata.id': 1, _id: 0 } }
],
cursor: {},
collation: {}
},
ok: 1
}
Did I get the query right? Any advice is appreciated.