A year late here @kevinadi but hopefully you are still around.
After spending a bit of time over the last few days learning more about which specific queries have been causing issues I can offer a bit more here and hopefully get some productive feedback.
The problem queries seem to be ones that use a text index and have certain words under 4 characters. For instance, this query performs fine:
db.rocketchat_message.find({ '$text': { '$search': 'type=test' }, t: { '$ne': 'rm' }, _hidden: { '$ne': true }, rid: 'LkgTmX2dCncp5Rxtcx2Hj2YYiyyK49zj9i' })
While this one is very slow:
db.rocketchat_message.find({ '$text': { '$search': 'type=www' }, t: { '$ne': 'rm' }, _hidden: { '$ne': true }, rid: 'LkgTmX2dCncp5Rxtcx2Hj2YYiyyK49zj9i' })
In fact, even searching “www” is very slow but “wwww” is not. Furthermore, despite having created a compound index and added rid
to the text index I found the order of the columns makes a big difference. When I created this, it would search text fields first (on 6 million records) and then filter by rid
which is silly because rid
only matches 257 of those:
db.rocketchat_message.createIndex( {"msg" : "text", "rid" : 1 } )
This led me to recreate the index as such:
db.rocketchat_message.createIndex( { "rid" : 1, "msg" : "text" } )
This is now much faster but obviously fails completely when rid
is not included. That makes the index useless for querying the entire table and since Mongodb only allows one text index per table I’m a bit stuck there.
At any rate, I’m wondering I can get some specific input into why my initial queries are performing so badly on shorter word lengths. Here is an explain that might help shed some light:
db.rocketchat_message.find({ '$text': { '$search': 'https://www.example.com' }, t: { '$ne': 'rm' }, _hidden: { '$ne': true }, rid: 'LkgTmX2dCncp5Rxtcx2Hj2YYiyyK49zj9i' }).explain('executionStats')
{
explainVersion: '1',
queryPlanner: {
namespace: 'rocketchat.rocketchat_message',
indexFilterSet: false,
parsedQuery: {
'$and': [
{ rid: { '$eq': 'LkgTmX2dCncp5Rxtcx2Hj2YYiyyK49zj9i' } },
{ _hidden: { '$not': { '$eq': true } } },
{ t: { '$not': { '$eq': 'rm' } } },
{
'$text': {
'$search': 'https://www.example.com',
'$language': 'english',
'$caseSensitive': false,
'$diacriticSensitive': false
}
}
]
},
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'FETCH',
filter: {
'$and': [
{ _hidden: { '$not': { '$eq': true } } },
{ t: { '$not': { '$eq': 'rm' } } }
]
},
inputStage: {
stage: 'TEXT_MATCH',
indexPrefix: {},
indexName: 'msg_text_rid_1',
parsedTextQuery: {
terms: [ 'com', 'exampl', 'https', 'www' ],
negatedTerms: [],
phrases: [],
negatedPhrases: []
},
textIndexVersion: 3,
inputStage: {
stage: 'FETCH',
inputStage: {
stage: 'OR',
filter: { rid: { '$eq': 'LkgTmX2dCncp5Rxtcx2Hj2YYiyyK49zj9i' } },
inputStages: [
{
stage: 'IXSCAN',
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {}
},
{
stage: 'IXSCAN',
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {}
},
{
stage: 'IXSCAN',
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {}
},
{
stage: 'IXSCAN',
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {}
}
]
}
}
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 2,
executionTimeMillis: 47743,
totalKeysExamined: 13301404,
totalDocsExamined: 4,
executionStages: {
stage: 'FETCH',
filter: {
'$and': [
{ _hidden: { '$not': { '$eq': true } } },
{ t: { '$not': { '$eq': 'rm' } } }
]
},
nReturned: 2,
executionTimeMillisEstimate: 15933,
works: 13301408,
advanced: 2,
needTime: 13301405,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
docsExamined: 2,
alreadyHasObj: 2,
inputStage: {
stage: 'TEXT_MATCH',
nReturned: 2,
executionTimeMillisEstimate: 15717,
works: 13301408,
advanced: 2,
needTime: 13301405,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
indexPrefix: {},
indexName: 'msg_text_rid_1',
parsedTextQuery: {
terms: [ 'com', 'exampl', 'https', 'www' ],
negatedTerms: [],
phrases: [],
negatedPhrases: []
},
textIndexVersion: 3,
docsRejected: 0,
inputStage: {
stage: 'FETCH',
nReturned: 2,
executionTimeMillisEstimate: 15470,
works: 13301408,
advanced: 2,
needTime: 13301405,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
docsExamined: 2,
alreadyHasObj: 0,
inputStage: {
stage: 'OR',
filter: { rid: { '$eq': 'LkgTmX2dCncp5Rxtcx2Hj2YYiyyK49zj9i' } },
nReturned: 2,
executionTimeMillisEstimate: 15309,
works: 13301408,
advanced: 2,
needTime: 13301405,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
dupsTested: 13301404,
dupsDropped: 6824735,
inputStages: [
{
stage: 'IXSCAN',
nReturned: 351631,
executionTimeMillisEstimate: 214,
works: 351632,
advanced: 351631,
needTime: 0,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {},
keysExamined: 351631,
seeks: 1,
dupsTested: 351631,
dupsDropped: 0
},
{
stage: 'IXSCAN',
nReturned: 271,
executionTimeMillisEstimate: 0,
works: 272,
advanced: 271,
needTime: 0,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {},
keysExamined: 271,
seeks: 1,
dupsTested: 271,
dupsDropped: 0
},
{
stage: 'IXSCAN',
nReturned: 6476186,
executionTimeMillisEstimate: 5115,
works: 6476187,
advanced: 6476186,
needTime: 0,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {},
keysExamined: 6476186,
seeks: 1,
dupsTested: 6476186,
dupsDropped: 0
},
{
stage: 'IXSCAN',
nReturned: 6473316,
executionTimeMillisEstimate: 4796,
works: 6473317,
advanced: 6473316,
needTime: 0,
needYield: 0,
saveState: 13308,
restoreState: 13308,
isEOF: 1,
keyPattern: { _fts: 'text', _ftsx: 1, rid: 1 },
indexName: 'msg_text_rid_1',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {},
keysExamined: 6473316,
seeks: 1,
dupsTested: 6473316,
dupsDropped: 0
}
]
}
}
}
}
},
command: {
find: 'rocketchat_message',
filter: {
'$text': { '$search': 'https://www.example.com' },
t: { '$ne': 'rm' },
_hidden: { '$ne': true },
rid: 'LkgTmX2dCncp5Rxtcx2Hj2YYiyyK49zj9i'
},
'$db': 'rocketchat'
},
serverInfo: {
host: 'chat',
port: 27017,
version: '5.0.15',
gitVersion: '935639beed3d0c19c2551c93854b831107c0b118'
},
serverParameters: {
internalQueryFacetBufferSizeBytes: 104857600,
internalQueryFacetMaxOutputDocSizeBytes: 104857600,
internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
internalDocumentSourceGroupMaxMemoryBytes: 104857600,
internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
internalQueryProhibitBlockingMergeOnMongoS: 0,
internalQueryMaxAddToSetBytes: 104857600,
internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
},
ok: 1,
'$clusterTime': {
clusterTime: Timestamp({ t: 1678285569, i: 1 }),
signature: {
hash: Binary(Buffer.from("e07e7a674340cdc236b59f232718bd4694bb0321", "hex"), 0),
keyId: Long("7176963756902055940")
}
},
operationTime: Timestamp({ t: 1678285569, i: 1 })
}