@Asya_Kamsky @steevej
This is my log with Java:
{"t":{"$date":"2021-09-07T10:03:17.484+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn20","msg":"Slow query","attr":{"type":"command","ns":"bird.statement","command":{"explain":{"aggregate":"statement","pipeline":[{"$group":{"_id":{"invoice_id":"$invoice_id","upc1":"$upc","retailer_id":"$retailer_id"},"qty":{"$sum":"$qty"},"revenue":{"$sum":"$extd_price"},"date_sk":{"$first":"$date_sk"},"inventory_name":{"$first":"$inventory_name"},"format":{"$first":"$format"},"sub_studio":{"$first":"$sub_studio"},"street_date":{"$first":"$street_date"},"retailer_id":{"$first":"$retailer_id"},"retailer_name":{"$first":"$retailer_name"},"retailer_type":{"$first":"$cidm_retailer_type"}}},{"$group":{"_id":{"upc1":"$_id.upc1","retailer_id":"$_id.retailer_id"},"invoices":{"$push":{"invoice_id":"$_id.invoice_id","qty":"$qty","revenue":"$revenue"}},"upc":{"$first":"$_id.upc1"},"date_sk":{"$first":"$date_sk"},"inventory_name":{"$first":"$inventory_name"},"format":{"$first":"$format"},"sub_studio":{"$first":"$sub_studio"},"street_date":{"$first":"$street_date"},"retailer_id":{"$first":"$retailer_id"},"retailer_name":{"$first":"$retailer_name"},"retailer_type":{"$first":"$retailer_type"}}}],"cursor":{}},"verbosity":"allPlansExecution","$db":"bird","lsid":{"id":{"$uuid":"4571756a-cb1a-4c46-ba1e-a976e1666f30"}}},"planSummary":"COLLSCAN","numYields":33,"reslen":4898,"locks":{"ParallelBatchWriterMode":{"acquireCount":{"r":1}},"ReplicationStateTransition":{"acquireCount":{"w":1}},"Global":{"acquireCount":{"r":37}},"Database":{"acquireCount":{"r":1}},"Collection":{"acquireCount":{"r":1}},"Mutex":{"acquireCount":{"r":4}}},"storage":{"data":{"bytesRead":39848460,"timeReadingMicros":65875}},"remote":"103.242.190.99:63628","protocol":"op_msg","durationMillis":330}}
{"t":{"$date":"2021-09-07T10:03:17.935+00:00"},"s":"I", "c":"COMMAND", "id":51803, "ctx":"conn20","msg":"Slow query","attr":{"type":"command","ns":"bird.statement","command":{"aggregate":"statement","pipeline":[{"$group":{"_id":{"invoice_id":"$invoice_id","upc1":"$upc","retailer_id":"$retailer_id"},"qty":{"$sum":"$qty"},"revenue":{"$sum":"$extd_price"},"date_sk":{"$first":"$date_sk"},"inventory_name":{"$first":"$inventory_name"},"format":{"$first":"$format"},"sub_studio":{"$first":"$sub_studio"},"street_date":{"$first":"$street_date"},"retailer_id":{"$first":"$retailer_id"},"retailer_name":{"$first":"$retailer_name"},"retailer_type":{"$first":"$cidm_retailer_type"}}},{"$group":{"_id":{"upc1":"$_id.upc1","retailer_id":"$_id.retailer_id"},"invoices":{"$push":{"invoice_id":"$_id.invoice_id","qty":"$qty","revenue":"$revenue"}},"upc":{"$first":"$_id.upc1"},"date_sk":{"$first":"$date_sk"},"inventory_name":{"$first":"$inventory_name"},"format":{"$first":"$format"},"sub_studio":{"$first":"$sub_studio"},"street_date":{"$first":"$street_date"},"retailer_id":{"$first":"$retailer_id"},"retailer_name":{"$first":"$retailer_name"},"retailer_type":{"$first":"$retailer_type"}}}],"cursor":{"batchSize":5},"$db":"bird","lsid":{"id":{"$uuid":"4571756a-cb1a-4c46-ba1e-a976e1666f30"}}},"planSummary":"COLLSCAN","cursorid":2723224898395150748,"keysExamined":0,"docsExamined":33102,"numYields":33,"nreturned":5,"reslen":1990,"locks":{"Global":{"acquireCount":{"r":36}},"Mutex":{"acquireCount":{"r":3}}},"storage":{},"remote":"103.242.190.99:63628","protocol":"op_msg","durationMillis":182}}
{"t":{"$date":"2021-09-07T10:03:30.236+00:00"},"s":"I", "c":"STORAGE", "id":22430, "ctx":"Checkpointer","msg":"WiredTiger message","attr":{"message":"[1631009010:236699][445:0x7fc2bd773700], WT_SESSION.checkpoint: [WT_VERB_CHECKPOINT_PROGRESS] saving checkpoint snapshot min: 1177, snapshot max: 1177 snapshot count: 0, oldest timestamp: (0, 0) , meta checkpoint timestamp: (0, 0) base write gen: 1073601"}}
For my log in JS, there is no log as such. Only the connection and auth succeed log.
{"t":{"$date":"2021-09-07T11:15:45.430+00:00"},"s":"I", "c":"NETWORK", "id":51800, "ctx":"conn21","msg":"client metadata","attr":{"remote":"103.242.190.99:63439","client":"conn21","doc":{"driver":{"name":"nodejs","version":"3.6.5"},"os":{"type":"Windows_NT","name":"win32","architecture":"x64","version":"10.0.19043"},"platform":"'Node.js v14.16.0, LE (legacy)"}}}
{"t":{"$date":"2021-09-07T11:15:45.686+00:00"},"s":"I", "c":"ACCESS", "id":20250, "ctx":"conn21","msg":"Authentication succeeded","attr":{"mechanism":"SCRAM-SHA-256","speculative":true,"principalName":"jtvAdmin","authenticationDatabase":"admin","remote":"103.242.190.99:63439","extraInfo":{}}}
My code without using explain is pretty much the same and also takes the same amount of time:
public static List<BsonDocument> aggregateCollectionPhysicalFlattened(String collectionName, List<? extends Bson> pipeline, int batchSize) {
System.out.println("Aggregating " + collectionName);
try {
MongoDBConnector mongoDBConnector = MongoDBConnector.getInstance();
MongoDatabase db = mongoDBConnector.getDatabase(DATABASE_BIRD);
MongoCollection collection = db.getCollection(collectionName);
long startTime = System.currentTimeMillis();
AggregateIterable mongoIterable = collection.aggregate(pipeline);
long elapsedTime = System.currentTimeMillis() - startTime;
System.out.println("elapsedTime aggregate: " + elapsedTime);
startTime = System.currentTimeMillis();
List<BsonDocument> results = (List<BsonDocument>) mongoIterable.batchSize(batchSize).into(new ArrayList<>());
elapsedTime = System.currentTimeMillis() - startTime;
System.out.println("elapsedTime into: " + elapsedTime);
return results;
} catch(Exception e) {
e.printStackTrace();
}
return null;
}
My JS code is below. It uses GraphQL as the API framework.
export default {
Query: {
async PerpetualInventory(_, { batchSize, dateFrom, dateTo, studio }) {
let res = [];
const matchCond = {};
if(studio) matchCond.sub_studio = {$in: studio}
if(dateFrom || dateTo) {
matchCond.street_date = {};
if (dateFrom) matchCond.street_date["$gte"] = new Date(dateFrom);
if (dateTo) matchCond.street_date["$lt"] = new Date(dateTo);
};
try {
await client.connect();
const db = client.db(dbName);
// Get the collection
const col = db.collection(collName);
res = await col.aggregate([
{
$match: matchCond
},
{
$project: {
upc: 1,
inventory_name: 1,
street_date: 1,
format: 1,
sub_studio: 1,
us_base: 1,
us_srp: 1,
disc_num: 1,
modified_date: 1,
qoh: { $cond: [ {$and : [ { $gte: [ "$modified_date", dateFrom] }, { $lte: [ "$modified_date", dateTo] } ] }, "$qoh", 0 ] },
}
}
]).limit(batchSize || 500).toArray();
} catch(err) {
console.log(err.stack);
}
return res;
},
}
}
I am 100% sure it is the mongoIterable.into(new ArrayList<>())
line which causes the delay. I have timed every step of the process and it is here when the delay happens. If I don’t iterate over the cursor, the query is done in under a second.