Thanks for responding. Regarding your questions:
What is your MongoDB version
How many databases & collections you have in the deployment?
- database : 310 database, with more than 3 million collection. Because for each user the system create a collection, leading to this huge number of collection. I’m working on a solution to reduce this number.
What’s the timeline of the incident, and have you tried rerunning the initial sync?
I have retried several times, still the same error.
What’s the output of rs.status(), rs.conf(), and rs.printReplicationInfo()
I ran those on the primary database, we currently only have two database in replica set, one this primary and the other is the one failing.
rs.status():
productiondb:PRIMARY> rs.status()
{
"set" : "productiondb",
"date" : ISODate("2022-07-21T08:28:09.793Z"),
"myState" : 1,
"term" : NumberLong(22),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1658392089, 1),
"t" : NumberLong(22)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1658392089, 1),
"t" : NumberLong(22)
},
"appliedOpTime" : {
"ts" : Timestamp(1658392089, 38),
"t" : NumberLong(22)
},
"durableOpTime" : {
"ts" : Timestamp(1658392089, 1),
"t" : NumberLong(22)
}
},
"members" : [
{
"_id" : 0,
"name" : "DB1:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 893212,
"optime" : {
"ts" : Timestamp(1658392089, 38),
"t" : NumberLong(22)
},
"optimeDate" : ISODate("2022-07-21T08:28:09Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1657498982, 1),
"electionDate" : ISODate("2022-07-11T00:23:02Z"),
"configVersion" : 105936,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "DB2:27017",
"health" : 1,
"state" : 5,
"stateStr" : "STARTUP2",
"uptime" : 165731,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2022-07-21T08:28:09.043Z"),
"lastHeartbeatRecv" : ISODate("2022-07-21T08:28:08.783Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "DB1:27017",
"syncSourceHost" : "DB1:27017",
"syncSourceId" : 0,
"infoMessage" : "",
"configVersion" : 105936
}
],
"ok" : 1,
"operationTime" : Timestamp(1658392089, 38),
"$clusterTime" : {
"clusterTime" : Timestamp(1658392089, 39),
"signature" : {
"hash" : BinData(0,"mWGu9CvEYng72WLP+5MzZW859JU="),
"keyId" : NumberLong("7083073276435496961")
}
}
}
rs.conf()
productiondb:PRIMARY> rs.conf()
{
"_id" : "productiondb",
"version" : 105936,
"protocolVersion" : NumberLong(1),
"members" : [
{
"_id" : 0,
"host" : "DB1:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 2,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 1,
"host" : "DB2:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : true,
"priority" : 0,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 0
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 10,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("5c1088d3efa5a1bd60427c1f")
}
}
rs.printReplicationInfo()
productiondb:PRIMARY> rs.printReplicationInfo()
configured oplog size: 512000.00009155273MB
log length start to end: 17068secs (4.74hrs)
oplog first event time: Thu Jul 21 2022 12:52:10 GMT+0900 (JST)
oplog last event time: Thu Jul 21 2022 17:36:38 GMT+0900 (JST)
now: Thu Jul 21 2022 17:36:38 GMT+0900 (JST)