Hi,
I am running MongoDB v4.4, and one replica set member is performing an initial sync that keeps failing and resetting its data directory whenever it reaches a specific collection, with the error below:
{
"durationMillis" : 9186078,
"status" : "InitialSyncFailure: error cloning databases :: caused by :: HostUnreachable: Error cloning collection 'DB1.collection2' :: caused by :: network error while attempting to run command 'collStats' on host '10.10.0.52:27017' ",
"syncSource" : "10.10.0.52:27017",
"rollBackId" : 10,
"operationsRetried" : 1,
"totalTimeUnreachableMillis" : 0
}
This is the output of rs.conf():
rs0:STARTUP2> rs.conf()
{
"_id" : "rs0",
"version" : 30,
"term" : 124,
"protocolVersion" : NumberLong(1),
"writeConcernMajorityJournalDefault" : true,
"members" : [
{
"_id" : 2,
"host" : "10.10.0.52:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 2,
"tags" : {
"serviceName" : "db-support",
"podName" : "db-support-rs0-2"
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 3,
"host" : "10.10.0.177:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 2,
"tags" : {
"podName" : "db-support-rs0-1",
"serviceName" : "db-support"
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 5,
"host" : "10.11.10.74:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 0,
"tags" : {
"external" : "true"
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 6,
"host" : "10.10.0.151:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 0,
"tags" : {
"podName" : "db-support-rs0-0",
"serviceName" : "db-support"
},
"slaveDelay" : NumberLong(0),
"votes" : 0
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 100000,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("635efd17a1573f6faf2f2161")
}
}
The initial sync always fails at this specific point, when it reaches DB1.collection2:
},
"DB1" : {
"collections" : 3,
"clonedCollections" : 0,
"start" : ISODate("2023-08-01T08:13:52.225Z"),
"DB1.collection1" : {
"documentsToCopy" : 406344735,
"documentsCopied" : 406344735,
"indexes" : 4,
"fetchedBatches" : 7321,
"bytesToCopy" : 120041237379,
"approxBytesCopied" : 119871696825,
"start" : ISODate("2023-08-01T08:13:52.259Z"),
"receivedBatches" : 7321
},
"DB1.collection2" : {
"documentsToCopy" : 0,
"documentsCopied" : 0,
"indexes" : 0,
"fetchedBatches" : 0,
"bytesToCopy" : 0,
"receivedBatches" : 0
},
I can also see the following error in the log on the sync source host:
},
"s": "I",
"c": "CONNPOOL",
"id": 22572,
"ctx": "ShardRegistry",
"msg": "Dropping all pooled connections",
"attr": {
"hostAndPort": "10.11.10.75:27017",
"error": "ShutdownInProgress: Pool for 10.11.10.75:27017 has expired."
}
}
Please note that I don’t have any networking issues.