In total I have 7 replica set members: 4 on SERVER1 and 3 on SERVER2. Due to some issue, SERVER1 has shut down. Although 3 members still remain on SERVER2 (2 secondaries and 1 arbiter), an election is not taking place and I am not sure why.
This is the rs.status() output from a secondary on SERVER2:
{
"set" : "REPLICASETNAME",
"date" : ISODate("2021-01-18T06:32:58.873Z"),
"myState" : 2,
"term" : NumberLong(282),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 4,
"writeMajorityCount" : 4,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"lastCommittedWallTime" : ISODate("2021-01-18T03:37:14.660Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"readConcernMajorityWallTime" : ISODate("2021-01-18T03:37:14.660Z"),
"appliedOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"durableOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"lastAppliedWallTime" : ISODate("2021-01-18T03:37:14.660Z"),
"lastDurableWallTime" : ISODate("2021-01-18T03:37:14.660Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1610941034, 1),
"lastStableCheckpointTimestamp" : Timestamp(1610941034, 1),
"members" : [
{
"_id" : 0,
"name" : "SERVER1:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.494Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "SERVER1:27018",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.493Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 2,
"name" : "SERVER1:27019",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.494Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 3,
"name" : "SERVER1:27020",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.493Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 4,
"name" : "SERVER2:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 563,
"optime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"optimeDate" : ISODate("2021-01-18T03:37:14Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "could not find member to sync from",
"configVersion" : 80072,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 5,
"name" : "server2:27018",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 441,
"optime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"optimeDurable" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"optimeDate" : ISODate("2021-01-18T03:37:14Z"),
"optimeDurableDate" : ISODate("2021-01-18T03:37:14Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:58.568Z"),
"lastHeartbeatRecv" : ISODate("2021-01-18T06:32:58.653Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 80072
},
{
"_id" : 6,
"name" : "SERVER2:27020",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 441,
"lastHeartbeat" : ISODate("2021-01-18T06:32:58.546Z"),
"lastHeartbeatRecv" : ISODate("2021-01-18T06:32:58.087Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 80072
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1610941034, 1),
"signature" : {
"hash" : BinData(0,"JnS/QEMfZgZaDZezG44AVJ5yod4="),
"keyId" : NumberLong("6859706153517973505")
}
},
"operationTime" : Timestamp(1610941034, 1)
}
This is the rs.conf() output:
{
"_id" : "REPLICASETNAME",
"version" : 68,
"protocolVersion" : NumberLong(1),
"writeConcernMajorityJournalDefault" : true,
"members" : [
{
"_id" : 0,
"host" : "SERVER1:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 10,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 1,
"host" : "SERVER1:27018",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 5,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 2,
"host" : "SERVER1:27019",
"arbiterOnly" : true,
"buildIndexes" : true,
"hidden" : false,
"priority" : 0,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 3,
"host" : "SERVER1:27020",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 5,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 4,
"host" : "SERVER2:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 5,
"host" : "SERVER2:27018",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 6,
"host" : "SERVER2:27020",
"arbiterOnly" : true,
"buildIndexes" : true,
"hidden" : false,
"priority" : 0,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 10,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("5e450995dc745aa0d45e8d74")
}
}
The logs of both SERVER2 secondaries (secondary 1 and 2) look like this:
2021-01-18T06:36:26.998+0000 I ELECTION [replexec-7] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:36:36.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27020
2021-01-18T06:36:36.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27018
2021-01-18T06:36:36.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27017
2021-01-18T06:36:36.994+0000 I CONNPOOL [Replication] Connecting to SERVER1:27019
2021-01-18T06:36:37.192+0000 I ELECTION [replexec-7] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:36:48.172+0000 I ELECTION [replexec-7] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:36:56.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27018
2021-01-18T06:36:56.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27017
2021-01-18T06:36:56.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27020
2021-01-18T06:36:56.994+0000 I CONNPOOL [Replication] Connecting to SERVER1:27019
2021-01-18T06:36:59.118+0000 I ELECTION [replexec-6] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:37:06.544+0000 I NETWORK [conn173] end connection 127.0.0.1:34860 (156 connections now open)
2021-01-18T06:37:09.613+0000 I ELECTION [replexec-6] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:37:16.159+0000 I NETWORK [listener] connection accepted from 172.28.0.1:47278 #174 (157 connections now open)
2021-01-18T06:37:16.160+0000 I NETWORK [conn174] received client metadata from 172.28.0.1:47278 conn174: { application: { name: "MongoDB Shell" }, driver: { name: "MongoDB Internal Client", version: "4.4.0" }, os: { type: "Linux", name: "Ubuntu", architecture: "x86_64", version: "18.04" } }
2021-01-18T06:37:16.190+0000 I ACCESS [conn174] Successfully authenticated as principal vvgcaameig on admin from client 172.28.0.1:47278
2021-01-18T06:37:16.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27017
2021-01-18T06:37:16.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27020
2021-01-18T06:37:16.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27018
2021-01-18T06:37:16.994+0000 I CONNPOOL [Replication] Connecting to SERVER1:27019
2021-01-18T06:37:19.924+0000 I ELECTION [replexec-6] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:37:24.291+0000 I NETWORK [conn174] end connection 172.28.0.1:47278 (156 connections now open)
2021-01-18T06:37:30.959+0000 I ELECTION [replexec-7] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:37:36.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27020
2021-01-18T06:37:36.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27018
2021-01-18T06:37:36.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27017
2021-01-18T06:37:36.994+0000 I CONNPOOL [Replication] Connecting to SERVER1:27019
2021-01-18T06:37:38.720+0000 I NETWORK [listener] connection accepted from SERVER2IP:31560 #175 (157 connections now open)
2021-01-18T06:37:38.720+0000 I NETWORK [conn175] received client metadata from SERVER2IP:31560 conn175: { application: { name: "MongoDB Shell" }, driver: { name: "MongoDB Internal Client", version: "4.4.0" }, os: { type: "Linux", name: "Ubuntu", architecture: "x86_64", version: "18.04" } }
2021-01-18T06:37:38.757+0000 I ACCESS [conn175] Successfully authenticated as principal vvgcaameig on admin from client SERVER2IP:31560
2021-01-18T06:37:41.175+0000 I ELECTION [replexec-6] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:37:51.868+0000 I ELECTION [replexec-7] Not starting an election, since we are not electable due to: Not standing for election because I cannot see a majority (mask 0x1)
2021-01-18T06:37:56.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27018
2021-01-18T06:37:56.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27017
2021-01-18T06:37:56.648+0000 I CONNPOOL [ReplicaSetMonitor-TaskExecutor] Connecting to SERVER1:27020
2021-01-18T06:37:56.994+0000 I CONNPOOL [Replication] Connecting to SERVER1:27019
{
"set" : "REPLICASETNAME",
"date" : ISODate("2021-01-18T06:32:58.873Z"),
"myState" : 2,
"term" : NumberLong(282),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 4,
"writeMajorityCount" : 4,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"lastCommittedWallTime" : ISODate("2021-01-18T03:37:14.660Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"readConcernMajorityWallTime" : ISODate("2021-01-18T03:37:14.660Z"),
"appliedOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"durableOpTime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"lastAppliedWallTime" : ISODate("2021-01-18T03:37:14.660Z"),
"lastDurableWallTime" : ISODate("2021-01-18T03:37:14.660Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1610941034, 1),
"lastStableCheckpointTimestamp" : Timestamp(1610941034, 1),
"members" : [
{
"_id" : 0,
"name" : "SERVER1:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.494Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "SERVER1:27018",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.493Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 2,
"name" : "SERVER1:27019",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.494Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 3,
"name" : "SERVER1:27020",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:57.493Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Couldn't get a connection within the time limit",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 4,
"name" : "SERVER2:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 563,
"optime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"optimeDate" : ISODate("2021-01-18T03:37:14Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "could not find member to sync from",
"configVersion" : 80072,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 5,
"name" : "dev.instasafe.io:27018",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 441,
"optime" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"optimeDurable" : {
"ts" : Timestamp(1610941034, 1),
"t" : NumberLong(282)
},
"optimeDate" : ISODate("2021-01-18T03:37:14Z"),
"optimeDurableDate" : ISODate("2021-01-18T03:37:14Z"),
"lastHeartbeat" : ISODate("2021-01-18T06:32:58.568Z"),
"lastHeartbeatRecv" : ISODate("2021-01-18T06:32:58.653Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 80072
},
{
"_id" : 6,
"name" : "dev.instasafe.io:27020",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 441,
"lastHeartbeat" : ISODate("2021-01-18T06:32:58.546Z"),
"lastHeartbeatRecv" : ISODate("2021-01-18T06:32:58.087Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 80072
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1610941034, 1),
"signature" : {
"hash" : BinData(0,"JnS/QEMfZgZaDZezG44AVJ5yod4="),
"keyId" : NumberLong("6859706153517973505")
}
},
"operationTime" : Timestamp(1610941034, 1)
}
As per the logs, it keeps trying to connect to SERVER1 even though it is down. Why does it not try the SERVER2 members instead? There are 2 secondaries and 1 arbiter there, so it should easily be able to hold an election.