MongoDB replica crashed

Hello! I have a replica set that consists of 3 nodes,

but my 3rd replica unexpectedly stopped working, and when I run systemctl start mongod it says:

systemctl start mongod
Job for mongod.service failed because a timeout was exceeded. See "systemctl status mongod.service" and "journalctl -xe" for details.

and here are the mongod logs:
{"t":{"$date":"2022-09-06T16:28:30.241+06:00"},"s":"I",  "c":"CONTROL",  "id":20698,   "ctx":"main","msg":"***** SERVER RESTARTED *****"}
{"t":{"$date":"2022-09-06T16:28:30.245+06:00"},"s":"I",  "c":"CONTROL",  "id":23285,   "ctx":"main","msg":"Automatically disabling TLS 1.0, to force-enable TLS 1.0 specify –sslDisabledProtocols 'none'"}
{"t":{"$date":"2022-09-06T16:28:30.271+06:00"},"s":"W",  "c":"ASIO",     "id":22601,   "ctx":"main","msg":"No TransportLayer configured during NetworkInterface startup"}
{"t":{"$date":"2022-09-06T16:28:30.271+06:00"},"s":"I",  "c":"NETWORK",  "id":4648601, "ctx":"main","msg":"Implicit TCP FastOpen unavailable. If TCP FastOpen is required, set tcpFastOpenServer, tcpFastOpenClient, and tcpFastOpenQueueSize."}
{"t":{"$date":"2022-09-06T16:28:30.271+06:00"},"s":"W",  "c":"ASIO",     "id":22601,   "ctx":"main","msg":"No TransportLayer configured during NetworkInterface startup"}
{"t":{"$date":"2022-09-06T16:28:30.271+06:00"},"s":"I",  "c":"STORAGE",  "id":4615611, "ctx":"initandlisten","msg":"MongoDB starting","attr":{"pid":7151,"port":27017,"dbPath":"/var/lib/mongo","architecture":"64-bit","host":"facetech-prod-mongo01-uv03.fortebank.com"}}
{"t":{"$date":"2022-09-06T16:28:30.271+06:00"},"s":"I",  "c":"CONTROL",  "id":23403,   "ctx":"initandlisten","msg":"Build Info","attr":{"buildInfo":{"version":"4.4.3","gitVersion":"913d6b62acfbb344dde1b116f4161360acd8fd13","openSSLVersion":"OpenSSL 1.0.1e-fips 11 Feb 2013","modules":[],"allocator":"tcmalloc","environment":{"distmod":"rhel70","distarch":"x86_64","target_arch":"x86_64"}}}}
{"t":{"$date":"2022-09-06T16:28:30.272+06:00"},"s":"I",  "c":"CONTROL",  "id":51765,   "ctx":"initandlisten","msg":"Operating System","attr":{"os":{"name":"CentOS Linux release 7.9.2009 (Core)","version":"Kernel 3.10.0-1160.42.2.el7.x86_64"}}}
{"t":{"$date":"2022-09-06T16:28:30.272+06:00"},"s":"I",  "c":"CONTROL",  "id":21951,   "ctx":"initandlisten","msg":"Options set by command line","attr":{"options":{"config":"/etc/mongod.conf","net":{"bindIp":"10.0.225.235","port":27017},"processManagement":{"fork":true,"pidFilePath":"/var/run/mongodb/mongod.pid","timeZoneInfo":"/usr/share/zoneinfo"},"replication":{"replSetName":"prod-facetech"},"storage":{"dbPath":"/var/lib/mongo","journal":{"enabled":true}},"systemLog":{"destination":"file","logAppend":true,"path":"/var/log/mongodb/mongod.log"}}}}
{"t":{"$date":"2022-09-06T16:28:30.273+06:00"},"s":"W",  "c":"STORAGE",  "id":22271,   "ctx":"initandlisten","msg":"Detected unclean shutdown - Lock file is not empty","attr":{"lockFile":"/var/lib/mongo/mongod.lock"}}
{"t":{"$date":"2022-09-06T16:28:30.273+06:00"},"s":"I",  "c":"STORAGE",  "id":22270,   "ctx":"initandlisten","msg":"Storage engine to use detected by data files","attr":{"dbpath":"/var/lib/mongo","storageEngine":"wiredTiger"}}
{"t":{"$date":"2022-09-06T16:28:30.273+06:00"},"s":"W",  "c":"STORAGE",  "id":22302,   "ctx":"initandlisten","msg":"Recovering data from the last clean checkpoint."}
{"t":{"$date":"2022-09-06T16:28:30.273+06:00"},"s":"I",  "c":"STORAGE",  "id":22315,   "ctx":"initandlisten","msg":"Opening WiredTiger","attr":{"config":"create,cache_size=3398M,session_max=33000,eviction=(threads_min=4,threads_max=4),config_base=false,statistics=(fast),log=(enabled=true,archive=true,path=journal,compressor=snappy),file_manager=(close_idle_time=100000,close_scan_interval=10,close_handle_minimum=250),statistics_log=(wait=0),verbose=[recovery_progress,checkpoint_progress,compact_progress],"}}
{"t":{"$date":"2022-09-06T16:28:30.879+06:00"},"s":"I",  "c":"STORAGE",  "id":22430,   "ctx":"initandlisten","msg":"WiredTiger message","attr":{"message":"[1662460110:879327][7151:0x7f63aed90bc0], txn-recover: [WT_VERB_RECOVERY_PROGRESS] Recovering log 37908 through 37909"}}
{"t":{"$date":"2022-09-06T16:28:30.975+06:00"},"s":"I",  "c":"STORAGE",  "id":22430,   "ctx":"initandlisten","msg":"WiredTiger message","attr":{"message":"[1662460110:975619][7151:0x7f63aed90bc0], txn-recover: [WT_VERB_RECOVERY_PROGRESS] Recovering log 37909 through 37909"}}
{"t":{"$date":"2022-09-06T16:28:31.108+06:00"},"s":"I",  "c":"STORAGE",  "id":22430,   "ctx":"initandlisten","msg":"WiredTiger message","attr":{"message":"[1662460111:108841][7151:0x7f63aed90bc0], txn-recover: [WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS] Main recovery loop: starting at 37908/256 to 37909/256"}}
{"t":{"$date":"2022-09-06T16:28:31.241+06:00"},"s":"I",  "c":"STORAGE",  "id":22430,   "ctx":"initandlisten","msg":"WiredTiger message","attr":{"message":"[1662460111:241647][7151:0x7f63aed90bc0], txn-recover: [WT_VERB_RECOVERY_PROGRESS] Recovering log 37908 through 37909"}}
{"t":{"$date":"2022-09-06T16:28:31.331+06:00"},"s":"I",  "c":"STORAGE",  "id":22430,   "ctx":"initandlisten","msg":"WiredTiger message","attr":{"message":"[1662460111:331583][7151:0x7f63aed90bc0], txn-recover: [WT_VERB_RECOVERY_PROGRESS] Recovering log 37909 through 37909"}}
{"t":{"$date":"2022-09-06T16:28:31.395+06:00"},"s":"I",  "c":"STORAGE",  "id":22430,   "ctx":"initandlisten","msg":"WiredTiger message","attr":{"message":"[1662460111:395447][7151:0x7f63aed90bc0], txn-recover: [WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS] Set global recovery timestamp: (1659204188, 1)"}}
{"t":{"$date":"2022-09-06T16:28:31.395+06:00"},"s":"I",  "c":"STORAGE",  "id":22430,   "ctx":"initandlisten","msg":"WiredTiger message","attr":{"message":"[1662460111:395512][7151:0x7f63aed90bc0], txn-recover: [WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS] Set global oldest timestamp: (1659204183, 1)"}}
{"t":{"$date":"2022-09-06T16:28:32.098+06:00"},"s":"I",  "c":"STORAGE",  "id":4795906, "ctx":"initandlisten","msg":"WiredTiger opened","attr":{"durationMillis":1825}}
{"t":{"$date":"2022-09-06T16:28:32.098+06:00"},"s":"I",  "c":"RECOVERY", "id":23987,   "ctx":"initandlisten","msg":"WiredTiger recoveryTimestamp","attr":{"recoveryTimestamp":{"$timestamp":{"t":1659204188,"i":1}}}}
{"t":{"$date":"2022-09-06T16:28:32.099+06:00"},"s":"I",  "c":"STORAGE",  "id":4366408, "ctx":"initandlisten","msg":"No table logging settings modifications are required for existing WiredTiger tables","attr":{"loggingEnabled":false}}
{"t":{"$date":"2022-09-06T16:28:32.103+06:00"},"s":"I",  "c":"STORAGE",  "id":22383,   "ctx":"initandlisten","msg":"The size storer reports that the oplog contains","attr":{"numRecords":205531,"dataSize":53787115711}}
{"t":{"$date":"2022-09-06T16:28:32.103+06:00"},"s":"I",  "c":"STORAGE",  "id":22386,   "ctx":"initandlisten","msg":"Sampling the oplog to determine where to place markers for truncation"}
{"t":{"$date":"2022-09-06T16:28:32.107+06:00"},"s":"I",  "c":"STORAGE",  "id":22389,   "ctx":"initandlisten","msg":"Sampling from the oplog to determine where to place markers for truncation","attr":{"from":{"$timestamp":{"t":1658914933,"i":1}},"to":{"$timestamp":{"t":1659204198,"i":1}}}}
{"t":{"$date":"2022-09-06T16:28:32.107+06:00"},"s":"I",  "c":"STORAGE",  "id":22390,   "ctx":"initandlisten","msg":"Taking samples and assuming each oplog section contains","attr":{"numSamples":1001,"containsNumRecords":2052,"containsNumBytes":537004935}}
{"t":{"$date":"2022-09-06T16:28:42.159+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":53,"total":1001}}
{"t":{"$date":"2022-09-06T16:28:52.198+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":103,"total":1001}}
{"t":{"$date":"2022-09-06T16:29:02.199+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":157,"total":1001}}
{"t":{"$date":"2022-09-06T16:29:12.200+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":219,"total":1001}}
{"t":{"$date":"2022-09-06T16:29:22.450+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":277,"total":1001}}
{"t":{"$date":"2022-09-06T16:29:32.521+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":352,"total":1001}}
{"t":{"$date":"2022-09-06T16:29:42.660+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":422,"total":1001}}
{"t":{"$date":"2022-09-06T16:29:52.741+06:00"},"s":"I",  "c":"STORAGE",  "id":22392,   "ctx":"initandlisten","msg":"Oplog sampling progress","attr":{"completed":500,"total":1001}}
{"t":{"$date":"2022-09-06T16:30:00.241+06:00"},"s":"I",  "c":"CONTROL",  "id":23377,   "ctx":"SignalHandler","msg":"Received signal","attr":{"signal":15,"error":"Terminated"}}
{"t":{"$date":"2022-09-06T16:30:00.241+06:00"},"s":"I",  "c":"CONTROL",  "id":23378,   "ctx":"SignalHandler","msg":"Signal was sent by kill(2)","attr":{"pid":1,"uid":0}}
{"t":{"$date":"2022-09-06T16:30:00.245+06:00"},"s":"I",  "c":"CONTROL",  "id":23381,   "ctx":"SignalHandler","msg":"will terminate after current cmd ends"}
{"t":{"$date":"2022-09-06T16:30:00.261+06:00"},"s":"I",  "c":"REPL",     "id":4784900, "ctx":"SignalHandler","msg":"Stepping down the ReplicationCoordinator for shutdown","attr":{"waitTimeMillis":10000}}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"COMMAND",  "id":4784901, "ctx":"SignalHandler","msg":"Shutting down the MirrorMaestro"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"SHARDING", "id":4784902, "ctx":"SignalHandler","msg":"Shutting down the WaitForMajorityService"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"NETWORK",  "id":20562,   "ctx":"SignalHandler","msg":"Shutdown: going to close listening sockets"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"NETWORK",  "id":4784905, "ctx":"SignalHandler","msg":"Shutting down the global connection pool"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"STORAGE",  "id":4784906, "ctx":"SignalHandler","msg":"Shutting down the FlowControlTicketholder"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"-",        "id":20520,   "ctx":"SignalHandler","msg":"Stopping further Flow Control ticket acquisitions."}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"REPL",     "id":4784907, "ctx":"SignalHandler","msg":"Shutting down the replica set node executor"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"NETWORK",  "id":4784918, "ctx":"SignalHandler","msg":"Shutting down the ReplicaSetMonitor"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"SHARDING", "id":4784921, "ctx":"SignalHandler","msg":"Shutting down the MigrationUtilExecutor"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"CONTROL",  "id":4784925, "ctx":"SignalHandler","msg":"Shutting down free monitoring"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"STORAGE",  "id":4784927, "ctx":"SignalHandler","msg":"Shutting down the HealthLog"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"STORAGE",  "id":4784929, "ctx":"SignalHandler","msg":"Acquiring the global lock for shutdown"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"-",        "id":4784931, "ctx":"SignalHandler","msg":"Dropping the scope cache for shutdown"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"FTDC",     "id":4784926, "ctx":"SignalHandler","msg":"Shutting down full-time data capture"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"CONTROL",  "id":20565,   "ctx":"SignalHandler","msg":"Now exiting"}
{"t":{"$date":"2022-09-06T16:30:00.305+06:00"},"s":"I",  "c":"CONTROL",  "id":23138,   "ctx":"SignalHandler","msg":"Shutting down","attr":{"exitCode":0}}

This does not look like a crash.

It looks like a normal termination: mongod received SIGTERM, either manually from a shell via kill, or automatically from some other process. Note that the log says the signal was sent by kill(2) from pid 1 (uid 0), i.e. by the init process / systemd itself.

That's strange, because nobody killed it.

Your first line already indicates what to check next: See "systemctl status mongod.service" and "journalctl -xe" for details.

Can you share what you found when following that advice?
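
For reference, these are the commands that error message points at; run them on the affected node right after a failed start (the --since window below is just an example):

# Show the unit's current state and its most recent log lines
systemctl status mongod.service
# Show the full journal for the unit, including who stopped or killed it
journalctl -u mongod.service --since "1 hour ago"
# Or the tail of the whole journal, as the error message suggests
journalctl -xe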


Solved: mongod was killed by systemd because of the long startup. The oplog sampling visible in the log above takes several minutes, longer than the service's start timeout. Increasing the startup timeout solved the issue.
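
In case it helps someone else, here is a minimal sketch of how the timeout can be raised with a systemd drop-in. The 600-second value is an assumption; pick something comfortably above your node's observed startup time (the default start timeout on most distros is 90 seconds):

# Create a drop-in override for the mongod unit
sudo systemctl edit mongod
# In the editor that opens, add the following, then save and exit:
[Service]
TimeoutStartSec=600
# Reload systemd and start the service again
sudo systemctl daemon-reload
sudo systemctl start mongod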

