From 3c94b0e552ce7aeed29603a3476b7a7d07847f84 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Fri, 8 Mar 2024 15:55:33 +0000 Subject: [PATCH] Avoid creating non-volatile node locked file while in simplex mode It is possible to lock controller-0 on a DX system before controller-1 has been configured/enabled. Due to the following recent updates, this can lead to SM disabling all controller services on that now-locked controller-0, thereby preventing any subsequent controller-0 unlock attempts. https://review.opendev.org/c/starlingx/metal/+/907620 https://review.opendev.org/c/starlingx/ha/+/910227 This update modifies the mtce node locked flag file management so that the non-volatile node locked file (/etc/mtc/tmp/.node_locked) is only created on a locked host after controller-1 is installed, provisioned and configured. This prevents SM from shutting down if the administrator locks controller-0 before controller-1 is configured. Test Plan: PASS: Verify AIO DX Install. PASS: Verify Standard System Install. PASS: Verify Swact back and forth. PASS: Verify lock/unlock of controller-0 prior to controller-1 config PASS: Verify the non-volatile node locked flag file is not created while the /etc/platform/simplex file exists on the active controller. PASS: Verify lock and delete of controller-1 puts the system back into simplex mode where the non-volatile node locked flag file is once again not created if controller-0 is then unlocked. PASS: Verify an existing non-volatile node locked flag file is removed if present on a node that is locked without the new persist option. PASS: Verify the originally reported issue is resolved for DX systems. 
Closes-Bug: 2051578 Change-Id: I40e9dd77aa3e5b0dc03dca3b1d3d73153d8816be Signed-off-by: Eric MacDonald --- mtce-common/src/common/nodeBase.h | 3 ++- mtce/src/maintenance/mtcCompMsg.cpp | 17 ++++++++++++----- mtce/src/maintenance/mtcCtrlMsg.cpp | 14 ++++++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index 8af83c3b..38a188d7 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ -66,7 +66,7 @@ void daemon_exit ( void ); #define FAIL_BM_IPADDR (121*256) #define FAIL_BM_PASSWORD (122*256) - +#define MTC_PARM_LOCK_PERSIST_IDX (0) // node lock command #define MTC_PARM_UPTIME_IDX (0) #define MTC_PARM_HEALTH_IDX (1) #define MTC_PARM_FLAGS_IDX (2) @@ -110,6 +110,7 @@ void daemon_exit ( void ); #define SMGMT_DEGRADED_FILE ((const char *)"/var/run/.sm_degraded") #define SMGMT_UNHEALTHY_FILE ((const char *)"/var/run/.sm_node_unhealthy") #define UNLOCK_READY_FILE ((const char *)"/etc/platform/.unlock_ready") +#define STILL_SIMPLEX_FILE ((const char *)"/etc/platform/simplex") /** path to and module init file name */ #define MTCE_CONF_FILE ((const char *)"/etc/mtc.conf") diff --git a/mtce/src/maintenance/mtcCompMsg.cpp b/mtce/src/maintenance/mtcCompMsg.cpp index ba6049ba..446f1f53 100755 --- a/mtce/src/maintenance/mtcCompMsg.cpp +++ b/mtce/src/maintenance/mtcCompMsg.cpp @@ -237,13 +237,20 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) daemon_log ( NODE_LOCKED_FILE, ADMIN_LOCKED_STR); } - /* Preserve the node locked state in a non-volatile backup - * file that persists over reboot. - * Maintaining the legacy NODE_LOCKED_FILE as other sw looks at it. */ - if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == false ) + /* Only create the non-volatile NODE_LOCKED_FILE_BACKUP file if the + * LOCK_PERSIST flag is present. 
*/ + if ( msg.num && msg.parm[MTC_PARM_LOCK_PERSIST_IDX] ) { - daemon_log ( NODE_LOCKED_FILE_BACKUP, ADMIN_LOCKED_STR ); + /* Preserve the node locked state in a non-volatile backup + * file that persists over reboot. + * Maintaining the legacy NODE_LOCKED_FILE as other sw looks at it. */ + if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == false ) + daemon_log ( NODE_LOCKED_FILE_BACKUP, ADMIN_LOCKED_STR ); } + /* Otherwise if we get a locked message without the LOCK_PERSIST flag + * then remove the non-volatile NODE_LOCKED_FILE_BACKUP file if it exists */ + else if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == true ) + daemon_remove_file ( NODE_LOCKED_FILE_BACKUP ); } else if ( msg.cmd == MTC_MSG_UNLOCKED ) { diff --git a/mtce/src/maintenance/mtcCtrlMsg.cpp b/mtce/src/maintenance/mtcCtrlMsg.cpp index 69fef347..1b1ccb5c 100755 --- a/mtce/src/maintenance/mtcCtrlMsg.cpp +++ b/mtce/src/maintenance/mtcCtrlMsg.cpp @@ -717,6 +717,20 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); mtc_cmd.cmd = cmd ; mtc_cmd.num = 0 ; + /* Only set the LOCK_PERSIST flag if the STILL_SIMPLEX_FILE + * is no longer present. + * + * The mtcClient will NOT create the non-volatile + * NODE_LOCKED_FILE_BACKUP file if the LOCK_PERSIST flag + * is missing. + * + * This way SM won't shut down or prevent activating on a + * locked controller until the system is truly duplex. */ + if ( daemon_is_file_present ( STILL_SIMPLEX_FILE ) == false ) + { + mtc_cmd.num = 1 ; + mtc_cmd.parm[MTC_PARM_LOCK_PERSIST_IDX] = true ; + } rc = PASS ; break ; }