metal/mtce/src/maintenance/mtcNodeMsg.h
Eric MacDonald 14bb67789e Add pxeboot network mtcAlive messaging to Maintenance
The introduction of the new pxeboot network requires that maintenance
verify and report on messaging failures over that network.

Towards that, this update introduces periodic mtcAlive messaging
between the mtcAgent and mtcClient.

Test Plan:

PASS: Verify install and provision each system type with a mix
             of networking modes ; ethernet, bond and vlan
             - AIO SX, AIO DX, AIO DX plus
             - Standard System 2+1
             - Storage System 2+1+1
PASS: Verify feature with physical on management interface
PASS: Verify feature with vlan on management interface
PASS: Verify feature with bonded management interface
PASS: Verify feature with bonded vlans on management interface
PASS: Verify in bonded cases handling with 2, 1 or no slaves found
PASS: Verify mgmt-combined or separate cluster-host network
PASS: Verify mtcClient pxeboot interface address learning
             - for worker and storage nodes       ; dhcp leases file
             - for controller nodes before unlock ; dhcp leases file
             - for controller nodes after unlock  ; static from ifcfg
             - from controller within 10 seconds of process restart
PASS: Verify mtcAgent pxeboot interface address learning from
             dnsmasq.hosts file
PASS: Verify pxeboot mtcAlive initiation, handling, loss detection
             and recovery
PASS: Verify success and failure handling of all new pxeboot ip
             address learning functions ;
             - dhcp - all system node installs.
             - dnsmasq.hosts - active controller for all hosts.
             - interfaces.d - controller's mtcClient pxeboot address.
             - pxeboot req mtcAlive - mtcAgent mtcAlive request message.
PASS: Verify mtcClient pxeboot network 'mtcAlive request' and 'reboot'
             command handling for ethernet, vlan and bond configs.
PASS: Verify mtcAlive sequence number monitoring, out-of-sequence
             detection, handling and logging.
PASS: Verify pxeboot rx socket binding and non-blocking attribute
PASS: Verify mtcAgent handling stress soaking of sustained incoming
             500+ msgs/sec ; batch handling and logging.
PASS: Verify mtcAgent and mtcClient pxeboot tx and rx socket messaging,
             failure recovery handling and logging.
PASS: Verify pxeboot receiver is not set up on the oam interface on
             controller-0 first install until after initial config
             is complete.

Regression:

PASS: Verify mtcAgent/mtcClient online and offline state management
PASS: Verify mtcAgent/mtcClient command handling
      - over management network
      - over cluster-host network
PASS: Verify mtcClient interface chain log for all iface types
      - bond    : vlan123 -> pxeboot0 (802.3ad 4) -> enp0s8 and enp0s9
      - vlan    : vlan123 -> enp0s8
      - ethernet: enp0s8
PASS: Verify mtcAgent/mtcClient handling and logging including debug
      logging for standard operations
      - node install and unlock
      - node lock and unlock
      - node reinstall, reboot, reset
PASS: Verify graceful recovery handling of heartbeat loss failure.
      - node reboot
      - management interface down
PASS: Verify systemcontroller and subcloud install with dc-libvirt
PASS: Verify no log flooding, coredumps, memory leaks

Story: 2010940
Task: 49541
Change-Id: Ibc87b85e3e0e07c3b8c40b5291bd3372506fbdfb
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
2024-03-28 15:28:27 +00:00

145 lines
5.7 KiB
C++
Executable File

#ifndef __INCLUDE_MTCNODEMSG_HH__
#define __INCLUDE_MTCNODEMSG_HH__
/*
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
/**
* @file
* Wind River CGTS Platform Node Maintenance "Messaging"
*
*/
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
using namespace std;
#include "nodeBase.h"
#include "nodeUtil.h" /* for ... msgSock_type */
#include "msgClass.h"
/*************************************************************************
 * Common Service Messaging Stuff
 *************************************************************************

       +----------*                        +-----------*
       *          * --- agent_addr --->    *           *
       * mtcAgent *                        * mtcClient *
       *          *                        *           *
       *          * <-- client_addr --->   *           *
       *          *                        *           *
  +--> *          * ---+                   *           *
  |    +----------*    |                   +-----------*
  |                    |
 event               config
  |                    |
  |    +----------*    |                   +-----------*
  +--- *          * <--+                   *           *
       * hbsAgent *                        * hbsClient *
       *          * -- multicast req -->   *           *
       *          *                        *           *
       *          * <-- hb pulse resp --   *           *
       *          *                        *           *
       +----------*                        +-----------*             */
/** Shorthand for the generic socket address pointer type.
 *  NOTE(review): presumably used as a cast, e.g. (SA)&addr, in socket
 *  calls - confirm against callers. */
#define SA struct sockaddr*
/** Agent receive buffer size: the product of MAX_NODES and MAX_MSG,
 *  both of which are defined outside this header. */
#define MTC_AGENT_RX_BUFF_SIZE (MAX_NODES*MAX_MSG)
/** NOTE(review): presumably the maximum number of received messages
 *  handled in one batch - confirm against the receive loop. */
#define MAX_RX_MSG_BATCH (50)
/** Maintenance messaging socket control structure.
 *
 *  Groups the socket handles, port numbers and address structures
 *  declared for maintenance messaging. get_sockPtr() returns a pointer
 *  of this type, and several functions declared in this header take
 *  one as their first argument.
 *
 *  Members are grouped by user: mtcAgent sockets, mtcClient sockets,
 *  local event/command ports, select() dispatch state, pxeboot sockets
 *  and miscellaneous descriptors. */
typedef struct
{
/** These sockets define the maintenance system msging. */
/** UDP sockets used by the mtcAgent to transmit and receive
* maintenance commands to the client (compute) node and
* receive the compute node reply in the receive direction */
msgClassSock* mtc_agent_mgmt_tx_socket ; /**< tx to mtc client mgmnt */
msgClassSock* mtc_agent_mgmt_rx_socket ; /**< rx from mtc client mgmnt */
msgClassSock* mtc_agent_clstr_tx_socket ; /**< tx to mtc client clstr */
msgClassSock* mtc_agent_clstr_rx_socket ; /**< rx from mtc client clstr */
int mtc_agent_port ; /**< the agent rx port number */
int mtc_rx_mgmnt_port ; /**< NOTE(review): presumably the mgmnt network rx port; the previous comment duplicated mtc_agent_port's - confirm */
struct sockaddr_in agent_addr; /**< socket attributes struct */
int mtc_agent_mgmt_rx_socket_size ; /**< NOTE(review): presumably the mgmnt rx socket buffer size - confirm */
int mtc_agent_clstr_rx_socket_size ; /**< NOTE(review): presumably the clstr rx socket buffer size - confirm */
/** UDP sockets used by the mtcClient to receive maintenance
* commands from and transmit replies to the mtcAgent */
msgClassSock* mtc_client_mgmt_rx_socket ; /**< rx from controller mgmt */
msgClassSock* mtc_client_mgmt_tx_socket ; /**< tx to controller mgmnt */
msgClassSock* mtc_client_clstr_tx_socket_c0 ; /**< tx to controller-0 clstr */
msgClassSock* mtc_client_clstr_tx_socket_c1 ; /**< tx to controller-1 clstr */
msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */
int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt */
int mtc_clstr_cmd_port ; /**< mtc command port clstr */
struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */
/***************************************************************/
/** Event Receive Interface - (UDP over 'lo') */
int mtc_event_rx_port ; /**< mtc event receive port */
msgClassSock* mtc_event_rx_sock ; /**< ... socket */
/** UDP Mtc to Hbs command port */
int mtc_to_hbs_port ; /**< hbs command port */
msgClassSock* mtc_to_hbs_sock ; /**< ... socket */
/** UDP Hardware Monitor Command Port */
int hwmon_cmd_port ; /**< hardware monitor command port (previous comment read "ava event port" - NOTE(review): confirm) */
msgClassSock* hwmon_cmd_sock ; /**< ... socket */
/** UDP Logger Port */
msgSock_type mtclogd ; /**< messaging into mtclogd */
/* For select dispatch */
struct timeval waitd ; /**< NOTE(review): presumably the select() timeout - confirm */
fd_set readfds; /**< descriptor set used for select dispatch */
/** IPV4 Pxeboot transmit and receive sockets and ports.
 *  Unlike the msgClassSock members above, these are plain ints
 *  (NOTE(review): presumably raw socket descriptors - confirm). */
int pxeboot_tx_socket ;
int mtc_tx_pxeboot_port ;
int pxeboot_rx_socket ;
int mtc_rx_pxeboot_port ;
/** Active Monitor Socket */
int amon_socket ;
/* NOTE(review): the two flags below presumably track acknowledgement of
 * the main and sub-function 'go enabled' replies - confirm with users. */
bool main_go_enabled_reply_ack ;
bool subf_go_enabled_reply_ack ;
int netlink_sock ; /* netlink socket */
int ioctl_sock ; /* general ioctl socket */
float msg_rate ; /* NOTE(review): units and update point not visible in this header - confirm */
} mtc_socket_type ;
/* Maintenance messaging API.
 *
 * Only the declarations are visible in this header; the definitions live
 * elsewhere. Notes marked NOTE(review) are inferred from names and
 * signatures and should be confirmed against the implementations. */

/** Returns a pointer to a mtc_socket_type messaging control structure. */
mtc_socket_type * get_sockPtr ( void );
/** Sends maintenance message 'cmd' using the sockets in 'sock_ptr';
 *  'who_i_am' is passed by value.
 *  NOTE(review): presumably identifies the sender - confirm. */
int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd, string who_i_am );
/** Sends an mtcAlive message for 'identity' on the selected 'interface'.
 *  NOTE(review): the valid 'interface' values are defined elsewhere. */
int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface );
/** NOTE(review): presumably a non-blocking receive of a maintenance
 *  reply - confirm. Takes no arguments. */
int recv_mtc_reply_noblock ( void );
/** Sends command 'cmd' to host 'hostname' over 'interface'.
 *  'json_dict' is optional and defaults to an empty string. */
int send_mtc_cmd ( string & hostname, int cmd, int interface , string json_dict="" );
/** NOTE(review): presumably services an incoming maintenance command
 *  received on 'interface' - confirm. */
int mtc_service_command ( mtc_socket_type * sock_ptr , int interface );
/** Sets the availability status of 'hostname' to 'status'. */
int mtc_set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum status );
/** Sends maintenance event 'cmd'; 'mtce_name_ptr' is a C string.
 *  NOTE(review): presumably names the event source - confirm. */
int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char * mtce_name_ptr );
/** NOTE(review): presumably initializes cluster-host network messaging
 *  on interface name 'iface' - confirm. */
int mtc_clstr_init ( mtc_socket_type * sock_ptr , char * iface );
/** Returns an identity string.
 *  NOTE(review): presumably this host's name - confirm. */
string get_who_i_am ( void );
/** Sends command 'cmd' for 'hostname' to the given 'address' and 'port'.
 *  NOTE(review): presumably targets that host's mtcClient - confirm. */
int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, string address, int port);
#endif