Change hostwd emergency log to write to /dev/kmsg
The hostwd emergency logs were written to /dev/console; this change adds the prefix "hostwd:" to each log message and writes it to /dev/kmsg instead. Test Plan: Pass: AIO-SX and AIO-DX full deployment. Pass: kill pmond and wait for the emergency log to be written. Pass: check if the emergency log was written to /dev/kmsg. Pass: Verify logging for quorum report missing failure. Pass: Verify logging for quorum process failure. Pass: Verify emergency log crash dump logging to dmesg and console logging for each of the 2 cases above with stress-ng overloading the server (CPU, FS and Memory); stress-ng --vm-bytes 4000000000 --vm-keep -m 30 -i 30 -c 30 Story: 2010533 Task: 47216 Co-authored-by: Eric MacDonald <eric.macdonald@windriver.com> Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com> Co-authored-by: Christopher Souza <Christopher.DeOliveiraSouza@windriver.com> Signed-off-by: Christopher Souza <Christopher.DeOliveiraSouza@windriver.com> Change-Id: I0da82f964dd096840259c4d0ed4e5f558debdf22
This commit is contained in:
parent
acbd301a1c
commit
56ab793bc5
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016 Wind River Systems, Inc.
|
* Copyright (c) 2015-2016,2023 Wind River Systems, Inc.
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*
|
*
|
||||||
@ -14,31 +14,65 @@
|
|||||||
#include <linux/watchdog.h>
|
#include <linux/watchdog.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <unistd.h> /* for execve */
|
#include <unistd.h> /* for execve */
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/**********************************************************
|
||||||
|
* Name: EMERG_PREFIX
|
||||||
|
*
|
||||||
|
* Description: Add a log priority bit mask to the emergency logs.
|
||||||
|
*
|
||||||
|
* Priority logging is set by prefixing a log written to /dev/kmsg
|
||||||
|
* with a <#>. See description and links below.
|
||||||
|
*
|
||||||
|
* Currently using 8 (bitfield): facility LOG_USER (1<<3) combined with priority 0
|
||||||
|
*
|
||||||
|
* #define LOG_KERN (0<<3) // kernel messages
|
||||||
|
* #define LOG_USER (1<<3) // random user-level messages
|
||||||
|
*
|
||||||
|
* "According to syslog.h priorities/facilities are encoded into
|
||||||
|
* a single 32-bit quantity, where the bottom 3 bits are the
|
||||||
|
* priority (0-7) and the top 28 bits are the facility (0-big number).
|
||||||
|
* Both the priorities and the facilities map roughly one-to-one
|
||||||
|
* to strings in the syslogd(8) source code."
|
||||||
|
*
|
||||||
|
* Priorities:
|
||||||
|
* https://github.com/openbsd/src/blob/master/sys/sys/syslog.h#L44-L60
|
||||||
|
*
|
||||||
|
* Facilities
|
||||||
|
* https://github.com/openbsd/src/blob/master/sys/sys/syslog.h#L92-L104
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#define EMERG_PREFIX ((const char *)("<8>hostwd:"))
|
||||||
|
|
||||||
/* In addition to logging to wherever elog messages go,
|
/* In addition to logging to wherever elog messages go,
|
||||||
* this function does its best to log output to the console
|
* this function does its best to log output to the console
|
||||||
* (for the purpose of capturing data when the system is
|
* (for the purpose of capturing data when the system is
|
||||||
* about to go down)
|
* about to go down)
|
||||||
*
|
*
|
||||||
* The path we log to is defined in the config file, exepected to
|
* The path we log to is defined in the config file, expected
|
||||||
* be something like "/dev/console", "/dev/pts/0" or "/dev/ttyS0"
|
* to be something like /dev/console
|
||||||
|
* /dev/pts/0
|
||||||
|
* /dev/ttyS0
|
||||||
|
* /dev/kmsg
|
||||||
*/
|
*/
|
||||||
#define emergency_log(...) \
|
#define emergency_log(...) \
|
||||||
{ \
|
{ \
|
||||||
daemon_config_type *cfg = daemon_get_cfg_ptr (); \
|
daemon_config_type *cfg = daemon_get_cfg_ptr (); \
|
||||||
elog(__VA_ARGS__) \
|
elog(__VA_ARGS__); \
|
||||||
if (cfg->hostwd_console_path) { \
|
if (cfg->hostwd_console_path) { \
|
||||||
FILE* console = fopen(cfg->hostwd_console_path, "a"); \
|
int console = open(cfg->hostwd_console_path, O_RDWR); \
|
||||||
if (NULL != console) { \
|
if (console > 0) { \
|
||||||
fprintf(console, __VA_ARGS__); \
|
char message[MAX_MSG]; \
|
||||||
fclose (console); \
|
snprintf(message, MAX_MSG-1, __VA_ARGS__); \
|
||||||
|
write(console, message, strlen(message)); \
|
||||||
|
close (console); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
int hostw_service_command ( hostw_socket_type * hostw_socket );
|
int hostw_service_command ( hostw_socket_type * hostw_socket );
|
||||||
|
|
||||||
void fork_hostwd_logger ( void );
|
|
||||||
char my_hostname [MAX_HOST_NAME_SIZE+1];
|
char my_hostname [MAX_HOST_NAME_SIZE+1];
|
||||||
|
|
||||||
/* Push daemon state to log file */
|
/* Push daemon state to log file */
|
||||||
@ -130,11 +164,12 @@ void force_crashdump ( void )
|
|||||||
if (( daemon_get_cfg_ptr()->hostwd_kdump_on_stall == 0 ) ||
|
if (( daemon_get_cfg_ptr()->hostwd_kdump_on_stall == 0 ) ||
|
||||||
( ctrl_ptr->kdump_supported == false ))
|
( ctrl_ptr->kdump_supported == false ))
|
||||||
{
|
{
|
||||||
/* crash dump is disabled or not supported */
|
wlog ("Crash dump is disabled or not supported");
|
||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Go for the crash dump */
|
/* Go for the crash dump */
|
||||||
|
emergency_log ("%s forcing a crashdump", EMERG_PREFIX);
|
||||||
|
|
||||||
/* Enable all functions of sysrq */
|
/* Enable all functions of sysrq */
|
||||||
static char sysrq_enable_cmd = '1' ;
|
static char sysrq_enable_cmd = '1' ;
|
||||||
@ -320,12 +355,8 @@ void hostw_service ( void )
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* force a crash dump if that feature is enabled */
|
emergency_log( "%s *** Host watchdog not receiving messages "
|
||||||
force_crashdump();
|
"from PMON ***\n", EMERG_PREFIX);
|
||||||
|
|
||||||
emergency_log( "*** Host watchdog (hostwd) not receiving messages "
|
|
||||||
"from PMON ***\n");
|
|
||||||
|
|
||||||
hostw_log_and_reboot();
|
hostw_log_and_reboot();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -376,11 +407,7 @@ int hostw_service_command ( hostw_socket_type * hostw_socket)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
emergency_log( "*** PMON reports unrecoverable system - message '%s' ***\n", msg[0].buf);
|
emergency_log( "%s *** PMON reports unrecoverable system - %s ***\n", EMERG_PREFIX, msg[0].buf);
|
||||||
|
|
||||||
/* force a crash dump if that feature is enabled */
|
|
||||||
force_crashdump();
|
|
||||||
|
|
||||||
hostw_log_and_reboot();
|
hostw_log_and_reboot();
|
||||||
return FAIL;
|
return FAIL;
|
||||||
}
|
}
|
||||||
@ -406,16 +433,13 @@ void hostw_log_and_reboot()
|
|||||||
{
|
{
|
||||||
daemon_config_type* config = daemon_get_cfg_ptr ();
|
daemon_config_type* config = daemon_get_cfg_ptr ();
|
||||||
|
|
||||||
emergency_log ("*** Host Watchdog declaring system unhealthy ***\n");
|
emergency_log ("%s *** Host Watchdog declaring system unhealthy ***\n", EMERG_PREFIX);
|
||||||
|
|
||||||
/* Start the process to log as much data as possible */
|
/* force a crashdump if enabled */
|
||||||
|
force_crashdump();
|
||||||
/* NOTE: This function currently does not do anything so its commented
|
|
||||||
* out for now. Uncomment when actual value add logging is implemented.
|
|
||||||
fork_hostwd_logger (); */
|
|
||||||
|
|
||||||
if (config->hostwd_reboot_on_err) {
|
if (config->hostwd_reboot_on_err) {
|
||||||
emergency_log ("*** Initiating reboot ***\n");
|
emergency_log ("%s *** Initiating reboot ***\n", EMERG_PREFIX );
|
||||||
|
|
||||||
/* start the process that will perform an ungraceful reboot, if
|
/* start the process that will perform an ungraceful reboot, if
|
||||||
* the graceful reboot fails */
|
* the graceful reboot fails */
|
||||||
@ -425,32 +449,3 @@ void hostw_log_and_reboot()
|
|||||||
fork_graceful_reboot ( GRACEFUL_REBOOT_DELAY );
|
fork_graceful_reboot ( GRACEFUL_REBOOT_DELAY );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Initiate the thread which logs as much information about the system
|
|
||||||
* as possible.
|
|
||||||
*/
|
|
||||||
void fork_hostwd_logger ( void )
|
|
||||||
{
|
|
||||||
int parent = double_fork ();
|
|
||||||
if (0 > parent) /* problem forking */
|
|
||||||
{
|
|
||||||
elog ("failed to fork hostwd logging process\n");
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
else if (0 == parent) /* if we're the child */
|
|
||||||
{
|
|
||||||
sigset_t mask , mask_orig ;
|
|
||||||
|
|
||||||
setup_child(false); /* initialize the process group, etc */
|
|
||||||
ilog ("*** Host Watchdog Logging Thread ***\n");
|
|
||||||
|
|
||||||
sigemptyset (&mask);
|
|
||||||
sigaddset (&mask, SIGTERM );
|
|
||||||
sigprocmask (SIG_BLOCK, &mask, &mask_orig );
|
|
||||||
|
|
||||||
/* TODO - log data here */
|
|
||||||
exit (0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
@ -6,8 +6,7 @@ hostwd_kdump_on_stall = 1 ; issue a sysrq crash dump on quorum msg'ing stall
|
|||||||
; - missing pmond quorum reports
|
; - missing pmond quorum reports
|
||||||
hostwd_failure_threshold = 3 ; number of # missed messages before action taken
|
hostwd_failure_threshold = 3 ; number of # missed messages before action taken
|
||||||
hostwd_use_kern_wd = 1 ; use kernel /dev/watchdog as backup watchdog
|
hostwd_use_kern_wd = 1 ; use kernel /dev/watchdog as backup watchdog
|
||||||
hostwd_console_path = /dev/console ; console on which to log extreme events, like
|
hostwd_console_path = /dev/kmsg ; log extreme events to this device
|
||||||
; notification of reboot
|
|
||||||
|
|
||||||
[timeouts]
|
[timeouts]
|
||||||
kernwd_update_period = 300 ; timeout until kernel resets system due to dead
|
kernwd_update_period = 300 ; timeout until kernel resets system due to dead
|
||||||
|
Loading…
x
Reference in New Issue
Block a user