Merge "Make Hardware Monitor sensor list a thread local variable"
This commit is contained in:
commit
121425ce70
mtce-common/src
common
daemon
mtce/src
@ -28,6 +28,8 @@ using namespace std;
|
||||
#define BMC_PROTOCOL__REDFISH_STR ((const char *)("redfish"))
|
||||
#define BMC_PROTOCOL__IPMITOOL_STR ((const char *)("ipmitool"))
|
||||
#define BMC_PROTOCOL__REDFISHTOOL_STR ((const char *)("redfishtool"))
|
||||
#define WANT_DATED_IPMI_SENSOR_DATA_FILES ((const char *)("/var/run/bmc/ipmitool/want_dated_sensor_data_files"))
|
||||
#define WANT_DATED_REDFISH_SENSOR_DATA_FILES ((const char *)("/var/run/bmc/redfishtool/want_dated_sensor_data_files"))
|
||||
|
||||
/* learned graceful and immediate power control command strings */
|
||||
typedef struct
|
||||
|
@ -982,7 +982,7 @@ int httpUtil_api_request ( libEvent & event )
|
||||
}
|
||||
|
||||
httpUtil_api_request_done:
|
||||
|
||||
daemon_signal_hdlr ();
|
||||
httpUtil_free_conn ( event );
|
||||
httpUtil_free_base ( event );
|
||||
|
||||
@ -1085,7 +1085,7 @@ void httpUtil_log_event ( libEvent * event_ptr )
|
||||
}
|
||||
|
||||
snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1,
|
||||
"%s [%5d] %s %s '%s' seq:%d -> Status : %d {execution time %ld.%06ld secs}\n",
|
||||
"%s [%5d] %s %s '%s' seq:%d -> Status : %d {execution time %ld.%ld secs}\n",
|
||||
pt(), getpid(),
|
||||
event_ptr->hostname.c_str(),
|
||||
event_ptr->service.c_str(),
|
||||
|
@ -2500,8 +2500,10 @@ static void redirect_stdout_stderr(const string& hostname,
|
||||
stdout_copy = -1;
|
||||
stderr_copy = -1;
|
||||
|
||||
// Open file with explicit permissions: rw-r--r-- (0644)
|
||||
int redirect_fd = open(output_filename.c_str(),
|
||||
O_CREAT | O_WRONLY | O_TRUNC);
|
||||
O_CREAT | O_WRONLY | O_TRUNC,
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); // 0644
|
||||
if ( redirect_fd < 0 )
|
||||
{
|
||||
elog ("%s failed to open output filename: [%s] - error code = %d (%s)",
|
||||
|
@ -50,7 +50,7 @@ static unsigned int __thread_init_sig ;
|
||||
|
||||
static std::string threadStages_str[THREAD_STAGE__STAGES+1];
|
||||
|
||||
int threadUtil_init ( void (*handler)(int, siginfo_t*, void* ))
|
||||
int threadUtil_init ( void (*handler)(int, siginfo_t*, void* ), size_t stack_size )
|
||||
{
|
||||
/* preserve parent process timer handler */
|
||||
thread_timer_handler = handler ;
|
||||
@ -67,24 +67,23 @@ int threadUtil_init ( void (*handler)(int, siginfo_t*, void* ))
|
||||
/* setup to create a 'detached' threads */
|
||||
pthread_attr_init(&__attr);
|
||||
pthread_attr_setdetachstate(&__attr, PTHREAD_CREATE_DETACHED);
|
||||
threadUtil_setstack_size ();
|
||||
threadUtil_setstack_size ( stack_size );
|
||||
|
||||
__thread_init_sig = THREAD_INIT_SIG ;
|
||||
|
||||
return (PASS);
|
||||
}
|
||||
|
||||
#define MTCE_PTHREAD_MAX_STACK_SIZE (0x20000) /* 128K */
|
||||
void threadUtil_setstack_size ( void )
|
||||
void threadUtil_setstack_size ( size_t stack_size )
|
||||
{
|
||||
size_t stack_size_before = 0 ;
|
||||
size_t stack_size_after = 0 ;
|
||||
/* manage pthread stack size */
|
||||
if ( pthread_attr_getstacksize (&__attr,&stack_size_before) == PASS )
|
||||
{
|
||||
if ( stack_size_before > MTCE_PTHREAD_MAX_STACK_SIZE )
|
||||
if ( stack_size_before > stack_size )
|
||||
{
|
||||
if ( pthread_attr_setstacksize ( &__attr, MTCE_PTHREAD_MAX_STACK_SIZE ) == PASS )
|
||||
if ( pthread_attr_setstacksize ( &__attr, stack_size ) == PASS )
|
||||
{
|
||||
if ( pthread_attr_getstacksize (&__attr,&stack_size_after) == PASS )
|
||||
{
|
||||
|
@ -257,7 +257,7 @@ typedef struct
|
||||
|
||||
/* module init/fini */
|
||||
void threadUtil_fini ( void );
|
||||
int threadUtil_init ( void (*handler)(int, siginfo_t*, void* ));
|
||||
int threadUtil_init ( void (*handler)(int, siginfo_t*, void* ), size_t stack_size);
|
||||
|
||||
#define DEFAULT_SYSTEM_REQUEST_LATENCY_SECS (unsigned long long)(15)
|
||||
int threadUtil_bmcSystemCall (string hostname,
|
||||
@ -265,7 +265,7 @@ int threadUtil_bmcSystemCall (string hostname,
|
||||
string datafile,
|
||||
unsigned long long latency_threshold_secs);
|
||||
|
||||
void threadUtil_setstack_size ( void );
|
||||
void threadUtil_setstack_size ( size_t stack_size );
|
||||
|
||||
/* Onetime thread init setup */
|
||||
void thread_init ( thread_ctrl_type & ctrl,
|
||||
|
@ -62,7 +62,7 @@ void daemon_remove_pidfile ( void );
|
||||
void daemon_remove_file ( const char * filename );
|
||||
void daemon_rename_file ( const char * path, const char * old_filename, const char * new_filename );
|
||||
void daemon_make_dir ( const char * dir );
|
||||
|
||||
int daemon_copy_file (string hostname, const char *source );
|
||||
string daemon_read_file ( const char * filename );
|
||||
|
||||
void daemon_logfile_close ( void );
|
||||
|
@ -121,6 +121,69 @@ bool daemon_is_file_present ( const char * filename )
|
||||
return (false);
|
||||
}
|
||||
|
||||
// Format the current local time as "YYYY-MM-DD_HH-MM-SS" into 'date_str'.
//
// date_str : caller supplied buffer that receives the null terminated string
// size     : size of 'date_str' in bytes ; the 19 character timestamp plus
//            null termination needs at least 20 bytes
//
// A NULL or zero sized buffer is ignored. On any other failure (time
// conversion failed or the buffer is too small for the timestamp) 'date_str'
// is set to an empty string rather than being left with unspecified content.
static void get_current_date_as_string ( char *date_str, size_t size )
{
    if (( date_str == NULL ) || ( size == 0 ))
        return ;

    // Guarantee a terminated string on every exit path
    date_str[0] = '\0' ;

    time_t now = time(NULL);

    // localtime() hands back a pointer to shared static storage ; use the
    // reentrant variant with a caller owned 'struct tm' since this helper
    // is reachable from thread entry points through daemon_copy_file.
    struct tm local_time ;
    if ( localtime_r ( &now, &local_time ) == NULL )
        return ;

    // Format: YYYY-MM-DD_HH-MM-SS
    // strftime returns 0 and leaves the buffer unspecified if it does not fit
    if ( strftime ( date_str, size, "%Y-%m-%d_%H-%M-%S", &local_time ) == 0 )
        date_str[0] = '\0' ;
}
|
||||
|
||||
// 16KB buffer for efficient copying
|
||||
#define MAX_FILE_CONTENT_BUFFER_SIZE 0x4000 // 16 KBytes
|
||||
// Copy 'source' to a sibling file named '<source>_<YYYY-MM-DD_HH-MM-SS>'.
//
// hostname : used only as the prefix of the log messages
// source   : path of the file to copy
//
// Returns PASS when the whole file was copied, otherwise FAIL ; each
// failure path logs a warning that states what went wrong.
int daemon_copy_file ( string hostname, const char *source )
{
    if ( source == NULL )
    {
        wlog ("%s unable to copy file ; no source filename specified", hostname.c_str());
        return FAIL ;
    }

    // Open source file in binary mode
    FILE *src = fopen (source, "rb");
    if (!src)
    {
        // Error path
        wlog ("%s unable to open source file: %s ; (%d:%m)", hostname.c_str(), source, errno);
        return FAIL ;
    }

    // Generate the destination filename with date suffix

    // Date format YYYY-MM-DD_HH-MM-SS is 19 chars ; 21 leaves room for the
    // null termination. The '_' separator is added by the snprintf below.
    // Zero initialized so that it is a valid string no matter what.
    char date_suffix[21] = { 0 };
    get_current_date_as_string (date_suffix, sizeof(date_suffix));

    // Max hostname size is 256 plus extra for path and the rest
    // of the filename size with the dated suffix. 512 is ample.
    // Example /var/run/bmc/redfishtool/hwmond_controller-0_thermal_sensor_data
    char destination[512];

    // Create the date suffixed destination filename
    int name_len = snprintf(destination, sizeof(destination), "%s_%s", source, date_suffix);
    if (( name_len < 0 ) || ( static_cast<size_t>(name_len) >= sizeof(destination) ))
    {
        // A truncated name would send the copy to the wrong file
        wlog ("%s destination filename for '%s' is too long for copy operation",
                  hostname.c_str(), source);
        fclose (src);
        return FAIL ;
    }

    // Open destination file in binary mode
    FILE *dest = fopen(destination, "wb");
    if (!dest)
    {
        // Error path
        wlog ("%s failed to open destination file '%s' for copy operation",
                  hostname.c_str(), destination);

        fclose (src);
        return FAIL ;
    }

    char   buffer [MAX_FILE_CONTENT_BUFFER_SIZE];
    size_t bytes_read ;
    bool   write_failed = false ;

    // Read from source and write to destination in chunks
    while ((bytes_read = fread(buffer, 1, sizeof(buffer), src)) > 0)
    {
        if ( fwrite(buffer, 1, bytes_read, dest) != bytes_read )
        {
            write_failed = true ;
            break ;
        }
    }

    // fread returning 0 means end-of-file or error ; tell them apart
    bool read_failed = ( ferror(src) != 0 ) ;

    // Close files ; buffered data is flushed by fclose so a failure
    // closing the destination also means an incomplete copy.
    fclose (src);
    if ( fclose (dest) != 0 )
        write_failed = true ;

    if ( read_failed )
    {
        wlog ("%s failed to read source file '%s' during copy operation",
                  hostname.c_str(), source);
        return FAIL ;
    }
    if ( write_failed )
    {
        wlog ("%s failed to write destination file '%s' during copy operation",
                  hostname.c_str(), destination);
        return FAIL ;
    }

    ilog ("%s file '%s' copied to '%s'\n", hostname.c_str(), source, destination);
    return PASS ;
}
|
||||
|
||||
void daemon_healthcheck ( const char * sig )
|
||||
{
|
||||
FILE * hc_file_stream ;
|
||||
|
@ -49,8 +49,8 @@ using namespace std;
|
||||
#endif
|
||||
#define __AREA__ "mon"
|
||||
|
||||
#define MAX_HOST_SENSORS (512) // (100)
|
||||
#define MAX_HOST_GROUPS (20)
|
||||
#define MAX_HOST_SENSORS (256)
|
||||
#define MAX_HOST_GROUPS (10)
|
||||
#define MIN_SENSOR_GROUPS (4)
|
||||
#define HWMON_DEFAULT_LARGE_INTERVAL (MTC_MINS_15)
|
||||
#define HWMON_DEFAULT_AUDIT_INTERVAL (MTC_MINS_2)
|
||||
@ -58,6 +58,7 @@ using namespace std;
|
||||
#define DEGRADE_AUDIT_TRIGGER (2)
|
||||
#define MAX_SENSORS_NOT_FOUND (5)
|
||||
#define START_DEBOUCE_COUNT (1)
|
||||
#define HWMOND_STACK_SIZE (0x80000) // 512 KBytes
|
||||
|
||||
// Power sensor data for Dell R740-emc-1 needs 45KiB
|
||||
// Thermal sensor readout on wolfpass requires 20KiB
|
||||
|
@ -82,9 +82,9 @@ void sensor_data_init ( sensor_data_type & data )
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
void sensor_data_print ( const sensor_data_type & data )
|
||||
void sensor_data_print ( string & hostname, const sensor_data_type & data )
|
||||
{
|
||||
blog3 ("%s is %s : %s (%s) %s %s %s %s %s %s %s\n",
|
||||
blog3 ("%s %s is %s : %s (%s) %s %s %s %s %s %s %s\n", hostname.c_str(),
|
||||
data.name.c_str(),
|
||||
data.status.c_str(),
|
||||
data.value.c_str(),
|
||||
@ -107,7 +107,7 @@ void sensor_data_print ( const sensor_data_type & data )
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
int bmc_load_json_sensor ( sensor_data_type & sensor_data , string json_sensor_data )
|
||||
int bmc_load_json_sensor ( string & hostname, sensor_data_type & sensor_data , string json_sensor_data )
|
||||
{
|
||||
int rc = FAIL_KEY_VALUE_PARSE ;
|
||||
// ilog ("sensor data:%s\n", json_sensor_data.c_str() );
|
||||
@ -126,7 +126,7 @@ int bmc_load_json_sensor ( sensor_data_type & sensor_data , string json_sensor_d
|
||||
sensor_data.ucr = jsonUtil_get_key_value_string ( raw_obj, "ucr" ) ;
|
||||
sensor_data.unc = jsonUtil_get_key_value_string ( raw_obj, "unc" ) ;
|
||||
|
||||
sensor_data_print ( sensor_data );
|
||||
sensor_data_print ( hostname, sensor_data );
|
||||
|
||||
json_object_put(raw_obj);
|
||||
rc = PASS ;
|
||||
@ -310,7 +310,7 @@ int hwmonHostClass::bmc_load_sensor_samples ( struct hwmonHostClass::hwmon_host
|
||||
rc = jsonUtil_get_array_idx ( msg_ptr, BMC_JSON__SENSORS_LABEL, index, sensor_data ) ;
|
||||
if ( rc == PASS )
|
||||
{
|
||||
if ( bmc_load_json_sensor ( host_ptr->sample[host_ptr->samples], sensor_data ) == PASS )
|
||||
if ( bmc_load_json_sensor ( host_ptr->hostname , host_ptr->sample[host_ptr->samples], sensor_data ) == PASS )
|
||||
{
|
||||
bool found = false ;
|
||||
|
||||
@ -635,7 +635,7 @@ int hwmonHostClass::bmc_update_sensors ( struct hwmonHostClass::hwmon_host * hos
|
||||
host_ptr->sensor[i].sensorname.c_str(),
|
||||
bmc_status);
|
||||
|
||||
sensor_data_print (host_ptr->sample[j]);
|
||||
sensor_data_print ( host_ptr->hostname, host_ptr->sample[j]);
|
||||
blog3 ("%s ... %s\n", host_ptr->hostname.c_str(), host_ptr->bmc_thread_info.data.c_str());
|
||||
|
||||
host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MINOR ;
|
||||
@ -698,7 +698,7 @@ int hwmonHostClass::bmc_update_sensors ( struct hwmonHostClass::hwmon_host * hos
|
||||
host_ptr->sensor[i].sensorname.c_str(),
|
||||
bmc_status);
|
||||
|
||||
sensor_data_print (host_ptr->sample[j]);
|
||||
sensor_data_print ( host_ptr->hostname, host_ptr->sample[j]);
|
||||
blog3 ("%s ... %s\n", host_ptr->hostname.c_str(), host_ptr->bmc_thread_info.data.c_str());
|
||||
|
||||
host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MINOR ;
|
||||
|
@ -41,9 +41,9 @@
|
||||
#define MAX_IPMITOOL_PARSE_ERRORS (20)
|
||||
|
||||
void sensor_data_init ( sensor_data_type & data );
|
||||
void sensor_data_print ( const sensor_data_type & data );
|
||||
void sensor_data_print ( string & hostname, const sensor_data_type & data );
|
||||
void sensor_data_copy ( sensor_data_type & from, sensor_data_type & to );
|
||||
|
||||
int bmc_load_json_sensor ( sensor_data_type & sensor_data , string json_sensor_data );
|
||||
int bmc_load_json_sensor ( string & hostname, sensor_data_type & sensor_data , string json_sensor_data );
|
||||
|
||||
#endif
|
||||
|
@ -272,8 +272,6 @@ int daemon_init ( string iface, string nodetype )
|
||||
|
||||
obj_ptr->system_type = daemon_system_type ();
|
||||
|
||||
threadUtil_init ( hwmonTimer_handler ) ;
|
||||
|
||||
/* Bind signal handlers */
|
||||
if ( daemon_signal_init () != PASS )
|
||||
{
|
||||
@ -295,7 +293,7 @@ int daemon_init ( string iface, string nodetype )
|
||||
rc = FAIL_SOCKET_INIT ;
|
||||
}
|
||||
|
||||
threadUtil_init ( hwmonTimer_handler ) ;
|
||||
threadUtil_init ( hwmonTimer_handler, HWMOND_STACK_SIZE ) ;
|
||||
|
||||
/* override the config reload for the startup case */
|
||||
obj_ptr->config_reload = false ;
|
||||
|
@ -38,17 +38,55 @@ using namespace std;
|
||||
#include "hwmonClass.h" /* for ... thread_extra_info_type */
|
||||
#include "nodeUtil.h" /* for ... fork_execv */
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* Name : bmc_sample_type
|
||||
*
|
||||
* Description: An array of sensor data.
|
||||
*
|
||||
* _sample_list
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
static bmc_sample_type _sample_list[MAX_HOST_SENSORS] ;
|
||||
/* One instance per thread. Uses the memory allocated for the stack.
|
||||
*
|
||||
* Although thread_local variables are not on the stack, they still
|
||||
* consume memory that’s tied to the thread’s overall resources,
|
||||
* and that memory often comes from the same per-thread allocation
|
||||
* that includes the stack ; refer to TLS (Thread-Local Storage).
|
||||
* The TLS area is often allocated adjacent to or within the thread's
|
||||
* stack mapping. A large thread_local variable increases the TLS
|
||||
* memory requirement, and if it exceeds the reserved space or
|
||||
* overlaps with the stack space, the OS may fail to allocate the
|
||||
* thread with an errno "Resource temporarily unavailable".
|
||||
* This allocation required the per thread stack to be increased. */
|
||||
thread_local bmc_sample_type _sample_list[MAX_HOST_SENSORS];
|
||||
|
||||
// #define WANT_SAMPLE_LIST_DEBUG
|
||||
#ifdef WANT_SAMPLE_LIST_DEBUG
|
||||
// Debug aid: log every populated entry of _sample_list for 'hostname'.
//
// All MAX_HOST_SENSORS slots are walked. A slot with an empty name is
// treated as unused ; if a populated slot is found after one or more unused
// slots then a 'sparse sensor list' log is generated for that index before
// the populated sample itself is logged.
void print_sample_list ( string & hostname )
{
    bool gap_pending = false ;
    for ( int index = 0 ; index < MAX_HOST_SENSORS ; ++index )
    {
        const bmc_sample_type & sample = _sample_list[index] ;

        /* unused slot ; remember it and move on */
        if ( sample.name[0] == '\0' )
        {
            gap_pending = true ;
            continue ;
        }

        /* populated slot that follows an unused one */
        if ( gap_pending )
        {
            slog ("%s has sparse sensor list ; gap at %d", hostname.c_str(), index);
            gap_pending = false ;
        }

        ilog ("%s Sample %d: %s - %s - %s - %s ... %s - %s - %s - %s - %s - %s",
                  hostname.c_str(), index,
                  sample.name,
                  sample.value,
                  sample.unit,
                  sample.status,
                  sample.lnr,
                  sample.lcr,
                  sample.lnc,
                  sample.unc,
                  sample.ucr,
                  sample.unr);
    }
}
|
||||
#endif // WANT_SAMPLE_LIST_DEBUG
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
@ -164,6 +202,11 @@ static void _parse_sensor_data ( thread_info_type * info_ptr )
|
||||
info_ptr->data.append (",\"");
|
||||
info_ptr->data.append (BMC_JSON__SENSORS_LABEL);
|
||||
info_ptr->data.append ("\":[");
|
||||
|
||||
#ifdef WANT_SAMPLE_LIST_DEBUG
|
||||
print_sample_list ( info_ptr->hostname );
|
||||
#endif // WANT_SAMPLE_LIST_DEBUG
|
||||
|
||||
for ( int i = 0 ; i < samples ; )
|
||||
{
|
||||
_add_json_sensor_tuple ( &_sample_list[i], info_ptr->data ) ;
|
||||
@ -331,6 +374,7 @@ void * hwmonThread_ipmitool ( void * arg )
|
||||
|
||||
/* the number of sensors are learned */
|
||||
extra_ptr->samples = samples = 0 ;
|
||||
MEMSET_ZERO (_sample_list);
|
||||
switch ( info_ptr->command )
|
||||
{
|
||||
case BMC_THREAD_CMD__POWER_STATUS:
|
||||
@ -542,7 +586,17 @@ void * hwmonThread_ipmitool ( void * arg )
|
||||
|
||||
unlink(info_ptr->password_file.data());
|
||||
daemon_remove_file (info_ptr->password_file.data());
|
||||
// info_ptr->password_file.clear();
|
||||
|
||||
/* Debug Option - enable lane debug_bmgt3 = 8 and touch
|
||||
* /var/run/bmc/ipmitool/want_dated_sensor_data_files for ipmi
|
||||
* or
|
||||
* /var/run/bmc/redfishtool/want_dated_sensor_data_files for redfish
|
||||
*
|
||||
* ... to save the current sensor read file with a dated extension
|
||||
* so that a read history is maintained for debug purposes. */
|
||||
if(daemon_get_cfg_ptr()->debug_bmgmt&8)
|
||||
if ( daemon_is_file_present (WANT_DATED_IPMI_SENSOR_DATA_FILES))
|
||||
daemon_copy_file(info_ptr->hostname, sensor_datafile.data());
|
||||
|
||||
/* check for system call error case */
|
||||
if ( rc != PASS )
|
||||
@ -1227,6 +1281,17 @@ static int _parse_redfish_sensor_data_output_file( thread_info_type * info_ptr,
|
||||
fread(buffer,(st.st_size + 2), 1, _fp);
|
||||
fclose(_fp);
|
||||
|
||||
/* Debug Option - enable lane debug_bmgt3 = 8 and touch
|
||||
* /var/run/bmc/ipmitool/want_dated_sensor_data_files for ipmi
|
||||
* or
|
||||
* /var/run/bmc/redfishtool/want_dated_sensor_data_files for redfish
|
||||
*
|
||||
* ... to save the current sensor read file with a dated extension
|
||||
* so that a read history is maintained for debug purposes. */
|
||||
if(daemon_get_cfg_ptr()->debug_bmgmt&8)
|
||||
if ( daemon_is_file_present (WANT_DATED_REDFISH_SENSOR_DATA_FILES))
|
||||
daemon_copy_file(info_ptr->hostname, datafile.data());
|
||||
|
||||
switch (sensor_group)
|
||||
{
|
||||
case BMC_SENSOR_POWER_GROUP:
|
||||
@ -1311,6 +1376,7 @@ void * hwmonThread_redfish ( void * arg )
|
||||
|
||||
/* the number of sensors learned */
|
||||
extra_ptr->samples = samples = 0 ;
|
||||
MEMSET_ZERO (_sample_list);
|
||||
|
||||
switch ( info_ptr->command )
|
||||
{
|
||||
|
@ -1179,7 +1179,7 @@ int daemon_init ( string iface, string nodetype )
|
||||
mtcTimer_init ( mtcInv.mtcTimer, mtcInv.my_hostname, "mtc timer" ); /* Init general mtc timer */
|
||||
mtcAlarm_init ();
|
||||
mtc_stages_init ();
|
||||
threadUtil_init ( mtcTimer_handler ) ;
|
||||
threadUtil_init ( mtcTimer_handler, MTCAGENT_STACK_SIZE ) ;
|
||||
|
||||
/* Bind signal handlers */
|
||||
rc = daemon_signal_init () ;
|
||||
|
@ -22,6 +22,8 @@ typedef struct
|
||||
string bm_cmd ;
|
||||
} thread_extra_info_type ;
|
||||
|
||||
#define MTCAGENT_STACK_SIZE (0x20000) // 128 kBytes
|
||||
|
||||
void * mtcThread_bmc ( void * );
|
||||
void * mtcThread_bmc_test ( void * arg );
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user