Cole Walker 63fefca36c [PTP] Reduce cpu usage and correct holdover time
Fixes two issues:

1.
The CPU usage consumed by monitoring the kern.log file for changes in
the NIC cgu status was found to be excessive. This change reverts that
implementation and returns to a polling approach similar to what was
already used for monitoring ptp4l and os clock statuses. This has been
observed to bring main process in the notificationservice-base image
down from ~60% CPU to ~8-10% CPU utilization.

In the future, a preferrable implementation would be to work with device
driver owners to provide support for udev events, removing the need to
poll the status of devices.

2.
User supplied holdover times for each service type were not being
applied correctly. Updated daemon.py to set the holdover times in the
service context.

This also includes providing a user configurable "CONTROL_TIMEOUT"
parameter to control the frequency of polling. This is a global value
and affects the polling rate for all services.

Test-plan:
PASS: Build and install ptp-notification app and containers
PASS: Observe reduced CPU usage and confirm that GNSS monitoring still
works
PASS: User supplied holdover times work correctly, along with polling
rate

Story: 2010056
Task: 46512
Task: 46513

Signed-off-by: Cole Walker <cole.walker@windriver.com>
Change-Id: Ic0050cc09f5118e7f1c32aa13168084d6456437e
2022-10-06 19:27:04 +00:00

220 lines
8.9 KiB
Python

#
# Copyright (c) 2021-2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# This script provides the PTP synchronization status
# for PTP NIC configured as subordinate (slave mode)
# It relies on Linux ptp4l (PMC) module in order to work
# Sync status provided as: 'Locked', 'Holdover', 'Freerun'
#
#
import datetime
import logging
import os
import re
import sys
from trackingfunctionsdk.model.dto.ptpstate import PtpState
from trackingfunctionsdk.common.helpers import constants
from trackingfunctionsdk.common.helpers import log_helper
from trackingfunctionsdk.common.helpers import ptpsync as utils
LOG = logging.getLogger(__name__)
log_helper.config_logger(LOG)
class PtpMonitor:
_clock_class = None
_ptp_sync_state = constants.UNKNOWN_PHC_STATE
_new_ptp_sync_event = False
_new_clock_class_event = False
_ptp_event_time = None
_clock_class_event_time = None
_clock_class_retry = 3
# Critical resources
ptp4l_service_name = None
ptp4l_config = None
phc2sys_service_name = None
ptp_oper_dict = {
# [pmc cmd, ptp keywords,...]
1: ["'GET PORT_DATA_SET'", constants.PORT_STATE],
2: ["'GET TIME_STATUS_NP'", constants.GM_PRESENT, constants.MASTER_OFFSET],
3: ["'GET PARENT_DATA_SET'", constants.GM_CLOCK_CLASS, constants.GRANDMASTER_IDENTITY],
4: ["'GET TIME_PROPERTIES_DATA_SET'", constants.TIME_TRACEABLE],
5: ["'GET DEFAULT_DATA_SET'", constants.CLOCK_IDENTITY, constants.CLOCK_CLASS],
}
pmc_query_results = {}
def __init__(self, ptp4l_instance, holdover_time, freq, phc2sys_service_name, init=True):
if init:
self.ptp4l_config = "/ptp/ptpinstance/ptp4l-%s.conf" % ptp4l_instance
self.ptp4l_service_name = ptp4l_instance
self.phc2sys_service_name = phc2sys_service_name
self.holdover_time = int(holdover_time)
self.freq = int(freq)
self._ptp_event_time = datetime.datetime.utcnow().timestamp()
self._clock_class_event_time = datetime.datetime.utcnow().timestamp()
self.set_ptp_sync_state()
self.set_ptp_clock_class()
def set_ptp_sync_state(self):
new_ptp_sync_event, ptp_sync_state, ptp_event_time = self.ptp_status()
if ptp_sync_state != self._ptp_sync_state:
self._new_ptp_sync_event = new_ptp_sync_event
self._ptp_sync_state = ptp_sync_state
self._ptp_event_time = ptp_event_time
else:
self._new_ptp_sync_event = new_ptp_sync_event
def get_ptp_sync_state(self):
return self._new_ptp_sync_event, self._ptp_sync_state, self._ptp_event_time
def set_ptp_clock_class(self):
try:
clock_class = self.pmc_query_results['clockClass']
# Reset retry counter upon getting clock class
self._clock_class_retry = 3
except KeyError:
LOG.warning("set_ptp_clock_class: Unable to read current clockClass")
if self._clock_class_retry > 0:
self._clock_class_retry -= 1
LOG.warning("Trying to get clockClass %s more time(s) before "
"setting clockClass 248 (FREERUN)" % self._clock_class_retry)
clock_class = self._clock_class
else:
clock_class = "248"
self._clock_class_retry = 3
if clock_class != self._clock_class:
self._clock_class = clock_class
self._new_clock_class_event = True
self._clock_class_event_time = datetime.datetime.utcnow().timestamp()
LOG.debug(self.pmc_query_results)
LOG.info("PTP clock class is %s" % self._clock_class)
else:
self._new_clock_class_event = False
def get_ptp_clock_class(self):
self.set_ptp_clock_class()
return self._new_clock_class_event, self._clock_class, self._clock_class_event_time
def ptp_status(self):
# holdover_time - time phc can maintain clock
# freq - the frequency for monitoring the ptp status
# sync_state - the current ptp state
# event_time - the last time that ptp status was changed
####################################
# event states: #
# Locked —> Holdover —> Freerun #
# Holdover —> Locked #
# Freerun —> Locked #
####################################
current_time = datetime.datetime.utcnow().timestamp()
time_in_holdover = None
previous_sync_state = self._ptp_sync_state
if previous_sync_state == PtpState.Holdover:
time_in_holdover = round(current_time - self._ptp_event_time)
# max holdover time is calculated to be in a 'safety' zone
max_holdover_time = (self.holdover_time - self.freq * 2)
pmc, ptp4l, phc2sys, ptp4lconf = utils.check_critical_resources(self.ptp4l_service_name,
self.phc2sys_service_name)
# run pmc command if preconditions met
if pmc and ptp4l and phc2sys and ptp4lconf:
self.pmc_query_results, total_ptp_keywords, port_count = self.ptpsync()
sync_state = utils.check_results(self.pmc_query_results, total_ptp_keywords, port_count)
else:
LOG.warning("Missing critical resource: PMC %s PTP4L %s PHC2SYS %s PTP4LCONF %s" % (pmc, ptp4l, phc2sys, ptp4lconf))
sync_state = PtpState.Freerun
# determine if transition into holdover mode (cannot be in holdover if system clock is
# not in
# sync)
if sync_state == PtpState.Freerun and phc2sys:
if previous_sync_state in [constants.UNKNOWN_PHC_STATE, PtpState.Freerun]:
sync_state = PtpState.Freerun
elif previous_sync_state == PtpState.Locked:
sync_state = PtpState.Holdover
elif previous_sync_state == PtpState.Holdover and time_in_holdover < \
max_holdover_time:
LOG.debug("PTP Status: Time in holdover is %s Max time in holdover is %s" % (
time_in_holdover, max_holdover_time))
sync_state = PtpState.Holdover
else:
sync_state = PtpState.Freerun
# determine if ptp sync state has changed since the last one
LOG.debug("ptp_monitor: sync_state %s, "
"previous_sync_state %s" % (sync_state, previous_sync_state))
if sync_state != previous_sync_state:
new_event = True
self._ptp_event_time = datetime.datetime.utcnow().timestamp()
else:
new_event = False
return new_event, sync_state, self._ptp_event_time
def ptpsync(self):
result = {}
total_ptp_keywords = 0
port_count = 0
ptp_dict_to_use = self.ptp_oper_dict
len_dic = len(ptp_dict_to_use)
for key in range(1, len_dic + 1):
cmd = ptp_dict_to_use[key][0]
cmd = "pmc -b 0 -u -f /ptp/ptpinstance/ptp4l-" + self.ptp4l_service_name + ".conf " +\
cmd
ptp_keyword = ptp_dict_to_use[key][1:]
total_ptp_keywords += len(ptp_keyword)
out, err, errcode = utils.run_shell2('.', None, cmd)
if errcode != 0:
LOG.warning('pmc command returned unknown result')
sys.exit(0)
out = str(out)
try:
out = out.split("\\n\\t\\t")
except:
LOG.warning('cannot split "out" into a list')
sys.exit(0)
for state in out:
try:
state = state.split()
except:
LOG.warning('cannot split "state" into a list')
sys.exit(0)
if len(state) <= 1:
LOG.warning('not received the expected list length')
sys.exit(0)
for item in ptp_keyword:
if state[0] == item:
if item == constants.PORT_STATE:
port_count += 1
result.update({constants.PORT.format(port_count): state[1]})
else:
state[1] = state[1].replace('\\n', '')
state[1] = state[1].replace('\'', '')
result.update({state[0]: state[1]})
# making sure at least one port is available
if port_count == 0:
port_count = 1
# adding the possible ports minus one keyword not used, "portState"
total_ptp_keywords = total_ptp_keywords + port_count - 1
return result, total_ptp_keywords, port_count
if __name__ == "__main__":
test_ptp = PtpMonitor()
LOG.debug("PTP sync state for %s is %s" % (
test_ptp.ptp4l_service_name, test_ptp.get_ptp_sync_state()))
LOG.debug("PTP clock class for %s is %s" % (
test_ptp.ptp4l_service_name, test_ptp.get_ptp_clock_class()))