Tara Subedi 5ae719a36e PTP: Change overall sync-state behavior
Currently overall sync-state (/sync/sync-status/sync-state) represents
degraded (HOLDOVER/FREERUN) when any state is degraded: os-clock-sync-
state, ptp-status/lock-state, gnss-status/gnss-sync-status.

This commit changes overall sync-state to represent whether the os-clock
is synced or not to the timing source.

Overall sync-state should only be degraded if the timing sources
involved in providing the end-to-end timing is degraded or lost (i.e. it
should represent the overall timing chain status). ptp4l instances that
perform timing distribution only and are not part of the host timing
chain must not impact the overall sync-state. os-clock-state should only
represent the status of the OS clock sync state to the primary clock
source in an HA configuration.

Unit tests has been added and unit test's python interpreter has been
changed to python3.9 to inline with base-image's python version (3.9.2).

TEST PLAN:
   PASS: T-GM deployment with two ptp4l instances
         trigger different events (stop/start ptp4l/phc2sys/ts2phc
         instances, change clockClass, skew clocks, disable GNSS)
         overall sync-state is not affected by ptp-inst1 and ptp-inst2
         states.
   PASS: T-BC deployment with two ptp4l instances
         trigger different events (stop/start ptp4l/phc2sys instances,
         change clockClass, skew clocks)
         overall sync-state is not affected by secondary ptp instance
         state.

   PASS: Hybrid (T-GM/T-BC) HA deployement: HA with GNSS and PTP source
         trigger different events
         overall sync-state is result of chained relation to chosen
         primary source

Story: 2011370
Task: 51774

Change-Id: Ibfb7fa0f9f8ad09584a5f28b60b0e4649976932c
Signed-off-by: Tara Nath Subedi <tara.subedi@windriver.com>
2025-04-10 16:03:35 -04:00

290 lines
11 KiB
Python

#
# Copyright (c) 2021-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# This script provides the PTP synchronization status
# for PTP NIC configured as subordinate (slave mode)
# It relies on Linux ptp4l (PMC) module in order to work
# Sync status provided as: 'Locked', 'Holdover', 'Freerun'
#
#
import datetime
import logging
import re
import sys
from trackingfunctionsdk.model.dto.ptpstate import PtpState
from trackingfunctionsdk.common.helpers import constants
from trackingfunctionsdk.common.helpers import log_helper
from trackingfunctionsdk.common.helpers import ptpsync as utils
LOG = logging.getLogger(__name__)
log_helper.config_logger(LOG)
class PtpMonitor:
_clock_class = None
_ptp_sync_state = constants.UNKNOWN_PHC_STATE
_new_ptp_sync_event = False
_new_clock_class_event = False
_ptp_event_time = None
_clock_class_event_time = None
_clock_class_retry = 3
# Critical resources
ptp4l_service_name = None
ptp4l_config = None
phc2sys_service_name = None
ptp_oper_dict = {
# [pmc cmd, ptp keywords,...]
1: ["'GET PORT_DATA_SET'", constants.PORT_STATE],
2: ["'GET TIME_STATUS_NP'", constants.GM_PRESENT,
constants.MASTER_OFFSET],
3: ["'GET PARENT_DATA_SET'", constants.GM_CLOCK_CLASS,
constants.GRANDMASTER_IDENTITY],
4: ["'GET TIME_PROPERTIES_DATA_SET'", constants.TIME_TRACEABLE],
5: ["'GET DEFAULT_DATA_SET'", constants.CLOCK_IDENTITY,
constants.CLOCK_CLASS],
}
pmc_query_results = {}
def __init__(self, ptp4l_instance, holdover_time, freq,
phc2sys_service_name, init=True):
if init:
self.ptp4l_config = constants.PTP_CONFIG_PATH + ("ptp4l-%s.conf" %
ptp4l_instance)
self.ptp4l_service_name = ptp4l_instance
self.phc2sys_service_name = phc2sys_service_name
self.holdover_time = int(holdover_time)
self.freq = int(freq)
self.set_ptp_devices()
self.sync_source = constants.ClockSourceType.TypeNA
self._ptp_event_time = datetime.datetime.utcnow().timestamp()
self._clock_class_event_time = \
datetime.datetime.utcnow().timestamp()
self.set_ptp_sync_state()
self.set_ptp_clock_class()
def set_ptp_devices(self):
ptp_devices = set()
phc_interfaces = self._check_config_file_interfaces()
for phc_interface in phc_interfaces:
ptp_device = utils.get_interface_phc_device(phc_interface)
if ptp_device is not None:
ptp_devices.add(ptp_device)
self.ptp_devices = list(ptp_devices)
LOG.debug("PTP4l PTP devices are %s" % self.ptp_devices)
def get_ptp_devices(self):
return self.ptp_devices
def get_ptp_sync_source(self):
return self.sync_source
def _check_config_file_interfaces(self):
phc_interfaces = []
with open(self.ptp4l_config, 'r') as f:
config_lines = f.readlines()
config_lines = [line.rstrip() for line in config_lines]
for line in config_lines:
# Find the interface value inside the square brackets
if re.match(r"^\[.*\]$", line) and line not in [
"[global]",
"[unicast_master_table]",
]:
phc_interface = line.strip("[]")
LOG.debug("PTP4l interface is %s" % phc_interface)
phc_interfaces.append(phc_interface)
return phc_interfaces
def set_ptp_sync_state(self):
new_ptp_sync_event, ptp_sync_state, ptp_event_time = self.ptp_status()
if ptp_sync_state != self._ptp_sync_state:
self._new_ptp_sync_event = new_ptp_sync_event
self._ptp_sync_state = ptp_sync_state
self._ptp_event_time = ptp_event_time
else:
self._new_ptp_sync_event = new_ptp_sync_event
def get_ptp_sync_state(self):
return self._new_ptp_sync_event, self._ptp_sync_state, \
self._ptp_event_time
def set_ptp_clock_class(self):
try:
clock_class = self.pmc_query_results['gm.ClockClass']
# Reset retry counter upon getting clock class
self._clock_class_retry = 3
except KeyError:
LOG.warning(
"set_ptp_clock_class: Unable to read current clockClass")
if self._clock_class_retry > 0:
self._clock_class_retry -= 1
LOG.warning("Trying to get clockClass %s more time(s) before "
"setting clockClass 248 (FREERUN)"
% self._clock_class_retry)
clock_class = self._clock_class
else:
clock_class = "248"
self._clock_class_retry = 3
if clock_class != self._clock_class:
self._clock_class = clock_class
self._new_clock_class_event = True
self._clock_class_event_time = \
datetime.datetime.utcnow().timestamp()
LOG.debug(self.pmc_query_results)
LOG.info("PTP clock class is %s" % self._clock_class)
else:
self._new_clock_class_event = False
def get_ptp_clock_class(self):
self.set_ptp_clock_class()
return self._new_clock_class_event, self._clock_class, \
self._clock_class_event_time
def ptp_status(self):
# holdover_time - time phc can maintain clock
# freq - the frequency for monitoring the ptp status
# sync_state - the current ptp state
# event_time - the last time that ptp status was changed
####################################
# event states: #
# Locked —> Holdover —> Freerun #
# Holdover —> Locked #
# Freerun —> Locked #
####################################
current_time = datetime.datetime.utcnow().timestamp()
time_in_holdover = None
previous_sync_state = self._ptp_sync_state
if previous_sync_state == PtpState.Holdover:
time_in_holdover = round(current_time - self._ptp_event_time)
# max holdover time is calculated to be in a 'safety' zone
max_holdover_time = (self.holdover_time - self.freq * 2)
previous_sync_source = self.sync_source
sync_source = constants.ClockSourceType.TypeNA
pmc, ptp4l, _, ptp4lconf = \
utils.check_critical_resources(self.ptp4l_service_name,
self.phc2sys_service_name)
# run pmc command if preconditions met
# Removed check for phc2sys, ptp4l status should not depend on it
if pmc and ptp4l and ptp4lconf:
self.pmc_query_results, total_ptp_keywords, port_count = \
self.ptpsync()
try:
sync_state, sync_source = utils.check_results(
self.pmc_query_results,total_ptp_keywords, port_count
)
except RuntimeError as err:
LOG.warning(err)
sync_state = previous_sync_state
sync_source = previous_sync_source
else:
LOG.warning("Missing critical resource: "
"PMC %s PTP4L %s PTP4LCONF %s"
% (pmc, ptp4l, ptp4lconf))
sync_state = PtpState.Freerun
# determine if transition into holdover mode
if sync_state == PtpState.Freerun:
if previous_sync_state in [constants.UNKNOWN_PHC_STATE,
PtpState.Freerun]:
sync_state = PtpState.Freerun
elif previous_sync_state == PtpState.Locked:
sync_state = PtpState.Holdover
elif previous_sync_state == PtpState.Holdover and \
time_in_holdover < max_holdover_time:
LOG.debug("PTP Status: Time in holdover is %s "
"Max time in holdover is %s"
% (time_in_holdover, max_holdover_time))
sync_state = PtpState.Holdover
else:
sync_state = PtpState.Freerun
# determine if ptp sync state has changed since the last one
LOG.debug("ptp_monitor: sync_state %s, "
"previous_sync_state %s, "
"sync_source %s, "
"previous sync_source %s" % (
sync_state, previous_sync_state, sync_source, previous_sync_source
)
)
# record sync_source of this poll.
self.sync_source = sync_source
if sync_state != previous_sync_state:
new_event = True
self._ptp_event_time = datetime.datetime.utcnow().timestamp()
else:
new_event = False
return new_event, sync_state, self._ptp_event_time
def ptpsync(self):
result = {}
total_ptp_keywords = 0
port_count = 0
ptp_dict_to_use = self.ptp_oper_dict
len_dic = len(ptp_dict_to_use)
for key in range(1, len_dic + 1):
cmd = ptp_dict_to_use[key][0]
cmd = "pmc -b 0 -u -f " + constants.PTP_CONFIG_PATH + \
"ptp4l-" + self.ptp4l_service_name + ".conf " + cmd
ptp_keyword = ptp_dict_to_use[key][1:]
total_ptp_keywords += len(ptp_keyword)
out, err, errcode = utils.run_shell2('.', None, cmd)
if errcode != 0:
LOG.warning('pmc command returned unknown result')
sys.exit(0)
out = str(out)
try:
out = out.split("\\n\\t\\t")
except:
LOG.warning('cannot split "out" into a list')
sys.exit(0)
for state in out:
try:
state = state.split()
except:
LOG.warning('cannot split "state" into a list')
sys.exit(0)
if len(state) <= 1:
LOG.warning('not received the expected list length')
sys.exit(0)
for item in ptp_keyword:
if state[0] == item:
if item == constants.PORT_STATE:
port_count += 1
result.update(
{constants.PORT.format(port_count): state[1]})
else:
state[1] = state[1].replace('\\n', '')
state[1] = state[1].replace('\'', '')
result.update({state[0]: state[1]})
# making sure at least one port is available
if port_count == 0:
port_count = 1
# adding the possible ports minus one keyword not used, "portState"
total_ptp_keywords = total_ptp_keywords + port_count - 1
return result, total_ptp_keywords, port_count
if __name__ == "__main__":
test_ptp = PtpMonitor()
LOG.debug("PTP sync state for %s is %s"
% (test_ptp.ptp4l_service_name, test_ptp.get_ptp_sync_state()))
LOG.debug("PTP clock class for %s is %s"
% (test_ptp.ptp4l_service_name, test_ptp.get_ptp_clock_class()))