Workaround retry issue of connection to notificationservice

Monkey-patch the kombu package to avoid retrying forever while connecting
to the rabbitmq broker, and spawn a worker thread that keeps retrying the
connection to notificationservice in the background.

Story: 2008529
Task: 42020
Signed-off-by: Bin Yang <bin.yang@windriver.com>
Change-Id: Ic12bd2af29ad71739f157dac66998d7972eb2edc

parent 1ca32529ad
commit 5eb6e432dd
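For reference, the change below combines two mechanisms: a capped-retry monkey patch for kombu, and a background worker thread that keeps retrying listener setup. The following is a minimal standalone sketch of the worker-thread pattern only; class and method names are illustrative and this is not the committed code.

import threading
import time


class RetryingSetup(object):
    """Sketch of the event-driven retry worker pattern used in this change."""

    def __init__(self):
        self._event = threading.Event()
        self._lock = threading.Lock()
        self._terminated = False
        self._worker = threading.Thread(target=self._refresher)
        self._worker.start()

    def _refresher(self, retry_interval=5):
        # Wait for a refresh request, attempt the setup, and if it did not
        # fully succeed re-arm the event after a delay so it is retried.
        while not self._terminated:
            self._event.wait()
            self._event.clear()
            with self._lock:
                allset = self._try_setup()
            if self._event.is_set():
                continue  # a newer request arrived; handle it right away
            if not allset:
                time.sleep(retry_interval)
                self._event.set()

    def _try_setup(self):
        # Placeholder for the real work (creating rpc listeners);
        # returns True once everything is in place.
        return True

    def request_refresh(self):
        # Callers never block on broker connectivity; they only signal.
        self._event.set()

    def stop(self):
        self._terminated = True
        self._event.set()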
@@ -7,6 +7,7 @@
 import os
 import json
 import time
+import threading
 import oslo_messaging
 from oslo_config import cfg
 from notificationclientsdk.common.helpers import rpc_helper
@@ -25,13 +26,42 @@ class BrokerClientBase(object):
         self.listeners = {}
         self.broker_endpoint = RpcEndpointInfo(broker_transport_endpoint)
         self.transport = rpc_helper.get_transport(self.broker_endpoint)
+        self._workerevent = threading.Event()
+        self._workerlock = threading.Lock()
+        self._workerterminated = False
+        # spawn a thread to retry on setting up listener
+        self._workerthread = threading.Thread(target=self._refresher, args=())
+        self._workerthread.start()
+
         LOG.debug("Created Broker client:{0}".format(broker_name))

     def __del__(self):
+        self._workerterminated = True
+        self._workerevent.set()
         self.transport.cleanup()
         del self.transport
         return

+    def _refresher(self, retry_interval=5):
+        while not self._workerterminated:
+            self._workerevent.wait()
+            self._workerevent.clear()
+            allset = False
+            with self._workerlock:
+                allset = self._refresh()
+            if self._workerevent.is_set():
+                continue
+            if not allset:
+                # retry later
+                time.sleep(retry_interval)
+                # retry on next loop
+                self._workerevent.set()
+
+    def __is_listening(self, context):
+        isactive = context and context.get(
+            'active', False) and context.get('rpcserver', False)
+        return isactive
+
     def __create_listener(self, context):
         target = oslo_messaging.Target(
             topic=context['topic'],
@@ -42,6 +72,7 @@ class BrokerClientBase(object):
         return server

     def _refresh(self):
+        allset = True
         for topic, servers in self.listeners.items():
             for servername, context in servers.items():
                 try:
@@ -57,13 +88,21 @@ class BrokerClientBase(object):
                         rpcserver.wait()
                         context.pop('rpcserver')
                         LOG.debug("Stopped rpcserver@{0}@{1}".format(context['topic'], context['server']))
-                except:
-                    LOG.error("Failed to update listener for topic/server:{0}/{1}"
-                              .format(topic, servername))
+                except Exception as ex:
+                    LOG.error("Failed to update listener for topic/server:{0}/{1}, reason:{2}"
+                              .format(topic, servername, str(ex)))
+                    allset = False
                     continue
+        return allset
+
+    def _trigger_refresh_listener(self, context):
+        self._workerevent.set()
+        # # sleep to re-schedule to run worker thread
+        # time.sleep(2)

     def add_listener(self, topic, server, listener_endpoints=None):
         context = self.listeners.get(topic,{}).get(server, {})
+        with self._workerlock:
             if not context:
                 context = {
                     'endpoints': listener_endpoints,
@@ -78,23 +117,23 @@ class BrokerClientBase(object):
                 context['endpoints'] = listener_endpoints
                 context['active'] = True

-        self._refresh()
+        self._trigger_refresh_listener(context)

     def remove_listener(self, topic, server):
         context = self.listeners.get(topic,{}).get(server, {})
+        with self._workerlock:
             if context:
                 context['active'] = False
-        self._refresh()
+        self._trigger_refresh_listener(context)

     def is_listening(self, topic, server):
         context = self.listeners.get(topic,{}).get(server, {})
-        return context.get('active', False)
+        return self.__is_listening(context)

     def any_listener(self):
         for topic, servers in self.listeners.items():
             for servername, context in servers.items():
-                isactive = context.get('active', False)
-                if isactive:
+                if self.__is_listening(context):
                     return True
         return False

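With the hunks above applied, add_listener() and remove_listener() only record intent under the worker lock and signal the refresher thread; is_listening() now reports True only once the worker has actually created the rpcserver. A hedged sketch of the caller side, assuming an existing BrokerClientBase instance `client` and an endpoint object `handler` (both illustrative):

import time

client.add_listener('notification-topic', 'server-a', listener_endpoints=[handler])

# add_listener() returns immediately even if rabbitmq is unreachable;
# the worker thread keeps retrying the listener setup in the background.
while not client.is_listening('notification-topic', 'server-a'):
    time.sleep(1)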
@@ -0,0 +1,31 @@
+#
+# Copyright (c) 2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import kombu.utils.functional
+
+class OsloMessagingPatcher(object):
+    retry_over_time_orig = None
+    @staticmethod
+    def retry_over_time_patch(
+            fun, catch, args=None, kwargs=None, errback=None,
+            max_retries=None, interval_start=2, interval_step=2,
+            interval_max=30, callback=None, timeout=None):
+        """
+        patch to retry_over_time with default max_retries=2
+        """
+        if not max_retries:
+            max_retries = 2
+        return OsloMessagingPatcher.retry_over_time_orig(
+            fun, catch, args, kwargs, errback,
+            max_retries, interval_start, interval_step,
+            interval_max, callback, timeout)
+
+    @staticmethod
+    def patch():
+        if not OsloMessagingPatcher.retry_over_time_orig:
+            OsloMessagingPatcher.retry_over_time_orig = kombu.utils.functional.retry_over_time
+            kombu.utils.functional.retry_over_time = OsloMessagingPatcher.retry_over_time_patch
+        return
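A quick way to see the effect of the new patcher module is the sketch below. It is illustrative only and assumes kombu's retry_over_time re-raises the caught exception once max_retries is exceeded; the flaky() helper and the attempt counter are not part of this change.

import kombu.utils.functional

from notificationclientsdk.common.helpers.patcher import OsloMessagingPatcher

OsloMessagingPatcher.patch()

attempts = []

def flaky():
    attempts.append(1)
    raise ConnectionError("broker unreachable")

try:
    # Without the patch, a caller omitting max_retries would retry forever;
    # with it, the patched default cap of 2 retries applies.
    kombu.utils.functional.retry_over_time(
        flaky, ConnectionError, interval_start=0, interval_step=0)
except ConnectionError:
    print("gave up after {0} attempts".format(len(attempts)))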
@@ -3,3 +3,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 #
+
+from notificationclientsdk.common.helpers.patcher import OsloMessagingPatcher
+
+OsloMessagingPatcher.patch()
@@ -615,16 +615,21 @@ class NotificationWorker:
         self.signal_node_resources_event()
         self.signal_events()

-    def __start_watch_all_nodes(self):
+    def __start_watch_all_nodes(self, retry_interval=5):
         try:
-            if not self.locationservice_client.is_listening_on_location(
+            while not self.locationservice_client.is_listening_on_location(
                     NodeInfoHelper.BROKER_NODE_ALL):
                 # start watching on the location announcement
                 self.locationservice_client.add_location_listener(
                     NodeInfoHelper.BROKER_NODE_ALL,
                     location_handler=self.__NodeInfoWatcher)
-                LOG.debug("Start watching location announcement of notificationservice@{0}"
+                LOG.debug(
+                    "Start watching location announcement of notificationservice@{0}"
                     .format(NodeInfoHelper.BROKER_NODE_ALL))
+                if not self.locationservice_client.is_listening_on_location(
+                        NodeInfoHelper.BROKER_NODE_ALL):
+                    # retry later and forever
+                    time.sleep(retry_interval)
             self.locationservice_client.trigger_location_annoucement(timeout=20, retry=10)
         except Exception as ex:
             LOG.debug("exception: {0}".format(str(ex)))
@@ -7,6 +7,7 @@
 import oslo_messaging
 import logging
 import json
+import kombu

 from notificationclientsdk.repository.node_repo import NodeRepo
 from notificationclientsdk.repository.subscription_repo import SubscriptionRepo
@@ -48,7 +49,7 @@ class PtpService(object):
         if ResourceType.TypePTP in supported_resource_types:
             return self._query(default_node_name)
         else:
-            raise client_exception.ResourceNotAvailable(default_node_name, ResourceType.TypePTP)
+            raise client_exception.ResourceNotAvailable(broker_node_name, ResourceType.TypePTP)

     def _query(self, broker_node_name):
         broker_host = "notificationservice-{0}".format(broker_node_name)
@@ -57,12 +58,23 @@ class PtpService(object):
             self.daemon_control.daemon_context['NOTIFICATION_BROKER_PASS'],
             broker_host,
             self.daemon_control.daemon_context['NOTIFICATION_BROKER_PORT'])
+        notificationservice_client = None
+        try:
             notificationservice_client = NotificationServiceClient(
                 broker_node_name, broker_transport_endpoint)
             resource_status = notificationservice_client.query_resource_status(
                 ResourceType.TypePTP, timeout=5, retry=10)
-        del notificationservice_client
             return resource_status
+        except oslo_messaging.exceptions.MessagingTimeout as ex:
+            LOG.warning("ptp status is not available @node {0} due to {1}".format(
+                broker_node_name, str(ex)))
+            raise client_exception.ResourceNotAvailable(broker_node_name, ResourceType.TypePTP)
+        except kombu.exceptions.OperationalError as ex:
+            LOG.warning("Node {0} is unreachable yet".format(broker_node_name))
+            raise client_exception.NodeNotAvailable(broker_node_name)
+        finally:
+            if notificationservice_client:
+                del notificationservice_client

     def add_subscription(self, subscription_dto):
         subscription_orm = SubscriptionOrm(**subscription_dto.to_orm())
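After this hunk, callers of _query() can tell an unreachable node apart from a node that is reachable but has no PTP status yet. A hedged sketch of the caller side follows; the `ptpservice` variable and the logging are illustrative, while the exception classes are the ones referenced in the diff itself.

try:
    ptpstatus = ptpservice._query(broker_node_name)
except client_exception.NodeNotAvailable:
    # kombu gave up after the capped retries: the node's broker is unreachable
    LOG.warning("node %s is not available", broker_node_name)
except client_exception.ResourceNotAvailable:
    # node reachable, but the PTP status query timed out at the rpc layer
    LOG.warning("ptp status is not available on node %s", broker_node_name)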
@@ -77,13 +89,8 @@ class PtpService(object):
         # get initial resource status
         if default_node_name:
             ptpstatus = None
-            try:
             ptpstatus = self._query(default_node_name)
             LOG.info("initial ptpstatus:{0}".format(ptpstatus))
-            except oslo_messaging.exceptions.MessagingTimeout as ex:
-                LOG.warning("ptp status is not available @node {0} due to {1}".format(
-                    default_node_name, str(ex)))
-                raise client_exception.ResourceNotAvailable(broker_node_name, subscription_dto.ResourceType)

             # construct subscription entry
             subscription_orm.InitialDeliveryTimestamp = ptpstatus.get('EventTimestamp', None)