Add psuedo lazy pirate stragey to agent, with socket timeouts
This adds a pseudo lazy pirate strategy to the agent, with configurable socket timeouts and retry counts. Change-Id: I57bd09c33d071f1cc975e00b9e2deeb715f19bd6
This commit is contained in:
parent
9d2ccc4e59
commit
e4fff5fcc5
@ -335,6 +335,10 @@
|
||||
# Agent unique id, defaults to MAC of primary interface. (string value)
|
||||
#agent_id = <None>
|
||||
|
||||
# Prior to exiting, the number of reconnects the Agent will attempt with the
|
||||
# server upon socket operation errors. (integer value)
|
||||
#agent_socket_conn_retries = 10
|
||||
|
||||
# Heat template containing receipt of building the image. Can be a file name or
|
||||
# one of aliases: "centos", "debian", "ubuntu". Defaults to "ubuntu". (string
|
||||
# value)
|
||||
|
@ -89,9 +89,13 @@ def sleep(seconds):
|
||||
time.sleep(seconds)
|
||||
|
||||
|
||||
def get_socket(endpoint):
|
||||
context = zmq.Context()
|
||||
def get_socket(context, endpoint):
|
||||
socket = context.socket(zmq.REQ)
|
||||
socket.setsockopt(zmq.LINGER, 0)
|
||||
if 'agent_socket_recv_timeout' in cfg.CONF:
|
||||
socket.setsockopt(zmq.RCVTIMEO, cfg.CONF.agent_socket_recv_timeout)
|
||||
if 'agent_socket_send_timeout' in cfg.CONF:
|
||||
socket.setsockopt(zmq.SNDTIMEO, cfg.CONF.agent_socket_send_timeout)
|
||||
socket.connect('tcp://%s' % endpoint)
|
||||
return socket
|
||||
|
||||
@ -133,11 +137,19 @@ def work(agent_id, endpoint, polling_interval=config.DEFAULT_POLLING_INTERVAL,
|
||||
agent_config = dict(polling_interval=polling_interval)
|
||||
LOG.info('Agent config: %s', agent_config)
|
||||
|
||||
socket = get_socket(endpoint)
|
||||
if 'agent_socket_conn_retries' in cfg.CONF:
|
||||
socket_conn_retries = cfg.CONF.agent_socket_conn_retries
|
||||
else:
|
||||
socket_conn_retries = config.DEFAULT_SOCKET_CONN_RETRIES
|
||||
|
||||
context = zmq.Context()
|
||||
socket = get_socket(context, endpoint)
|
||||
socket_retries_left = socket_conn_retries
|
||||
|
||||
while True:
|
||||
try:
|
||||
work_act(socket, agent_id, agent_config)
|
||||
socket_retries_left = socket_conn_retries
|
||||
|
||||
except BaseException as e:
|
||||
if isinstance(e, KeyboardInterrupt):
|
||||
@ -146,10 +158,21 @@ def work(agent_id, endpoint, polling_interval=config.DEFAULT_POLLING_INTERVAL,
|
||||
else:
|
||||
LOG.info('Process is interrupted')
|
||||
sys.exit(3)
|
||||
elif isinstance(e, zmq.error.ZMQError):
|
||||
socket.close()
|
||||
socket_retries_left -= 1
|
||||
if socket_retries_left <= 0:
|
||||
LOG.exception(e)
|
||||
break
|
||||
LOG.warning('Socket reconnecting...')
|
||||
socket = get_socket(context, endpoint)
|
||||
else:
|
||||
LOG.exception(e)
|
||||
break
|
||||
|
||||
socket.close()
|
||||
context.term()
|
||||
|
||||
|
||||
def get_node_uuid():
|
||||
s = '%012x' % uuid.getnode()
|
||||
|
@ -31,6 +31,7 @@ REPORT_TEMPLATES = 'shaker/resources/report_templates/'
|
||||
SCENARIOS = 'shaker/scenarios/'
|
||||
SCHEMAS = 'shaker/resources/schemas/'
|
||||
DEFAULT_POLLING_INTERVAL = 10
|
||||
DEFAULT_SOCKET_CONN_RETRIES = 10
|
||||
|
||||
|
||||
class Endpoint(types.String):
|
||||
@ -75,7 +76,7 @@ COMMON_OPTS = [
|
||||
cfg.IntOpt('polling-interval',
|
||||
default=(utils.env('SHAKER_POLLING_INTERVAL') or
|
||||
DEFAULT_POLLING_INTERVAL),
|
||||
help='How frequently the agent polls server, in seconds')
|
||||
help='How frequently the agent polls server, in seconds'),
|
||||
]
|
||||
|
||||
OPENSTACK_OPTS = [
|
||||
@ -285,6 +286,20 @@ AGENT_OPTS = [
|
||||
cfg.StrOpt('agent-id',
|
||||
default=utils.env('SHAKER_AGENT_ID'),
|
||||
help='Agent unique id, defaults to MAC of primary interface.'),
|
||||
cfg.IntOpt('agent-socket-recv-timeout',
|
||||
default=utils.env('SHAKER_AGENT_SOCKET_RECV_TIMEOUT'),
|
||||
help='The amount of time the socket will wait for '
|
||||
'a response from a sent message, in milliseconds.'),
|
||||
cfg.IntOpt('agent-socket-send-timeout',
|
||||
default=utils.env('SHAKER_AGENT_SOCKET_SEND_TIMEOUT'),
|
||||
help='The amount of time the socket will wait until '
|
||||
'a sent message is accepted, in milliseconds.'),
|
||||
cfg.IntOpt('agent-socket-conn-retries',
|
||||
default=(utils.env('SHAKER_AGENT_SOCKET_CONN_RETRIES') or
|
||||
DEFAULT_SOCKET_CONN_RETRIES),
|
||||
help='Prior to exiting, the number of reconnects the Agent '
|
||||
'will attempt with the server upon socket operation '
|
||||
'errors.'),
|
||||
]
|
||||
|
||||
IMAGE_BUILDER_OPTS = [
|
||||
|
Loading…
x
Reference in New Issue
Block a user