Merge "Add recheck to statsd so that false alarms don't happen"
This commit is contained in:
commit
7b2a30f7f5
@ -67,4 +67,5 @@ datadog_api_key=0987654321
|
|||||||
datadog_app_key=1234567890
|
datadog_app_key=1234567890
|
||||||
datadog_message_tail="@user@domain.com"
|
datadog_message_tail="@user@domain.com"
|
||||||
datadog_tags=service:lbaas
|
datadog_tags=service:lbaas
|
||||||
|
datadog_env=prod
|
||||||
ping_interval = 60
|
ping_interval = 60
|
||||||
|
@ -98,6 +98,13 @@ class AdminAPI(object):
|
|||||||
), body
|
), body
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_device(self, device_id):
|
||||||
|
return self._get(
|
||||||
|
'{url}/devices/{device_id}'.format(
|
||||||
|
url=self.url, device_id=device_id
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
def _get_node_list(self, limit, marker):
|
def _get_node_list(self, limit, marker):
|
||||||
return self._get(
|
return self._get(
|
||||||
'{url}/devices?marker={marker}&limit={limit}'
|
'{url}/devices?marker={marker}&limit={limit}'
|
||||||
|
@ -22,7 +22,7 @@ class DatadogDriver(AlertDriver):
|
|||||||
super(DatadogDriver, self).__init__(logger, args)
|
super(DatadogDriver, self).__init__(logger, args)
|
||||||
|
|
||||||
def send_alert(self, message, device_id):
|
def send_alert(self, message, device_id):
|
||||||
title = 'Load balancer failure'
|
title = 'Load balancer failure in {0}'.format(self.args.datadog_env)
|
||||||
text = 'Load balancer failed with message {0} {1}'.format(
|
text = 'Load balancer failed with message {0} {1}'.format(
|
||||||
message, self.args.datadog_message_tail
|
message, self.args.datadog_message_tail
|
||||||
)
|
)
|
||||||
@ -33,7 +33,7 @@ class DatadogDriver(AlertDriver):
|
|||||||
self.logger.info('Datadog alert response: {0}'.format(resp))
|
self.logger.info('Datadog alert response: {0}'.format(resp))
|
||||||
|
|
||||||
def send_repair(self, message, device_id):
|
def send_repair(self, message, device_id):
|
||||||
title = 'Load balancer recovered'
|
title = 'Load balancer recovered in {0}'.format(self.args.datadog_env)
|
||||||
text = 'Load balancer recovered with message {0} {1}'.format(
|
text = 'Load balancer recovered with message {0} {1}'.format(
|
||||||
message, self.args.datadog_message_tail
|
message, self.args.datadog_message_tail
|
||||||
)
|
)
|
||||||
|
@ -82,6 +82,10 @@ def main():
|
|||||||
'--datadog_tags',
|
'--datadog_tags',
|
||||||
help='A space separated list of tags for Datadog alerts'
|
help='A space separated list of tags for Datadog alerts'
|
||||||
)
|
)
|
||||||
|
options.parser.add_argument(
|
||||||
|
'--datadog_env', default='unknown',
|
||||||
|
help='Server environment'
|
||||||
|
)
|
||||||
|
|
||||||
args = options.run()
|
args = options.run()
|
||||||
|
|
||||||
|
@ -90,6 +90,9 @@ class Sched(object):
|
|||||||
if api.is_online():
|
if api.is_online():
|
||||||
lb_list = api.get_ping_list()
|
lb_list = api.get_ping_list()
|
||||||
pings = len(lb_list)
|
pings = len(lb_list)
|
||||||
|
if pings == 0:
|
||||||
|
self.logger.info('No LBs to ping')
|
||||||
|
return (0, 0)
|
||||||
for lb in lb_list:
|
for lb in lb_list:
|
||||||
node_list.append(lb['name'])
|
node_list.append(lb['name'])
|
||||||
gearman = GearJobs(self.logger, self.args)
|
gearman = GearJobs(self.logger, self.args)
|
||||||
@ -129,8 +132,15 @@ class Sched(object):
|
|||||||
return tested, repaired
|
return tested, repaired
|
||||||
|
|
||||||
def _send_fails(self, failed_nodes, node_list):
|
def _send_fails(self, failed_nodes, node_list):
|
||||||
|
api = AdminAPI(self.args.api_server, self.logger)
|
||||||
for node in failed_nodes:
|
for node in failed_nodes:
|
||||||
data = self._get_node(node, node_list)
|
data = self._get_node(node, node_list)
|
||||||
|
# device could have been marked offline between getting the list
|
||||||
|
# and testing, check if this is the case
|
||||||
|
status_code, device_status = api.get_device(data['id'])
|
||||||
|
if device_status['status'] != 'ONLINE':
|
||||||
|
continue
|
||||||
|
|
||||||
message = (
|
message = (
|
||||||
'Load balancer failed\n'
|
'Load balancer failed\n'
|
||||||
'ID: {0}\n'
|
'ID: {0}\n'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user