Merge "Add recheck to statsd so that false alarms don't happen"
This commit is contained in:
commit
7b2a30f7f5
@ -67,4 +67,5 @@ datadog_api_key=0987654321
|
||||
datadog_app_key=1234567890
|
||||
datadog_message_tail="@user@domain.com"
|
||||
datadog_tags=service:lbaas
|
||||
datadog_env=prod
|
||||
ping_interval = 60
|
||||
|
@ -98,6 +98,13 @@ class AdminAPI(object):
|
||||
), body
|
||||
)
|
||||
|
||||
def get_device(self, device_id):
|
||||
return self._get(
|
||||
'{url}/devices/{device_id}'.format(
|
||||
url=self.url, device_id=device_id
|
||||
)
|
||||
)
|
||||
|
||||
def _get_node_list(self, limit, marker):
|
||||
return self._get(
|
||||
'{url}/devices?marker={marker}&limit={limit}'
|
||||
|
@ -22,7 +22,7 @@ class DatadogDriver(AlertDriver):
|
||||
super(DatadogDriver, self).__init__(logger, args)
|
||||
|
||||
def send_alert(self, message, device_id):
|
||||
title = 'Load balancer failure'
|
||||
title = 'Load balancer failure in {0}'.format(self.args.datadog_env)
|
||||
text = 'Load balancer failed with message {0} {1}'.format(
|
||||
message, self.args.datadog_message_tail
|
||||
)
|
||||
@ -33,7 +33,7 @@ class DatadogDriver(AlertDriver):
|
||||
self.logger.info('Datadog alert response: {0}'.format(resp))
|
||||
|
||||
def send_repair(self, message, device_id):
|
||||
title = 'Load balancer recovered'
|
||||
title = 'Load balancer recovered in {0}'.format(self.args.datadog_env)
|
||||
text = 'Load balancer recovered with message {0} {1}'.format(
|
||||
message, self.args.datadog_message_tail
|
||||
)
|
||||
|
@ -82,6 +82,10 @@ def main():
|
||||
'--datadog_tags',
|
||||
help='A space separated list of tags for Datadog alerts'
|
||||
)
|
||||
options.parser.add_argument(
|
||||
'--datadog_env', default='unknown',
|
||||
help='Server environment'
|
||||
)
|
||||
|
||||
args = options.run()
|
||||
|
||||
|
@ -90,6 +90,9 @@ class Sched(object):
|
||||
if api.is_online():
|
||||
lb_list = api.get_ping_list()
|
||||
pings = len(lb_list)
|
||||
if pings == 0:
|
||||
self.logger.info('No LBs to ping')
|
||||
return (0, 0)
|
||||
for lb in lb_list:
|
||||
node_list.append(lb['name'])
|
||||
gearman = GearJobs(self.logger, self.args)
|
||||
@ -129,8 +132,15 @@ class Sched(object):
|
||||
return tested, repaired
|
||||
|
||||
def _send_fails(self, failed_nodes, node_list):
|
||||
api = AdminAPI(self.args.api_server, self.logger)
|
||||
for node in failed_nodes:
|
||||
data = self._get_node(node, node_list)
|
||||
# device could have been marked offline between getting the list
|
||||
# and testing, check if this is the case
|
||||
status_code, device_status = api.get_device(data['id'])
|
||||
if device_status['status'] != 'ONLINE':
|
||||
continue
|
||||
|
||||
message = (
|
||||
'Load balancer failed\n'
|
||||
'ID: {0}\n'
|
||||
|
Loading…
x
Reference in New Issue
Block a user