diff --git a/libra/statsd/admin_api.py b/libra/statsd/admin_api.py index b57311d9..30fda4d5 100644 --- a/libra/statsd/admin_api.py +++ b/libra/statsd/admin_api.py @@ -18,6 +18,10 @@ import sys import json +class APIError(Exception): + pass + + class AdminAPI(object): def __init__(self, addresses, logger): self.logger = logger @@ -42,7 +46,9 @@ class AdminAPI(object): limit = 20 lb_list = [] while True: - nodes = self._get_node_list(limit, marker) + success, nodes = self._get_node_list(limit, marker) + if not success: + raise APIError # if we hit an empty device list we have hit end of list if not len(nodes['devices']): break diff --git a/libra/statsd/gearman.py b/libra/statsd/gearman.py index ed8715b5..f8209cd5 100644 --- a/libra/statsd/gearman.py +++ b/libra/statsd/gearman.py @@ -12,8 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. -import gearman - from libra.common.json_gearman import JSONGearmanClient @@ -27,23 +25,23 @@ class GearJobs(object): failed_list = [] job_data = {"hpcs_action": "STATS"} for node in node_list: - list_of_jobs.append(dict(task=node, data=job_data)) + list_of_jobs.append(dict(task=str(node), data=job_data)) submitted_pings = self.gm_client.submit_multiple_jobs( list_of_jobs, background=False, wait_until_complete=True, poll_timeout=5.0 ) for ping in submitted_pings: - if ping.state == gearman.JOB_UNKNOWN: + if ping.state == 'UNKNOWN': # TODO: Gearman server failed, ignoring for now self.logger.error('Gearman Job server fail') continue if ping.timed_out: # Ping timeout - failed_list.append(ping['task']) + failed_list.append(ping.job.task) continue if ping.result['hpcs_response'] == 'FAIL': # Error returned by Gearman - failed_list.append(ping['task']) + failed_list.append(ping.job.task) continue return failed_list diff --git a/libra/statsd/scheduler.py b/libra/statsd/scheduler.py index 0c1f5287..34ed598b 100644 --- a/libra/statsd/scheduler.py +++ b/libra/statsd/scheduler.py @@ -82,7 +82,7 @@ class Sched(object): failed_nodes = gearman.send_pings(node_list) failed = len(failed_nodes) if failed > 0: - self._send_fails(lb_list, failed_nodes, lb_list) + self._send_fails(failed_nodes, lb_list) else: self.logger.error('No working API server found') return (0, 0) @@ -95,14 +95,15 @@ class Sched(object): message = ( 'Load balancer failed\n' 'ID: {0}\n' - 'IP: (1}\n' + 'IP: {1}\n' 'tenant: {2}\n'.format( - data['id'], data['floatingIPAddr'], + data['id'], data['floatingIpAddr'], data['loadBalancers'][0]['hpcs_tenantid'] ) ) for driver in self.drivers: - driver.send_alert(message, data['id']) + instance = driver(self.logger) + instance.send_alert(message, data['id']) def _get_node(self, node, node_list): for n in node_list: