diff --git a/bindep.txt b/bindep.txt new file mode 100644 index 0000000..64b038b --- /dev/null +++ b/bindep.txt @@ -0,0 +1,2 @@ +gcc [compile] +libc-dev [compile] diff --git a/openstack_tools/cmd/prometheus/__init__.py b/openstack_tools/cmd/prometheus/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/openstack_tools/cmd/prometheus/routers_l3_ha.py b/openstack_tools/cmd/prometheus/routers_l3_ha.py new file mode 100644 index 0000000..f46fccf --- /dev/null +++ b/openstack_tools/cmd/prometheus/routers_l3_ha.py @@ -0,0 +1,80 @@ +# Copyright (c) 2020 CLOUD&HEAT GmbH https://www.cloudandheat.com +# Copyright 2020 VEXXHOST, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Generate data for all routers and their L3 HA states. + +There are scenarios where an L3 HA router can end up being active in many +different L3 agents. This can be tricky to find and cause chaos in the system. +While effort should be put into finding the root cause of this, this will help +alert and catch any occurrences of it. 
# Suffix neutron gives to the keepalived VRRP PID file of each HA router.
_PID_SUFFIX = '.pid.keepalived-vrrp'


def _read_router_state(pid_file):
    """Return ``(router_id, master)`` for one keepalived PID file.

    ``master`` is 1 when the router's ``state`` file contains the word
    ``master`` and 0 otherwise.

    Returns ``None`` when the router should not be reported: the PID file
    is unreadable or does not contain an integer, the recorded process is
    no longer running, or the ``state`` file cannot be read.
    """
    try:
        with open(pid_file) as pid_fd:
            pid = int(pid_fd.read())
    except (IOError, OSError, ValueError):
        # The file can vanish between glob() and open() when a router is
        # being torn down, or be empty while it is being written; skip
        # this router instead of aborting the whole collection run.
        return None

    # Check if the process is _actually_ running
    if not psutil.pid_exists(pid):
        return None

    # Strip the suffix from the end only, so that a state directory whose
    # own path happens to contain the marker is not mangled.
    state_path = pid_file[:-len(_PID_SUFFIX)]
    state_file = os.path.join(state_path, 'state')
    try:
        with open(state_file) as state_fd:
            state = state_fd.read()
    except (IOError, OSError):
        # No readable state file: nothing trustworthy to report.
        return None

    return os.path.basename(state_path), 1 if 'master' in state else 0


def _collect_metrics(metric, state_dir):
    """Render one ``metric{router_id="..."} 0|1`` line per live HA router.

    Routers are discovered through the PID files found under
    ``<state_dir>/ha_confs``. They are emitted in sorted path order so the
    output is stable between runs. Returns an empty string when there is
    nothing to report.
    """
    pid_glob = os.path.join(state_dir, 'ha_confs', '*' + _PID_SUFFIX)

    lines = []
    for pid_file in sorted(glob.glob(pid_glob)):
        router = _read_router_state(pid_file)
        if router is None:
            continue
        router_id, master = router
        lines.append('%s{router_id="%s"} %d\n' % (metric, router_id, master))

    return ''.join(lines)


def _write_atomically(path, data):
    """Write ``data`` to ``path`` so that readers never see a partial file.

    The data goes to a sibling temporary file first and is then renamed
    over ``path``. The temporary file lives in the same directory so the
    rename stays on one filesystem, and it is removed again if anything
    fails. Any error is re-raised to the caller.
    """
    tmp_path = '%s.%d.tmp' % (path, os.getpid())
    try:
        with open(tmp_path, 'w') as tmp_fd:
            tmp_fd.write(data)
        os.rename(tmp_path, path)
    except Exception:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


def main(argv=None):
    """Entry-point for script.

    Reports, for every HA router with a running keepalived process, whether
    its state file marks it as master.

    :param argv: command-line arguments without the program name. When
        ``None`` (the default, used by the console script) argparse reads
        ``sys.argv`` itself.

    Options:
      --metric  name of the emitted metric
      --state   neutron state path that contains ``ha_confs``
      --loop    repeat every N seconds; when unset or 0, run once and return
      --output  file to write the metrics to

    The metrics are always printed to STDOUT; when ``--output`` is given
    they are additionally written to that file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--metric", default="node_openstack_l3_router_master",
                        help="Name of metric")
    parser.add_argument("--state", default="/var/lib/neutron",
                        help="Neutron state path")
    parser.add_argument("--loop", type=int, help="Loop every N seconds")
    parser.add_argument("--output", help="Output file (default to STDOUT)")
    args = parser.parse_args(argv)

    while True:
        output = _collect_metrics(args.metric, args.state)

        if args.output:
            # Rename into place so a concurrent scrape of the output file
            # never observes a truncated or half-written result.
            _write_atomically(args.output, output)

        print(output)

        if not args.loop:
            break
        time.sleep(args.loop)