Merge "add backround ping during disruptive actions"
commit b565e89f22
@@ -62,3 +62,5 @@ RECEIVED = _ping.RECEIVED
 UNRECEIVED = _ping.UNRECEIVED

 PingStatistics = _statistics.PingStatistics
+write_ping_to_file = _ping.write_ping_to_file
+check_ping_statistics = _ping.check_ping_statistics
@@ -15,9 +15,14 @@
 # under the License.
 from __future__ import absolute_import

+import glob
+import json
+import io
+import os
 import time
 import typing

 import netaddr
 from oslo_log import log

@@ -415,3 +420,77 @@ def handle_ping_unknow_host_error(text):
     if text.endswith(suffix):
         details = text[:-len(suffix)].strip().split()[-1]
         raise _exception.UnknowHostError(details=details)
+
+
+def ping_to_json(ping_result: _statistics.PingStatistics) -> str:
+    '''Transform an iter_statistics.statistics object
+    into a json string with ping ip and result'''
+    destination = str(ping_result.destination)
+    transmitted = ping_result.transmitted
+    received = ping_result.received
+    timestamp = time.ctime(ping_result.begin_interval)
+    ping_result_line_dict = {"destination": destination,
+                             "transmitted": transmitted,
+                             "received": received,
+                             "timestamp": timestamp}
+    return json.dumps(ping_result_line_dict)
+
+
+def write_ping_to_file(ping_ip=None, output_dir='tobiko_ping_results'):
+    '''Use iter_statistics to ping a host and record statistics;
+    results go to output_dir, filenames correlate with the vm fip'''
+    output_dir_path = f'{sh.get_user_home_dir()}/{output_dir}'
+    if not os.path.exists(output_dir_path):
+        os.makedirs(output_dir_path)
+    output_filename = f'ping_{ping_ip}.log'
+    output_path = os.path.join(output_dir_path, output_filename)
+    LOG.info(f'starting ping process to > {ping_ip} , '
+             f'output file is : {output_path}')
+    ping_result_statistics = iter_statistics(parameters=None,
+                                             host=ping_ip, until=None,
+                                             timeout=99999,
+                                             check=True)
+    for ping_result in ping_result_statistics:
+        with open(output_path, "at") as ping_result_file:
+            ping_result_file.write(ping_to_json(ping_result) + "\n")
+        time.sleep(5)
+
+
+def get_vm_ping_log_files(glob_ping_log_pattern='tobiko_ping_results/ping_'
+                                                '*.log'):
+    """Return the log files matching the pattern"""
+    glob_path = f'{sh.get_user_home_dir()}/{glob_ping_log_pattern}'
+    for filename in glob.glob(glob_path):
+        LOG.info(f'found the following ping_vm_log file: {filename}')
+        vm_ping_log_filename = filename
+        yield vm_ping_log_filename
+
+
+def rename_ping_staistics_file_to_checked(filepath):
+    """Append _checked to a ping statistics file once its check is finished"""
+    os.rename(filepath, f'{filepath}_checked')
+
+
+def check_ping_statistics(failure_limit=10):
+    """Get the list of ping_vm_log files and
+    iterate their lines, checking whether the max ping
+    failures have been reached per fip=file"""
+    # iterate over ping_vm_log files:
+    for filename in list(get_vm_ping_log_files()):
+        with io.open(filename, 'rt') as fd:
+            LOG.info(f'checking ping log file: {filename}, '
+                     f'failure_limit is :{failure_limit}')
+            failure_counter = 0
+            for ping_line in fd.readlines():
+                ping_line = json.loads(ping_line.rstrip())
+                if ping_line['transmitted'] != ping_line['received']:
+                    failure_counter += 1
+                    LOG.debug(f'found ping failure to :'
+                              f' {ping_line["destination"]}')
+                if failure_counter >= failure_limit:
+                    rename_ping_staistics_file_to_checked(filename)
+                    tobiko.fail(f'{failure_counter} ping failures found '
+                                f'to vm fip destination: '
+                                f'{ping_line["destination"]}')
+        LOG.info(f'no failures in ping log file: {filename}')
+        rename_ping_staistics_file_to_checked(filename)
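The helpers above exchange a simple one-JSON-object-per-line log: ping_to_json() serializes each PingStatistics sample, and check_ping_statistics() re-reads those lines and counts the ones whose transmitted and received fields differ. A minimal self-contained sketch of that round trip (the sample values and the temporary file are invented for illustration; the real code pings a live fip through iter_statistics()):

import json
import tempfile
import time

failure_limit = 3
# samples shaped like the dict built inside ping_to_json()
samples = [
    {"destination": "10.0.0.5", "transmitted": 1, "received": 1,
     "timestamp": time.ctime()},
    {"destination": "10.0.0.5", "transmitted": 1, "received": 0,
     "timestamp": time.ctime()},
]

with tempfile.NamedTemporaryFile('w+t', suffix='.log') as fd:
    for sample in samples:
        fd.write(json.dumps(sample) + "\n")   # one JSON object per line
    fd.seek(0)
    failure_counter = 0
    for line in fd.readlines():
        record = json.loads(line.rstrip())
        if record['transmitted'] != record['received']:
            failure_counter += 1              # same test as check_ping_statistics()

print(f'failures: {failure_counter}, '
      f'limit reached: {failure_counter >= failure_limit}')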
@@ -24,6 +24,7 @@ from tobiko.shell.sh import _io
 from tobiko.shell.sh import _local
 from tobiko.shell.sh import _mktemp
 from tobiko.shell.sh import _nameservers
+from tobiko.shell.sh import _path
 from tobiko.shell.sh import _process
 from tobiko.shell.sh import _ps
 from tobiko.shell.sh import _reboot
@@ -71,6 +72,10 @@ ListNameserversFixture = _nameservers.ListNameserversFixture
 list_nameservers = _nameservers.list_nameservers

 process = _process.process
+start_background_process = _process.start_background_process
+check_or_start_background_process =\
+    _process.check_or_start_background_process
+get_user_home_dir = _path.get_user_home_dir
 str_from_stream = _process.str_from_stream
 ShellProcessFixture = _process.ShellProcessFixture

@@ -16,6 +16,7 @@
 from __future__ import absolute_import

 import typing  # noqa
+from os.path import expanduser

 from oslo_log import log

@@ -25,6 +26,11 @@ import tobiko
 LOG = log.getLogger(__name__)


+def get_user_home_dir():
+    """return a str path of the current user's home"""
+    return expanduser("~")
+
+
 class ExecutePathFixture(tobiko.SharedFixture):

     def __init__(self, executable_dirs=None, environ=None):
@@ -16,11 +16,17 @@
 from __future__ import absolute_import

 import io
+import os
+import time
 import typing  # noqa
+from multiprocessing import Process as MultiProcess
+
+import psutil
 from oslo_log import log


 import tobiko
+from tobiko.shell import sh
 from tobiko.shell.sh import _command
 from tobiko.shell.sh import _exception
 from tobiko.shell.sh import _io
@@ -457,3 +463,104 @@ def default_sudo_command():
 def network_namespace_command(network_namespace, command):
     return _command.shell_command(['/sbin/ip', 'netns', 'exec',
                                    network_namespace]) + command
+
+
+def start_background_process(bg_function=None, bg_process_name=None, **kwargs):
+    """Background process that takes a function name as parameter
+    and executes it in the background using a separate, non-attached process.
+    That process will continue to run even after Tobiko exits.
+    params:
+    bg_function= function name to run in background
+    bg_process_pid_file= file path that will contain the process pid; multiple
+    processes can use the same file, pids are appended.
+    outputs: writes process pids to a file, one per line
+    returns: the process object"""
+
+    # define a parent process that will be killed, orphaning the actual
+    # background process so it runs unattached in the background;
+    # this way the background process is not stopped when tobiko exits
+    def _background_process_parent():
+        p = MultiProcess(target=bg_function, name=bg_process_name,
+                         kwargs=kwargs)
+        p.start()
+        LOG.info(
+            f'Started background function: {bg_function.__name__} process pid '
+            f'is: {p.pid}, process name: {bg_process_name}, '
+            f'main execution process continues...')
+        # append bg_process pid to a file
+        bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
+                                    f'{bg_process_name}_pids_file'
+        with open(bg_process_pids_file_name, "at") as bg_process_pid_file:
+            bg_process_pid_file.write(str(p.pid) + "\n")
+            LOG.debug(f'Writing pid: {p.pid} to pids file:'
+                      f' {bg_process_pids_file_name}')
+
+    # start the parent process, nested with a started child process,
+    # then kill the parent
+    d = MultiProcess(target=_background_process_parent)
+    d.daemon = False
+    d.start()
+    LOG.debug(f'Background process parent started pid: {d.pid}')
+    time.sleep(1)
+    d.terminate()
+    LOG.debug(f'Background process orphaned, parent killed, parent pid:'
+              f' {d.pid}')
+
+
+def stop_process(pid_list):
+    """Stop (kill) the processes in a pid list"""
+    for pid in pid_list:
+
+        LOG.info(f'stopping process with pid: {pid}')
+        sh.execute(f'sudo kill -9 {pid}')
+
+
+def get_bg_procs_pids(bg_process_name):
+    """return a list of pids from the specified bg_process_name file"""
+    bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
+                                f'{bg_process_name}_pids_file'
+    bg_process_name_pid_list = []
+    if os.path.isfile(bg_process_pids_file_name):
+        LOG.info(f'found previous background process file :'
+                 f' {bg_process_pids_file_name}, checking its processes.')
+        # go over the file's pids
+        with io.open(bg_process_pids_file_name, 'rt') as fd:
+            for line in fd.readlines():
+                pid = line.rstrip()
+                try:
+                    proc = psutil.Process(int(pid))
+                # continue if pid is not a valid int or doesn't exist
+                except (TypeError, ValueError, psutil.NoSuchProcess):
+                    continue
+                # keep the pid only if the process is still running
+                if proc.status() != psutil.STATUS_ZOMBIE:
+                    LOG.debug(f'process {pid} is alive, adding it to the list')
+                    bg_process_name_pid_list.append(pid)
+    return bg_process_name_pid_list
+
+
+def check_or_start_background_process(bg_function=None,
+                                      bg_process_name=None,
+                                      check_function=None, **kwargs):
+    """Check if the process exists; if so, stop the process,
+    then execute some check logic, i.e. a check function.
+    If no process by that name is running,
+    start a separate process, i.e. a background function.
+    params:
+    bg_process_name= process name
+    bg_function: function name
+    check_function: function name """
+    procs_running_list = get_bg_procs_pids(bg_process_name)
+    if procs_running_list:
+        stop_process(procs_running_list)
+        # execute process check, i.e. go over the process results file
+        LOG.info(f'running a check function: {check_function} '
+                 f'on results of processes: {bg_process_name}')
+        check_function()
+
+    else:  # if the background process is not present, start one:
+        LOG.info(f'No previous background processes found:'
+                 f' {bg_process_name}, starting a new background process '
+                 f'of function: {bg_function}')
+        start_background_process(bg_function=bg_function,
+                                 bg_process_name=bg_process_name, **kwargs)
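The orphaning trick used by start_background_process() is easy to miss in the diff: a short-lived parent process starts the real worker, records its pid, and is then terminated before it can join the worker, so the worker keeps running after the caller (and Tobiko itself) exits. A stripped-down sketch of that pattern, with a throwaway worker function and file paths made up for the example:

import os
import time
from multiprocessing import Process


def worker(path):
    # stand-in for the real bg_function (e.g. ping.write_ping_to_file)
    with open(path, 'at') as fd:
        for _ in range(3):
            fd.write(f'worker {os.getpid()} still alive\n')
            time.sleep(1)


def parent(path):
    child = Process(target=worker, args=(path,))
    child.start()                     # the real worker, left unattached
    with open(path + '_pids', 'at') as fd:
        fd.write(str(child.pid) + '\n')


if __name__ == '__main__':
    p = Process(target=parent, args=('/tmp/bg_ping_demo.log',))
    p.daemon = False                  # daemonic processes may not have children
    p.start()
    time.sleep(1)                     # let the parent spawn the worker
    p.terminate()                     # kill the parent; the worker is orphaned
                                      # and keeps writing to the log file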
@@ -124,6 +124,9 @@ class DisruptTripleoNodesTest(testtools.TestCase):
     def test_0vercloud_health_check(self):
         OvercloudHealthCheck.run_before(skip_mac_table_size_test=False)

+    def test_check_background_vm_ping(self):
+        nova.check_or_start_background_vm_ping()
+
     def test_hard_reboot_controllers_recovery(self):
         OvercloudHealthCheck.run_before()
         cloud_disruptions.reset_all_controller_nodes()
@@ -7,11 +7,13 @@ from oslo_log import log
 import pandas

 import tobiko
+from tobiko import tripleo
 from tobiko.tripleo import overcloud
 from tobiko.shell import ping
 from tobiko.shell import sh
 from tobiko.openstack import nova
 from tobiko.openstack import topology
+from tobiko.openstack import stacks
 from tobiko.tripleo import containers


@@ -123,6 +125,7 @@ def check_ping_vm_fip(fip):
 def check_df_vms_ping(df):
     """input: dataframe with vms_ids
     try to ping all vms in df"""
+
     for vm_id in df.vm_id.to_list():
         check_ping_vm_fip(vm_floating_ip(vm_id))

@@ -218,3 +221,26 @@ def check_computes_vms_running_via_virsh():
         else:
             LOG.info(f"{vm_id} is not in running state on "
                      f"{compute.hostname}")
+
+
+def get_nova_server_floating_ip():
+    """get a running vm's floating_ip"""
+    return tobiko.setup_fixture(
+        stacks.CirrosServerStackFixture).floating_ip_address
+
+
+# Test is intended for D/S env
+@tripleo.skip_if_missing_overcloud
+def check_or_start_background_vm_ping():
+    """Check if a ping process exists; if so, stop it and check ping health,
+    if not, start a new separate ping process.
+    Executes a background ping to a vm floating_ip;
+    this test is intended to be run and picked up again
+    by the next tobiko run. Ping results are parsed
+    and a failure is raised if ping failures exceed a certain amount"""
+    ping_vm_fip = get_nova_server_floating_ip()
+    sh.check_or_start_background_process(
+        bg_function=ping.write_ping_to_file,
+        bg_process_name='tobiko_background_ping',
+        check_function=ping.check_ping_statistics,
+        ping_ip=ping_vm_fip)
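Taken together, the new nova helper is meant to round-trip across Tobiko runs: the first run finds no pid file and starts the background pinger, and a later run (after the disruptive action) finds the recorded pid, kills the pinger and lets ping.check_ping_statistics() fail the test if too many replies were lost. The call sequence below only illustrates that flow; it assumes a deployed overcloud with the Cirros server stack available and is not an executable unit test:

from tobiko.openstack import nova

# run 1, before the disruption: no tobiko_background_ping pid file yet,
# so a background ping to the server fip is started and its pid recorded
nova.check_or_start_background_vm_ping()

# ... disruptive action runs, Tobiko exits, the cloud recovers ...

# run 2: the recorded pid is found, the pinger is stopped and the ping
# log is checked; the test fails if the failure_limit was exceeded
nova.check_or_start_background_vm_ping()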