Add API timing debug statements to openstack driver

This can help identify performance issues.

It also adds a timer to the keyscan for all statemachine drivers.

Change-Id: I389bd425458c05fc99c7b9f4640de7796cdafc06
This commit is contained in:
James E. Blair 2023-01-19 14:56:02 -08:00 committed by Benjamin Schanzel
parent 13007b6825
commit efcb814005
No known key found for this signature in database
3 changed files with 74 additions and 37 deletions

View File

@ -32,6 +32,7 @@ from nodepool.driver import statemachine
from nodepool import exceptions from nodepool import exceptions
from nodepool import stats from nodepool import stats
from nodepool import version from nodepool import version
from nodepool.nodeutils import Timer
CACHE_TTL = 10 CACHE_TTL = 10
@ -454,8 +455,10 @@ class OpenStackAdapter(statemachine.Adapter):
yield OpenStackInstance(self.provider, server, quota) yield OpenStackInstance(self.provider, server, quota)
def getQuotaLimits(self): def getQuotaLimits(self):
with Timer(self.log, 'API call get_compute_limits'):
compute = self._client.get_compute_limits() compute = self._client.get_compute_limits()
try: try:
with Timer(self.log, 'API call get_volume_limits'):
volume = self._client.get_volume_limits() volume = self._client.get_volume_limits()
except EndpointNotFound: except EndpointNotFound:
volume = None volume = None
@ -520,6 +523,7 @@ class OpenStackAdapter(statemachine.Adapter):
metadata = {} metadata = {}
if image_format: if image_format:
metadata['disk_format'] = image_format metadata['disk_format'] = image_format
with Timer(self.log, 'API call create_image'):
image = self._client.create_image( image = self._client.create_image(
name=image_name, name=image_name,
filename=filename, filename=filename,
@ -532,6 +536,7 @@ class OpenStackAdapter(statemachine.Adapter):
def deleteImage(self, external_id): def deleteImage(self, external_id):
self.log.debug(f"Deleting image {external_id}") self.log.debug(f"Deleting image {external_id}")
with Timer(self.log, 'API call delete_image'):
return self._client.delete_image(external_id) return self._client.delete_image(external_id)
# Local implementation # Local implementation
@ -598,6 +603,7 @@ class OpenStackAdapter(statemachine.Adapter):
create_args['meta'] = instance_properties create_args['meta'] = instance_properties
try: try:
with Timer(self.log, 'API call create_server'):
return self._client.create_server(wait=False, **create_args) return self._client.create_server(wait=False, **create_args)
except openstack.exceptions.BadRequestException: except openstack.exceptions.BadRequestException:
# We've gotten a 400 error from nova - which means the request # We've gotten a 400 error from nova - which means the request
@ -616,19 +622,23 @@ class OpenStackAdapter(statemachine.Adapter):
# This method is wrapped with an LRU cache in the constructor. # This method is wrapped with an LRU cache in the constructor.
def _listAZs(self): def _listAZs(self):
with Timer(self.log, 'API call list_availability_zone_names'):
return self._client.list_availability_zone_names() return self._client.list_availability_zone_names()
# This method is wrapped with an LRU cache in the constructor. # This method is wrapped with an LRU cache in the constructor.
def _findImage(self, name): def _findImage(self, name):
with Timer(self.log, 'API call get_image'):
return self._client.get_image(name, filters={'status': 'active'}) return self._client.get_image(name, filters={'status': 'active'})
# This method is wrapped with an LRU cache in the constructor. # This method is wrapped with an LRU cache in the constructor.
def _listFlavors(self): def _listFlavors(self):
with Timer(self.log, 'API call list_flavors'):
return self._client.list_flavors(get_extra=False) return self._client.list_flavors(get_extra=False)
# This method is only used by the nodepool alien-image-list # This method is only used by the nodepool alien-image-list
# command and only works with the openstack driver. # command and only works with the openstack driver.
def _listImages(self): def _listImages(self):
with Timer(self.log, 'API call list_images'):
return self._client.list_images() return self._client.list_images()
def _getFlavors(self): def _getFlavors(self):
@ -663,6 +673,7 @@ class OpenStackAdapter(statemachine.Adapter):
# This method is wrapped with an LRU cache in the constructor. # This method is wrapped with an LRU cache in the constructor.
def _findNetwork(self, name): def _findNetwork(self, name):
with Timer(self.log, 'API call get_network'):
network = self._client.get_network(name) network = self._client.get_network(name)
if not network: if not network:
raise Exception("Unable to find network %s in provider %s" % ( raise Exception("Unable to find network %s in provider %s" % (
@ -671,6 +682,7 @@ class OpenStackAdapter(statemachine.Adapter):
@cachetools.func.ttl_cache(maxsize=1, ttl=CACHE_TTL) @cachetools.func.ttl_cache(maxsize=1, ttl=CACHE_TTL)
def _listServers(self): def _listServers(self):
with Timer(self.log, 'API call list_servers'):
return self._client.list_servers(bare=True) return self._client.list_servers(bare=True)
@cachetools.func.ttl_cache(maxsize=1, ttl=CACHE_TTL) @cachetools.func.ttl_cache(maxsize=1, ttl=CACHE_TTL)
@ -682,6 +694,7 @@ class OpenStackAdapter(statemachine.Adapter):
@cachetools.func.ttl_cache(maxsize=1, ttl=CACHE_TTL) @cachetools.func.ttl_cache(maxsize=1, ttl=CACHE_TTL)
def _listFloatingIps(self): def _listFloatingIps(self):
with Timer(self.log, 'API call list_floating_ips'):
return self._client.list_floating_ips() return self._client.list_floating_ips()
def _refreshServer(self, obj): def _refreshServer(self, obj):
@ -705,6 +718,7 @@ class OpenStackAdapter(statemachine.Adapter):
def _getServerByIdNow(self, server_id): def _getServerByIdNow(self, server_id):
# A synchronous get server by id. Only to be used in error # A synchronous get server by id. Only to be used in error
# handling where we can't wait for the list to update. # handling where we can't wait for the list to update.
with Timer(self.log, 'API call get_server_by_id'):
return self._client.get_server_by_id(server_id) return self._client.get_server_by_id(server_id)
def _refreshServerDelete(self, obj): def _refreshServerDelete(self, obj):
@ -734,34 +748,42 @@ class OpenStackAdapter(statemachine.Adapter):
return None return None
def _needsFloatingIp(self, server): def _needsFloatingIp(self, server):
with Timer(self.log, 'API call _needs_floating_ip'):
return self._client._needs_floating_ip( return self._client._needs_floating_ip(
server=server, nat_destination=None) server=server, nat_destination=None)
def _createFloatingIp(self, server): def _createFloatingIp(self, server):
with Timer(self.log, 'API call create_floating_ip'):
return self._client.create_floating_ip(server=server, wait=True) return self._client.create_floating_ip(server=server, wait=True)
def _attachIpToServer(self, server, fip): def _attachIpToServer(self, server, fip):
# skip_attach is ignored for nova, which is the only time we # skip_attach is ignored for nova, which is the only time we
# should actually call this method. # should actually call this method.
with Timer(self.log, 'API call _attach_ip_to_server'):
return self._client._attach_ip_to_server( return self._client._attach_ip_to_server(
server=server, floating_ip=fip, server=server, floating_ip=fip,
skip_attach=True) skip_attach=True)
def _hasFloatingIps(self): def _hasFloatingIps(self):
# Not a network call
return self._client._has_floating_ips() return self._client._has_floating_ips()
def _getFloatingIps(self, server): def _getFloatingIps(self, server):
fips = openstack.cloud.meta.find_nova_interfaces( fips = openstack.cloud.meta.find_nova_interfaces(
server['addresses'], ext_tag='floating') server['addresses'], ext_tag='floating')
fips = [self._client.get_floating_ip( ret = []
id=None, filters={'floating_ip_address': fip['addr']}) for fip in fips:
for fip in fips] with Timer(self.log, 'API call get_floating_ip'):
return fips ret.append(self._client.get_floating_ip(
id=None, filters={'floating_ip_address': fip['addr']}))
return ret
def _deleteFloatingIp(self, fip): def _deleteFloatingIp(self, fip):
with Timer(self.log, 'API call delete_floating_ip'):
self._client.delete_floating_ip(fip['id'], retry=0) self._client.delete_floating_ip(fip['id'], retry=0)
def _deleteServer(self, external_id): def _deleteServer(self, external_id):
with Timer(self.log, 'API call delete_server'):
self._client.delete_server(external_id) self._client.delete_server(external_id)
def _getFlavorFromServer(self, server): def _getFlavorFromServer(self, server):

View File

@ -23,7 +23,7 @@ from concurrent.futures.thread import ThreadPoolExecutor
from nodepool.driver import Driver, NodeRequestHandler, Provider from nodepool.driver import Driver, NodeRequestHandler, Provider
from nodepool.driver.utils import QuotaInformation, QuotaSupport from nodepool.driver.utils import QuotaInformation, QuotaSupport
from nodepool.nodeutils import nodescan from nodepool.nodeutils import nodescan, Timer
from nodepool.logconfig import get_annotated_logger from nodepool.logconfig import get_annotated_logger
from nodepool import stats from nodepool import stats
from nodepool import exceptions from nodepool import exceptions
@ -35,7 +35,7 @@ import cachetools
def keyscan(host_key_checking, node_id, interface_ip, def keyscan(host_key_checking, node_id, interface_ip,
connection_type, connection_port, connection_type, connection_port,
timeout): timeout, log):
"""A standalone function for scanning keys to pass to a thread/process """A standalone function for scanning keys to pass to a thread/process
pool executor pool executor
""" """
@ -49,6 +49,7 @@ def keyscan(host_key_checking, node_id, interface_ip,
gather_hostkeys = True gather_hostkeys = True
else: else:
gather_hostkeys = False gather_hostkeys = False
with Timer(log, 'Keyscan'):
keys = nodescan(interface_ip, port=connection_port, keys = nodescan(interface_ip, port=connection_port,
timeout=timeout, gather_hostkeys=gather_hostkeys) timeout=timeout, gather_hostkeys=gather_hostkeys)
except Exception: except Exception:
@ -220,7 +221,8 @@ class StateMachineNodeLauncher(stats.StatsReporter):
label.host_key_checking, label.host_key_checking,
node.id, node.interface_ip, node.id, node.interface_ip,
node.connection_type, node.connection_port, node.connection_type, node.connection_port,
self.manager.provider.boot_timeout) self.manager.provider.boot_timeout,
self.log)
self.keyscan_future = future self.keyscan_future = future
except kze.SessionExpiredError: except kze.SessionExpiredError:
# Our node lock is gone, leaving the node state as BUILDING. # Our node lock is gone, leaving the node state as BUILDING.

View File

@ -168,3 +168,16 @@ class Attributes(object):
def __init__(self, **kw): def __init__(self, **kw):
setattr(self, '__dict__', kw) setattr(self, '__dict__', kw)
class Timer:
def __init__(self, log, msg):
self.log = log
self.msg = msg
def __enter__(self):
self.start = time.perf_counter()
def __exit__(self, type, value, traceback):
delta = time.perf_counter() - self.start
self.log.debug(f'{self.msg} in {delta}')