Deploy with CPU power management enabled

Until we fixed https://bugs.launchpad.net/nova/+bug/2056613 and
https://bugs.launchpad.net/nova/+bug/2056612, libvirt CPU power
management was broken with the `isolate` emulator thread policy and
with live migration. This was noticed more or less by accident, when
the patch above this one triggered the right conditions for our
existing tests, whose coverage was incomplete, to break.

To prevent this from happening again, this patch turns on CPU power
management in our CI job and modifies the test to assume that it is
turned on.

Change-Id: Ia48289ee4f909a33bfe6dac19eccff558b448da9
Artom Lifshitz 2024-02-21 13:11:45 -05:00
parent 766ff042bd
commit 869fdccf31
4 changed files with 29 additions and 53 deletions
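
For context, "CPU power management" here refers to nova's
[libvirt]/cpu_power_management option, which the removed test used to
enable by hand (together with the cpu_state strategy) and which the CI
deployment now carries permanently. A minimal sketch, using stdlib
configparser and a made-up output file name, of what this means for the
compute node's nova configuration:

    # Minimal sketch (not the CI tooling): the [libvirt] options behind
    # "CPU power management". With the cpu_state strategy, nova offlines
    # dedicated CPUs at startup and onlines them while a guest uses them.
    import configparser

    conf = configparser.ConfigParser()
    conf['libvirt'] = {
        'cpu_power_management': 'True',
        # The strategy the removed test used to set explicitly.
        'cpu_power_management_strategy': 'cpu_state',
    }
    with open('nova-cpu.conf.example', 'w') as f:  # hypothetical file name
        conf.write(f)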


@@ -65,6 +65,7 @@
       compute-feature-enabled:
         volume_backed_live_migration: true
         stable_compute_uuid_supported: true
+        cpu_power_management: true
       auth:
         tempest_roles: creator
   post-config:
@@ -79,6 +80,7 @@
           cpu_mode: custom
           cpu_models: Nehalem
           cpu_model_extra_flags: vme,+ssse3,-mmx
+          cpu_power_management: True
           virt_type: kvm
           rx_queue_size: 1024
           swtpm_enabled: True
@@ -103,6 +105,7 @@
           cpu_mode: custom
           cpu_models: Nehalem
           cpu_model_extra_flags: vme,+ssse3,-mmx
+          cpu_power_management: True
           virt_type: kvm
           rx_queue_size: 1024
           swtpm_enabled: True


@@ -954,14 +954,12 @@ class NUMACPUDedicatedLiveMigrationTest(NUMALiveMigrationBase):

         # Validate shared server A now has a shared cpuset that is a equal
         # to it's new host's cpu_shared_set
-        # FIXME(jparker) change host1_shared_set to host2_shared_set once
-        # Nova bug 1869804 has been addressed
         shared_set_a = self._get_shared_cpuset(shared_server_a['id'])
-        host_a_shared_set = host_sm_a.get_cpu_shared_set()
+        host_b_shared_set = host_sm_b.get_cpu_shared_set()
         self.assertCountEqual(
-            shared_set_a, host_a_shared_set, 'After migration of server %s, '
+            shared_set_a, host_b_shared_set, 'After migration of server %s, '
             'shared CPU set %s is not equal to new shared set %s' %
-            (shared_server_a['id'], shared_set_a, host_a_shared_set))
+            (shared_server_a['id'], shared_set_a, host_b_shared_set))

         # Live migrate dedicated server A to the same host holding
         # dedicated server B. End result should be all 4 servers are on
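
A side note on the assertion above: assertCountEqual passes when two
sequences contain the same elements with the same multiplicities,
regardless of order or container type, which is the right check for
comparing a guest's shared cpuset against a host's cpu_shared_set. A
standalone illustration with made-up CPU ids:

    # Standalone illustration of assertCountEqual semantics; the CPU ids
    # below are made up, not values from the CI job.
    import unittest


    class CpusetComparisonExample(unittest.TestCase):
        def test_order_and_container_do_not_matter(self):
            guest_shared_set = {4, 5, 6, 7}
            host_cpu_shared_set = [7, 6, 5, 4]
            self.assertCountEqual(guest_shared_set, host_cpu_shared_set)


    if __name__ == '__main__':
        unittest.main()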


@@ -35,22 +35,29 @@ class TestCPUStateMgmt(base.BaseWhiteboxComputeTest):
             vcpus=1,
             extra_specs={'hw:cpu_policy': 'dedicated'})

-    def _assert_cpus_initial_state(self, host, shared_cpus, dedicated_cpus,
-                                   sysfsclient):
-        """Assert that nova-compute disabled dedicated CPUs on startup"""
-        # In case we didn't have a full set specified, at least make sure that
-        # our shared CPUs are in the subset of online CPUs (i.e. we didn't
-        # offline any of the shared ones).
+    def test_cpu_state(self):
+        host = self.list_compute_hosts()[0]
+        sysfsclient = clients.SysFSClient(host)
+        sm = clients.NovaServiceManager(host, 'nova-compute',
+                                        self.os_admin.services_client)
+        dedicated_cpus = sm.get_cpu_dedicated_set()
+        shared_cpus = sm.get_cpu_shared_set()
+
+        if len(dedicated_cpus) < 2:
+            raise self.skipException('Multiple dedicated CPUs required')
+
+        # Assert that nova-compute disabled dedicated CPUs on startup. In case
+        # we didn't have a full set specified, at least make sure that our
+        # shared CPUs are in the subset of online CPUs (i.e. we didn't offline
+        # any of the shared ones).
         online = sysfsclient.get_sysfs_value('devices/system/cpu/online')
         self.assertTrue(shared_cpus.issubset(hardware.parse_cpu_spec(online)))

-        # All our dedicated CPUs should be offlined at service startup.
-        offline = sysfsclient.get_sysfs_value('devices/system/cpu/offline')
-        self.assertEqual(dedicated_cpus, hardware.parse_cpu_spec(offline))
-
-    def _assert_cpu_onlined_guest(self, host, dedicated_cpus, sysfsclient):
+        # All our dedicated CPUs should be offlined (this assumes running
+        # serially with no other guests using the dedicated CPUs).
         offline_before = hardware.parse_cpu_spec(
             sysfsclient.get_sysfs_value('devices/system/cpu/offline'))
+        self.assertEqual(dedicated_cpus, offline_before)

         server = self.create_test_server(clients=self.os_admin,
                                          flavor=self.flavor['id'],
@@ -67,41 +74,6 @@ class TestCPUStateMgmt(base.BaseWhiteboxComputeTest):
                                              server['id'])

         # Once it is gone, the dedicated CPU should be offline again
-        offline = hardware.parse_cpu_spec(
+        offline_final = hardware.parse_cpu_spec(
             sysfsclient.get_sysfs_value('devices/system/cpu/offline'))
-        self.assertEqual(offline_before, offline)
-
-    def online_test_cpu(self, cpus, sysfsclient):
-        """Put our test CPUs back to online status"""
-        for cpu in cpus:
-            sysfsclient.set_sysfs_value(
-                'devices/system/cpu/cpu%i/online' % cpu, '1')
-
-    def test_cpu_state(self):
-        host = self.list_compute_hosts()[0]
-        sysfsclient = clients.SysFSClient(host)
-
-        # Check that we don't have any offline CPUs to start with
-        offline = sysfsclient.get_sysfs_value('devices/system/cpu/offline')
-        self.assertEqual("", offline,
-                         'System has offlined CPUs unexpectedly!')
-
-        sm = clients.NovaServiceManager(host, 'nova-compute',
-                                        self.os_admin.services_client)
-        dedicated_cpus = sm.get_cpu_dedicated_set()
-        shared_cpus = sm.get_cpu_shared_set()
-        opts = [('libvirt', 'cpu_power_management', 'True'),
-                ('libvirt', 'cpu_power_management_strategy', 'cpu_state')]
-
-        if len(dedicated_cpus) < 2:
-            raise self.skipException('Multiple dedicated CPUs required')
-
-        # Nova will not online the CPUs it manages on shutdown, so we need
-        # to re-online it before we finish here to leave the system as we
-        # found it
-        self.addCleanup(self.online_test_cpu, dedicated_cpus, sysfsclient)
-
-        with sm.config_options(*tuple(opts)):
-            self._assert_cpus_initial_state(host, shared_cpus, dedicated_cpus,
-                                            sysfsclient)
-            self._assert_cpu_onlined_guest(host, dedicated_cpus, sysfsclient)
+        self.assertEqual(offline_before, offline_final)
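
The offline/online assertions in this test rely on the kernel's sysfs
CPU-list notation (e.g. "0-2,4"), which parse_cpu_spec turns into a set
of CPU ids. A minimal standalone sketch of that idea and of the two
checks the test performs; this is an illustration, not the plugin's
implementation:

    # Minimal sketch of parsing the sysfs CPU-list format ("0-2,4") into
    # a set of CPU ids, mirroring what the test does with parse_cpu_spec.
    def parse_cpu_list(spec):
        cpus = set()
        spec = spec.strip()
        if not spec:
            return cpus
        for chunk in spec.split(','):
            if '-' in chunk:
                start, end = chunk.split('-')
                cpus.update(range(int(start), int(end) + 1))
            else:
                cpus.add(int(chunk))
        return cpus


    if __name__ == '__main__':
        # Example values; on a real host these come from
        # /sys/devices/system/cpu/{online,offline}.
        dedicated = parse_cpu_list('2-3')
        shared = parse_cpu_list('0-1')
        offline = parse_cpu_list('2-3')
        online = parse_cpu_list('0-1,4-7')
        assert dedicated == offline      # dedicated CPUs offlined at startup
        assert shared.issubset(online)   # shared CPUs were left online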


@@ -330,4 +330,7 @@ compute_features_group_opts = [
     cfg.BoolOpt('bochs_display_support',
                 default=False,
                 help="Guests support bochs display device"),
+    cfg.BoolOpt('cpu_power_management',
+                default=False,
+                help="Libvirt CPU power management is turned on."),
 ]
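
For completeness, the new flag is a plain oslo.config boolean. A
standalone sketch of how such an option is registered and read; the
group name 'whitebox_compute_features' is illustrative, not
necessarily the plugin's real group:

    # Standalone sketch of registering and reading a BoolOpt like the
    # one added above. The group name below is assumed for illustration.
    from oslo_config import cfg

    CONF = cfg.ConfigOpts()
    group = cfg.OptGroup(name='whitebox_compute_features')
    CONF.register_group(group)
    CONF.register_opts(
        [cfg.BoolOpt('cpu_power_management',
                     default=False,
                     help="Libvirt CPU power management is turned on.")],
        group=group)

    CONF(args=[])  # normally populated from tempest.conf
    if not CONF.whitebox_compute_features.cpu_power_management:
        print('Deployment without CPU power management; the related '
              'whitebox assertions would not apply here.')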