From 3ff555d1c8511119e51c2c06e6be3c49fdea89f7 Mon Sep 17 00:00:00 2001
From: James Parker
Date: Wed, 29 Nov 2023 11:16:58 -0500
Subject: [PATCH] Update nodes yaml to use cpu architecture

Migrate cpu-relevant configuration parameters from tempest.conf to the
nodes_yaml file. Previously, cpu information about the hosts, such as
the cpu topology and the dedicated/shared sets, was pulled from
tempest.conf. This change moves that information to the nodes yaml
approach [1] used for accessing host-specific service information. The
format of the information can be seen below:

compute-0.redhat.local:
  services:
    libvirt:
      container_name: nova_virtqemud
      start_command: 'systemctl start tripleo_nova_virtqemud'
      stop_command: 'systemctl stop tripleo_nova_virtqemud'
    nova-compute:
      container_name: nova_compute
      config_path: '/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf'
      start_command: 'systemctl start tripleo_nova_compute'
      stop_command: 'systemctl stop tripleo_nova_compute'
  cpu_shared_set: 0,1
  cpu_dedicated_set: 4,5,6,7
  numa:
    node-0:
      cpus: "0-3"
    node-1:
      cpus: "4-7"

[1] https://opendev.org/openstack/whitebox-tempest-plugin/commit/3fe1d72fa6d9d287a19c26d5080b43fd3f203a0a

Change-Id: I1f22131dc04a2d7a5f010da2dfa3f4e9524656a2
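Note (illustrative, not part of this patch): the per-host attributes above are
ordinary YAML, so a test-side consumer only needs yaml.safe_load to read them;
the plugin's actual entry points are get_host_details()/get_all_hosts_details()
in whitebox_tempest_plugin/utils.py (see the last hunk of this patch). A
minimal sketch parsing the sample from this commit message:

# Illustrative sketch only -- not part of this patch. Parses the sample nodes
# yaml shown above and reads the per-host cpu attributes the tests rely on.
import yaml

sample = """
compute-0.redhat.local:
  services:
    nova-compute:
      config_path: '/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf'
  cpu_shared_set: 0,1
  cpu_dedicated_set: 4,5,6,7
  numa:
    node-0:
      cpus: "0-3"
    node-1:
      cpus: "4-7"
"""

nodes = yaml.safe_load(sample)
host = nodes['compute-0.redhat.local']
print(host['cpu_dedicated_set'])   # "4,5,6,7" -- a plain string, parsed later
print(host['numa']['node-0'])      # {'cpus': '0-3'}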
---
 .zuul.yaml                                         |   2 +
 playbooks/templates/compute_nodes.yaml.j2          |  13 --
 .../whitebox-devstack-ceph-multinode.yaml.j2       |  22 +++
 .../whitebox-devstack-multinode.yaml.j2            |  34 ++++
 playbooks/whitebox/pre.yaml                        |  30 ++--
 .../api/compute/test_cpu_pinning.py                | 148 +++++++++++++-----
 whitebox_tempest_plugin/utils.py                   |   9 ++
 7 files changed, 187 insertions(+), 71 deletions(-)
 delete mode 100644 playbooks/templates/compute_nodes.yaml.j2
 create mode 100644 playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2
 create mode 100644 playbooks/templates/whitebox-devstack-multinode.yaml.j2

diff --git a/.zuul.yaml b/.zuul.yaml
index 29a11ba1..b7a3a3aa 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -54,6 +54,7 @@
       # addition to test vTPM hosts need swtpm as well
       extra_packages: ovmf,swtpm-tools
       tempest_exclude_regex: ^whitebox_tempest_plugin\.api\.compute\.test_hugepages
+      compute_node_template_name: whitebox-devstack-multinode.yaml.j2
       devstack_localrc:
         MAX_COMPUTE_NODES: 2
         NOVA_SERVICE_REPORT_INTERVAL: 10
@@ -150,6 +151,7 @@
     vars:
       tox_envlist: all
       tempest_concurrency: 1
+      compute_node_template_name: whitebox-devstack-ceph-multinode.yaml.j2
      devstack_plugins:
        whitebox-tempest-plugin: https://opendev.org/openstack/whitebox-tempest-plugin.git
      tempest_test_regex: '^whitebox_tempest_plugin.api.compute.test_rbd_direct_download'

diff --git a/playbooks/templates/compute_nodes.yaml.j2 b/playbooks/templates/compute_nodes.yaml.j2
deleted file mode 100644
index addc33f7..00000000
--- a/playbooks/templates/compute_nodes.yaml.j2
+++ /dev/null
@@ -1,13 +0,0 @@
-{% for compute in computes -%}
-{{ compute }}:
-  services:
-    libvirt:
-      start_command: 'systemctl start libvirtd'
-      stop_command: 'systemctl stop libvirtd'
-      mask_command: 'systemctl mask libvirtd'
-      unmask_command: 'systemctl unmask libvirtd'
-    nova-compute:
-      config_path: '/etc/nova/nova-cpu.conf'
-      start_command: 'systemctl start devstack@n-cpu'
-      stop_command: 'systemctl stop devstack@n-cpu'
-{% endfor %}
\ No newline at end of file
diff --git a/playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2 b/playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2
new file mode 100644
index 00000000..5b7428a5
--- /dev/null
+++ b/playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2
@@ -0,0 +1,22 @@
+{{ hostvars['controller']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+{{ hostvars['compute1']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
diff --git a/playbooks/templates/whitebox-devstack-multinode.yaml.j2 b/playbooks/templates/whitebox-devstack-multinode.yaml.j2
new file mode 100644
index 00000000..204ec631
--- /dev/null
+++ b/playbooks/templates/whitebox-devstack-multinode.yaml.j2
@@ -0,0 +1,34 @@
+{{ hostvars['controller']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+  cpu_shared_set: {{ hostvars['controller']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_shared_set'] }}
+  cpu_dedicated_set: {{ hostvars['controller']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}
+  topology:
+    socket-0:
+      numa-0:
+        cpus: 0-7
+{{ hostvars['compute-host']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+  cpu_shared_set: {{ hostvars['compute-host']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_shared_set'] }}
+  cpu_dedicated_set: {{ hostvars['compute-host']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}
+  topology:
+    socket-0:
+      numa-0:
+        cpus: 0-7
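For reference, the cpu_shared_set/cpu_dedicated_set values in the template
above come straight from each node's devstack_local_conf hostvars. A small,
hypothetical rendering of a single template line (the hostvars content below
is made up) shows what lands in the generated compute_nodes.yaml:

# Hypothetical rendering of one template line; hostvars content is made up.
from jinja2 import Template

line = ("{{ hostvars['controller']['ansible_fqdn'] }}:\n"
        "  cpu_dedicated_set: {{ hostvars['controller']['devstack_local_conf']"
        "['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}\n")

hostvars = {'controller': {
    'ansible_fqdn': 'controller.example.com',
    'devstack_local_conf': {'post-config': {'$NOVA_CONF': {
        'compute': {'cpu_dedicated_set': '4-7'}}}},
}}

print(Template(line).render(hostvars=hostvars))
# controller.example.com:
#   cpu_dedicated_set: 4-7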
diff --git a/playbooks/whitebox/pre.yaml b/playbooks/whitebox/pre.yaml
index 42e2e819..a83fce48 100644
--- a/playbooks/whitebox/pre.yaml
+++ b/playbooks/whitebox/pre.yaml
@@ -28,23 +28,21 @@
         ansible_become: yes
         copy_sshkey_target_user: 'tempest'

-    - name: Collect compute hostnames
-      set_fact:
-        computes: "{{ ansible_play_hosts_all|map('extract', hostvars, 'ansible_fqdn')|list }}"
-      run_once: true
+    - name: Create compute nodes file
+      block:
+        - name: Render compute_nodes.yaml template
+          template:
+            src: "../templates/{{compute_node_template_name}}"
+            dest: /home/zuul/compute_nodes.yaml
+          run_once: true
+          delegate_to: controller

-    - name: Render compute_nodes.yaml template
-      template:
-        src: ../templates/compute_nodes.yaml.j2
-        dest: /home/zuul/compute_nodes.yaml
-      run_once: true
-      delegate_to: controller
-
-    - name: Output the rendered file at /home/zuul/compute_nodes.yaml
-      shell: |
-        cat /home/zuul/compute_nodes.yaml
-      run_once: true
-      delegate_to: controller
+        - name: Output the rendered file at /home/zuul/compute_nodes.yaml
+          shell: |
+            cat /home/zuul/compute_nodes.yaml
+          run_once: true
+          delegate_to: controller
+      when: compute_node_template_name is defined

 - hosts: compute
   tasks:
diff --git a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
index d59bf30e..890ffaa5 100644
--- a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
+++ b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
@@ -121,22 +121,76 @@ class BasePinningTest(base.BaseWhiteboxComputeTest,
         numa_topology = whitebox_utils.normalize_json(numa_topology)
         return numa_topology

+    def _get_host_cpu_dedicated_set(self, host):
+        """Return the cpu dedicated set configured for the provided host.
+        """
+        cpu_set = \
+            whitebox_utils.get_host_details(host).get('cpu_dedicated_set', [])
+        return hardware.parse_cpu_spec(cpu_set)
+
+    def _get_host_cpu_shared_set(self, host):
+        """Return the cpu shared set configured for the provided host.
+        """
+        cpu_set = \
+            whitebox_utils.get_host_details(host).get('cpu_shared_set', [])
+        return hardware.parse_cpu_spec(cpu_set)
+
+    def _get_shared_set_size(self):
+        gathered_lists = [self._get_host_cpu_shared_set(host)
+                          for host in self.hosts_details.keys()]
+        return gathered_lists
+
+    def _get_dedicated_set_size(self):
+        gathered_lists = [self._get_host_cpu_dedicated_set(host)
+                          for host in self.hosts_details.keys()]
+        return gathered_lists
+

 class CPUPolicyTest(BasePinningTest):
     """Validate CPU policy support."""
+    minimum_shared_cpus = 2
+    minimum_dedicated_cpus = 2

     def setUp(self):
         super().setUp()
-        self.dedicated_vcpus = (
-            CONF.whitebox_hardware.dedicated_cpus_per_numa *
-            len(CONF.whitebox_hardware.cpu_topology)) // 2
-        self.shared_vcpus = (
-            CONF.whitebox_hardware.shared_cpus_per_numa *
-            len(CONF.whitebox_hardware.cpu_topology)) // 2
+        self.hosts_details = whitebox_utils.get_all_hosts_details()
+
+        # Get the configured shared CPUs of each compute host and confirm
+        # that every host has the minimum number of shared CPUs necessary
+        # to perform the tests
+        shared_cpus_per_host = self._get_shared_set_size()
+        if any(len(cpus) < self.minimum_shared_cpus for cpus in
+               shared_cpus_per_host):
+            raise self.skipException(
+                'A host in the deployment does not have the minimum required '
+                '%s shared cpus necessary to execute the tests' %
+                (self.minimum_shared_cpus))
+        available_shared_vcpus = \
+            min(shared_cpus_per_host, key=lambda x: len(x))
+
+        # Get the configured dedicated CPUs of each compute host and confirm
+        # that every host has the minimum number of dedicated CPUs necessary
+        # to perform the tests
+        dedicated_cpus_per_host = self._get_dedicated_set_size()
+        if any(len(cpus) < self.minimum_dedicated_cpus for cpus in
+               dedicated_cpus_per_host):
+            raise self.skipException(
+                'A host in the deployment does not have the minimum required '
+                '%s dedicated cpus necessary to execute the tests' %
+                (self.minimum_dedicated_cpus))
+        available_dedicated_vcpus = \
+            min(dedicated_cpus_per_host, key=lambda x: len(x))
+
+        # Calculate the number of cpus to use in the flavors such that the
+        # resulting guest size allows two guests to be scheduled to the
+        # same host
+        self.dedicated_cpus_per_guest = len(available_dedicated_vcpus) // 2
+        self.shared_vcpus_per_guest = len(available_shared_vcpus) // 2

     def test_cpu_shared(self):
         """Ensure an instance with an explicit 'shared' policy work."""
-        flavor = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                     extra_specs=self.shared_cpu_policy)
         self.create_test_server(flavor=flavor['id'], wait_until='ACTIVE')

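The setUp() sizing above boils down to: take the smallest dedicated (or
shared) set across all hosts and give each guest half of it, so two guests of
that size can always be co-located on the same host. A standalone sketch with
made-up host data:

# Standalone sketch of the setUp() sizing logic above; host data is made up.
hosts = {
    'compute-0': {'dedicated': {4, 5, 6, 7}, 'shared': {0, 1}},
    'compute-1': {'dedicated': {2, 3, 4, 5, 6, 7}, 'shared': {0, 1}},
}

dedicated_sets = [h['dedicated'] for h in hosts.values()]
shared_sets = [h['shared'] for h in hosts.values()]

# The real tests skip if any host is below the minimum of 2.
assert all(len(s) >= 2 for s in dedicated_sets + shared_sets)

# Size guests off the smallest host so two of them can land together.
dedicated_cpus_per_guest = len(min(dedicated_sets, key=len)) // 2   # -> 2
shared_vcpus_per_guest = len(min(shared_sets, key=len)) // 2        # -> 1
print(dedicated_cpus_per_guest, shared_vcpus_per_guest)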
@@ -147,7 +201,7 @@ class CPUPolicyTest(BasePinningTest):
         default. However, we check specifics of that later and only assert
         that things aren't overlapping here.
         """
-        flavor = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                     extra_specs=self.dedicated_cpu_policy)
         server_a = self.create_test_server(flavor=flavor['id'],
                                            wait_until='ACTIVE')
@@ -156,13 +210,16 @@ class CPUPolicyTest(BasePinningTest):
                                            wait_until='ACTIVE')
         cpu_pinnings_a = self.get_server_cpu_pinning(server_a['id'])
         cpu_pinnings_b = self.get_server_cpu_pinning(server_b['id'])
-
-        self.assertEqual(
-            len(cpu_pinnings_a), self.dedicated_vcpus,
-            "Instance should be pinned but it is unpinned")
-        self.assertEqual(
-            len(cpu_pinnings_b), self.dedicated_vcpus,
-            "Instance should be pinned but it is unpinned")
+        host = self.get_host_for_server(server_a['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings_a.values()).issubset(dedicated_vcpus),
+            "Instance A's pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings_a, dedicated_vcpus))
+        self.assertTrue(
+            set(cpu_pinnings_b.values()).issubset(dedicated_vcpus),
+            "Instance B's pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings_b, dedicated_vcpus))

         self.assertTrue(
             set(cpu_pinnings_a.values()).isdisjoint(
@@ -175,17 +232,20 @@ class CPUPolicyTest(BasePinningTest):
                           'Resize not available.')
     def test_resize_pinned_server_to_unpinned(self):
         """Ensure resizing an instance to unpinned actually drops pinning."""
-        flavor_a = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor_a = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                       extra_specs=self.dedicated_cpu_policy)
         server = self.create_test_server(flavor=flavor_a['id'],
                                          wait_until='ACTIVE')
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "Instance pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings, dedicated_vcpus))

-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "Instance should be pinned but is unpinned")
-
-        flavor_b = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor_b = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                       extra_specs=self.shared_cpu_policy)
         self.resize_server(server['id'], flavor_b['id'])
         cpu_pinnings = self.get_server_cpu_pinning(server['id'])
@@ -198,7 +258,7 @@ class CPUPolicyTest(BasePinningTest):
                           'Resize not available.')
     def test_resize_unpinned_server_to_pinned(self):
         """Ensure resizing an instance to pinned actually applies pinning."""
-        flavor_a = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor_a = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                       extra_specs=self.shared_cpu_policy)
         server = self.create_test_server(flavor=flavor_a['id'],
                                          wait_until='ACTIVE')
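The new assertions above all follow one pattern: the guest's vCPU-to-pCPU
pinning map must land inside the host's cpu_dedicated_set, and two pinned
guests must not overlap. A toy illustration with made-up data:

# Toy illustration of the subset/disjoint checks used above; data is made up.
dedicated_set = {4, 5, 6, 7}          # host cpu_dedicated_set
cpu_pinnings_a = {0: 4, 1: 5}         # vCPU -> pCPU for instance A
cpu_pinnings_b = {0: 6, 1: 7}         # vCPU -> pCPU for instance B

assert set(cpu_pinnings_a.values()).issubset(dedicated_set)
assert set(cpu_pinnings_b.values()).issubset(dedicated_set)
assert set(cpu_pinnings_a.values()).isdisjoint(set(cpu_pinnings_b.values()))
print("both guests pinned inside the dedicated set, with no overlap")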
@@ -208,26 +268,32 @@ class CPUPolicyTest(BasePinningTest):
             len(cpu_pinnings), 0,
             "Instance should be unpinned but is pinned")

-        flavor_b = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor_b = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                       extra_specs=self.dedicated_cpu_policy)
         self.resize_server(server['id'], flavor_b['id'])
-        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "Resized instance should be pinned but is still unpinned")
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "After resize instance %s pinning %s should be a subset of "
+            "pinning range %s" % (server['id'], cpu_pinnings, dedicated_vcpus))

     def test_reboot_pinned_server(self):
         """Ensure pinning information is persisted after a reboot."""
-        flavor = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                     extra_specs=self.dedicated_cpu_policy)
         server = self.create_test_server(flavor=flavor['id'],
                                          wait_until='ACTIVE')
-        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "CPU pinning was not applied to new instance.")
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "Instance %s pinning %s should be a subset of "
+            "pinning range %s" % (server['id'], cpu_pinnings, dedicated_vcpus))

         self.reboot_server(server['id'], 'HARD')
         cpu_pinnings = self.get_server_cpu_pinning(server['id'])
@@ -235,8 +301,8 @@ class CPUPolicyTest(BasePinningTest):
         # we don't actually assert that the same pinning information is used
         # because that's not expected. We just care that _some_ pinning is in
         # effect
-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
             "Rebooted instance has lost its pinning information")


@@ -428,9 +494,7 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):
         # Determine the compute host the guest was scheduled to and gather
         # the cpu shared set from the host
         host = self.get_host_for_server(server['id'])
-        host_sm = clients.NovaServiceManager(host, 'nova-compute',
-                                             self.os_admin.services_client)
-        cpu_shared_set = host_sm.get_cpu_shared_set()
+        cpu_shared_set = self._get_host_cpu_shared_set(host)

         # Gather the emulator threads from the server
         emulator_threads = \
@@ -506,8 +570,10 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):

         # Create a flavor using the isolate threads_policy and then launch
         # an instance with the flavor
-        flavor = self.create_flavor(threads_policy='isolate',
-                                    vcpus=(self.dedicated_cpus_per_numa - 1))
+        flavor = self.create_flavor(
+            threads_policy='isolate',
+            vcpus=(self.dedicated_cpus_per_numa - 1)
+        )
         server = self.create_test_server(flavor=flavor['id'],
                                          wait_until='ACTIVE')

@@ -520,9 +586,7 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):
         # Determine the compute host the guest was scheduled to and gather
         # the cpu dedicated set from the host
         host = self.get_host_for_server(server['id'])
-        host_sm = clients.NovaServiceManager(host, 'nova-compute',
-                                             self.os_admin.services_client)
-        cpu_dedicated_set = host_sm.get_cpu_dedicated_set()
+        cpu_dedicated_set = self._get_host_cpu_dedicated_set(host)

         # Confirm the pinned cpus from the guest are part of the dedicated
         # range of the compute host it is scheduled to
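The _get_host_cpu_dedicated_set()/_get_host_cpu_shared_set() helpers that
replace NovaServiceManager above hand the raw cpu_dedicated_set/cpu_shared_set
strings from the nodes yaml to hardware.parse_cpu_spec(). A rough,
hypothetical approximation of what that gives the tests (this is not nova's
implementation and it ignores '^' exclusions):

# Rough approximation only -- not nova's parse_cpu_spec() implementation.
# Turns a spec string such as "4,5,6,7" or "0-3" into a set of host CPU ids.
def parse_cpu_spec_sketch(spec):
    cpus = set()
    for chunk in str(spec).split(','):
        chunk = chunk.strip()
        if not chunk:
            continue
        if '-' in chunk:
            start, end = (int(n) for n in chunk.split('-', 1))
            cpus.update(range(start, end + 1))
        else:
            cpus.add(int(chunk))
    return cpus

print(parse_cpu_spec_sketch("4,5,6,7"))  # {4, 5, 6, 7}
print(parse_cpu_spec_sketch("0-3"))      # {0, 1, 2, 3}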
diff --git a/whitebox_tempest_plugin/utils.py b/whitebox_tempest_plugin/utils.py
index 1e94afe4..e8d34b24 100644
--- a/whitebox_tempest_plugin/utils.py
+++ b/whitebox_tempest_plugin/utils.py
@@ -81,3 +81,12 @@ def get_host_details(host):
         with open(nodes_location, "r") as f:
             _nodes = yaml.safe_load(f)
     return _nodes.get(host)
+
+
+def get_all_hosts_details():
+    global _nodes
+    if _nodes is None:
+        nodes_location = CONF.whitebox.nodes_yaml
+        with open(nodes_location, "r") as f:
+            _nodes = yaml.safe_load(f)
+    return _nodes
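Usage sketch (assumes a configured tempest environment where
CONF.whitebox.nodes_yaml points at the rendered compute_nodes.yaml; the host
name below is illustrative). Both helpers share the module-level _nodes cache,
so the yaml file is read at most once per test run:

# Assumes tempest/whitebox config is loaded; host name below is illustrative.
from whitebox_tempest_plugin import utils as whitebox_utils

all_hosts = whitebox_utils.get_all_hosts_details()
for fqdn, details in all_hosts.items():
    print(fqdn, details.get('cpu_dedicated_set'), details.get('cpu_shared_set'))

# Per-host lookup goes through the same cached mapping.
one_host = whitebox_utils.get_host_details('compute-0.redhat.local')
print(one_host['services']['nova-compute']['config_path'])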