From 3ff555d1c8511119e51c2c06e6be3c49fdea89f7 Mon Sep 17 00:00:00 2001
From: James Parker
Date: Wed, 29 Nov 2023 11:16:58 -0500
Subject: [PATCH] Update nodes yaml to use cpu architecture

Migrate cpu-relevant configuration parameters from tempest.conf to the
nodes_yaml file. Previously, cpu information about the hosts, such as
the cpu topology and the dedicated/shared sets, was pulled from
tempest.conf. This change moves that information to the nodes yaml
approach [1] used for accessing host-specific service information. The
format of the information can be seen below:

compute-0.redhat.local:
  services:
    libvirt:
      container_name: nova_virtqemud
      start_command: 'systemctl start tripleo_nova_virtqemud'
      stop_command: 'systemctl stop tripleo_nova_virtqemud'
    nova-compute:
      container_name: nova_compute
      config_path: '/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf'
      start_command: 'systemctl start tripleo_nova_compute'
      stop_command: 'systemctl stop tripleo_nova_compute'
  cpu_shared_set: 0,1
  cpu_dedicated_set: 4,5,6,7
  numa:
    node-0:
      cpus: "0-3"
    node-1:
      cpus: "4-7"

[1] https://opendev.org/openstack/whitebox-tempest-plugin/commit/3fe1d72fa6d9d287a19c26d5080b43fd3f203a0a

Change-Id: I1f22131dc04a2d7a5f010da2dfa3f4e9524656a2
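Note (illustrative, not part of this patch): the per-host attributes above are
ordinary YAML, so a test-side consumer only needs yaml.safe_load to read them;
the plugin's actual entry points are get_host_details()/get_all_hosts_details()
in whitebox_tempest_plugin/utils.py (see the last hunk of this patch). A
minimal sketch parsing the sample from this commit message:

# Illustrative sketch only -- not part of this patch. Parses the sample nodes
# yaml shown above and reads the per-host cpu attributes the tests rely on.
import yaml

sample = """
compute-0.redhat.local:
  services:
    nova-compute:
      config_path: '/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf'
  cpu_shared_set: 0,1
  cpu_dedicated_set: 4,5,6,7
  numa:
    node-0:
      cpus: "0-3"
    node-1:
      cpus: "4-7"
"""

nodes = yaml.safe_load(sample)
host = nodes['compute-0.redhat.local']
print(host['cpu_dedicated_set'])   # "4,5,6,7" -- a plain string, parsed later
print(host['numa']['node-0'])      # {'cpus': '0-3'}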
---
 .zuul.yaml                                         |   2 +
 playbooks/templates/compute_nodes.yaml.j2          |  13 --
 .../whitebox-devstack-ceph-multinode.yaml.j2       |  22 +++
 .../whitebox-devstack-multinode.yaml.j2            |  34 ++++
 playbooks/whitebox/pre.yaml                        |  30 ++--
 .../api/compute/test_cpu_pinning.py                | 148 +++++++++++++-----
 whitebox_tempest_plugin/utils.py                   |   9 ++
 7 files changed, 187 insertions(+), 71 deletions(-)
 delete mode 100644 playbooks/templates/compute_nodes.yaml.j2
 create mode 100644 playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2
 create mode 100644 playbooks/templates/whitebox-devstack-multinode.yaml.j2

diff --git a/.zuul.yaml b/.zuul.yaml
index 29a11ba1..b7a3a3aa 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -54,6 +54,7 @@
       # addition to test vTPM hosts need swtpm as well
       extra_packages: ovmf,swtpm-tools
       tempest_exclude_regex: ^whitebox_tempest_plugin\.api\.compute\.test_hugepages
+      compute_node_template_name: whitebox-devstack-multinode.yaml.j2
       devstack_localrc:
         MAX_COMPUTE_NODES: 2
         NOVA_SERVICE_REPORT_INTERVAL: 10
@@ -150,6 +151,7 @@
     vars:
       tox_envlist: all
       tempest_concurrency: 1
+      compute_node_template_name: whitebox-devstack-ceph-multinode.yaml.j2
      devstack_plugins:
        whitebox-tempest-plugin: https://opendev.org/openstack/whitebox-tempest-plugin.git
      tempest_test_regex: '^whitebox_tempest_plugin.api.compute.test_rbd_direct_download'

diff --git a/playbooks/templates/compute_nodes.yaml.j2 b/playbooks/templates/compute_nodes.yaml.j2
deleted file mode 100644
index addc33f7..00000000
--- a/playbooks/templates/compute_nodes.yaml.j2
+++ /dev/null
@@ -1,13 +0,0 @@
-{% for compute in computes -%}
-{{ compute }}:
-  services:
-    libvirt:
-      start_command: 'systemctl start libvirtd'
-      stop_command: 'systemctl stop libvirtd'
-      mask_command: 'systemctl mask libvirtd'
-      unmask_command: 'systemctl unmask libvirtd'
-    nova-compute:
-      config_path: '/etc/nova/nova-cpu.conf'
-      start_command: 'systemctl start devstack@n-cpu'
-      stop_command: 'systemctl stop devstack@n-cpu'
-{% endfor %}
\ No newline at end of file
diff --git a/playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2 b/playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2
new file mode 100644
index 00000000..5b7428a5
--- /dev/null
+++ b/playbooks/templates/whitebox-devstack-ceph-multinode.yaml.j2
@@ -0,0 +1,22 @@
+{{ hostvars['controller']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+{{ hostvars['compute1']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
diff --git a/playbooks/templates/whitebox-devstack-multinode.yaml.j2 b/playbooks/templates/whitebox-devstack-multinode.yaml.j2
new file mode 100644
index 00000000..204ec631
--- /dev/null
+++ b/playbooks/templates/whitebox-devstack-multinode.yaml.j2
@@ -0,0 +1,34 @@
+{{ hostvars['controller']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+  cpu_shared_set: {{ hostvars['controller']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_shared_set'] }}
+  cpu_dedicated_set: {{ hostvars['controller']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}
+  topology:
+    socket-0:
+      numa-0:
+        cpus: 0-7
+{{ hostvars['compute-host']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+  cpu_shared_set: {{ hostvars['compute-host']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_shared_set'] }}
+  cpu_dedicated_set: {{ hostvars['compute-host']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}
+  topology:
+    socket-0:
+      numa-0:
+        cpus: 0-7
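For reference, the cpu_shared_set/cpu_dedicated_set values in the template
above come straight from each node's devstack_local_conf hostvars. A small,
hypothetical rendering of a single template line (the hostvars content below
is made up) shows what lands in the generated compute_nodes.yaml:

# Hypothetical rendering of one template line; hostvars content is made up.
from jinja2 import Template

line = ("{{ hostvars['controller']['ansible_fqdn'] }}:\n"
        "  cpu_dedicated_set: {{ hostvars['controller']['devstack_local_conf']"
        "['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}\n")

hostvars = {'controller': {
    'ansible_fqdn': 'controller.example.com',
    'devstack_local_conf': {'post-config': {'$NOVA_CONF': {
        'compute': {'cpu_dedicated_set': '4-7'}}}},
}}

print(Template(line).render(hostvars=hostvars))
# controller.example.com:
#   cpu_dedicated_set: 4-7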
diff --git a/playbooks/whitebox/pre.yaml b/playbooks/whitebox/pre.yaml
index 42e2e819..a83fce48 100644
--- a/playbooks/whitebox/pre.yaml
+++ b/playbooks/whitebox/pre.yaml
@@ -28,23 +28,21 @@
         ansible_become: yes
         copy_sshkey_target_user: 'tempest'

-    - name: Collect compute hostnames
-      set_fact:
-        computes: "{{ ansible_play_hosts_all|map('extract', hostvars, 'ansible_fqdn')|list }}"
-      run_once: true
+    - name: Create compute nodes file
+      block:
+        - name: Render compute_nodes.yaml template
+          template:
+            src: "../templates/{{compute_node_template_name}}"
+            dest: /home/zuul/compute_nodes.yaml
+          run_once: true
+          delegate_to: controller

-    - name: Render compute_nodes.yaml template
-      template:
-        src: ../templates/compute_nodes.yaml.j2
-        dest: /home/zuul/compute_nodes.yaml
-      run_once: true
-      delegate_to: controller
-
-    - name: Output the rendered file at /home/zuul/compute_nodes.yaml
-      shell: |
-        cat /home/zuul/compute_nodes.yaml
-      run_once: true
-      delegate_to: controller
+        - name: Output the rendered file at /home/zuul/compute_nodes.yaml
+          shell: |
+            cat /home/zuul/compute_nodes.yaml
+          run_once: true
+          delegate_to: controller
+      when: compute_node_template_name is defined

 - hosts: compute
   tasks:
diff --git a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
index d59bf30e..890ffaa5 100644
--- a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
+++ b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py
@@ -121,22 +121,76 @@ class BasePinningTest(base.BaseWhiteboxComputeTest,
         numa_topology = whitebox_utils.normalize_json(numa_topology)
         return numa_topology

+    def _get_host_cpu_dedicated_set(self, host):
+        """Return the cpu dedicated set configured for the provided host.
+        """
+        cpu_set = \
+            whitebox_utils.get_host_details(host).get('cpu_dedicated_set', [])
+        return hardware.parse_cpu_spec(cpu_set)
+
+    def _get_host_cpu_shared_set(self, host):
+        """Return the cpu shared set configured for the provided host.
+        """
+        cpu_set = \
+            whitebox_utils.get_host_details(host).get('cpu_shared_set', [])
+        return hardware.parse_cpu_spec(cpu_set)
+
+    def _get_shared_set_size(self):
+        gathered_lists = [self._get_host_cpu_shared_set(host)
+                          for host in self.hosts_details.keys()]
+        return gathered_lists
+
+    def _get_dedicated_set_size(self):
+        gathered_lists = [self._get_host_cpu_dedicated_set(host)
+                          for host in self.hosts_details.keys()]
+        return gathered_lists
+

 class CPUPolicyTest(BasePinningTest):
     """Validate CPU policy support."""
+    minimum_shared_cpus = 2
+    minimum_dedicated_cpus = 2

     def setUp(self):
         super().setUp()
-        self.dedicated_vcpus = (
-            CONF.whitebox_hardware.dedicated_cpus_per_numa *
-            len(CONF.whitebox_hardware.cpu_topology)) // 2
-        self.shared_vcpus = (
-            CONF.whitebox_hardware.shared_cpus_per_numa *
-            len(CONF.whitebox_hardware.cpu_topology)) // 2
+        self.hosts_details = whitebox_utils.get_all_hosts_details()
+
+        # Get the configured shared CPUs of each compute host and confirm
+        # that every host has the minimum number of shared CPUs necessary
+        # to perform the tests
+        shared_cpus_per_host = self._get_shared_set_size()
+        if any(len(cpus) < self.minimum_shared_cpus for cpus in
+               shared_cpus_per_host):
+            raise self.skipException(
+                'A host in the deployment does not have the minimum required '
+                '%s shared cpus necessary to execute the tests' %
+                (self.minimum_shared_cpus))
+        available_shared_vcpus = \
+            min(shared_cpus_per_host, key=lambda x: len(x))
+
+        # Get the configured dedicated CPUs of each compute host and confirm
+        # that every host has the minimum number of dedicated CPUs necessary
+        # to perform the tests
+        dedicated_cpus_per_host = self._get_dedicated_set_size()
+        if any(len(cpus) < self.minimum_dedicated_cpus for cpus in
+               dedicated_cpus_per_host):
+            raise self.skipException(
+                'A host in the deployment does not have the minimum required '
+                '%s dedicated cpus necessary to execute the tests' %
+                (self.minimum_dedicated_cpus))
+        available_dedicated_vcpus = \
+            min(dedicated_cpus_per_host, key=lambda x: len(x))
+
+        # Calculate the number of cpus to use in the flavors such that the
+        # resulting guest size allows two guests to be scheduled to the
+        # same host
+        self.dedicated_cpus_per_guest = len(available_dedicated_vcpus) // 2
+        self.shared_vcpus_per_guest = len(available_shared_vcpus) // 2

     def test_cpu_shared(self):
         """Ensure an instance with an explicit 'shared' policy work."""
-        flavor = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                     extra_specs=self.shared_cpu_policy)
         self.create_test_server(flavor=flavor['id'], wait_until='ACTIVE')

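The setUp() sizing above boils down to: take the smallest dedicated (or
shared) set across all hosts and give each guest half of it, so two guests of
that size can always be co-located on the same host. A standalone sketch with
made-up host data:

# Standalone sketch of the setUp() sizing logic above; host data is made up.
hosts = {
    'compute-0': {'dedicated': {4, 5, 6, 7}, 'shared': {0, 1}},
    'compute-1': {'dedicated': {2, 3, 4, 5, 6, 7}, 'shared': {0, 1}},
}

dedicated_sets = [h['dedicated'] for h in hosts.values()]
shared_sets = [h['shared'] for h in hosts.values()]

# The real tests skip if any host is below the minimum of 2.
assert all(len(s) >= 2 for s in dedicated_sets + shared_sets)

# Size guests off the smallest host so two of them can land together.
dedicated_cpus_per_guest = len(min(dedicated_sets, key=len)) // 2   # -> 2
shared_vcpus_per_guest = len(min(shared_sets, key=len)) // 2        # -> 1
print(dedicated_cpus_per_guest, shared_vcpus_per_guest)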
@@ -147,7 +201,7 @@ class CPUPolicyTest(BasePinningTest):
         default. However, we check specifics of that later and only assert
         that things aren't overlapping here.
         """
-        flavor = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                     extra_specs=self.dedicated_cpu_policy)
         server_a = self.create_test_server(flavor=flavor['id'],
                                            wait_until='ACTIVE')
@@ -156,13 +210,16 @@ class CPUPolicyTest(BasePinningTest):
                                            wait_until='ACTIVE')
         cpu_pinnings_a = self.get_server_cpu_pinning(server_a['id'])
         cpu_pinnings_b = self.get_server_cpu_pinning(server_b['id'])
-
-        self.assertEqual(
-            len(cpu_pinnings_a), self.dedicated_vcpus,
-            "Instance should be pinned but it is unpinned")
-        self.assertEqual(
-            len(cpu_pinnings_b), self.dedicated_vcpus,
-            "Instance should be pinned but it is unpinned")
+        host = self.get_host_for_server(server_a['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings_a.values()).issubset(dedicated_vcpus),
+            "Instance A's pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings_a, dedicated_vcpus))
+        self.assertTrue(
+            set(cpu_pinnings_b.values()).issubset(dedicated_vcpus),
+            "Instance B's pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings_b, dedicated_vcpus))

         self.assertTrue(
             set(cpu_pinnings_a.values()).isdisjoint(
@@ -175,17 +232,20 @@ class CPUPolicyTest(BasePinningTest):
                           'Resize not available.')
     def test_resize_pinned_server_to_unpinned(self):
         """Ensure resizing an instance to unpinned actually drops pinning."""
-        flavor_a = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor_a = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                       extra_specs=self.dedicated_cpu_policy)
         server = self.create_test_server(flavor=flavor_a['id'],
                                          wait_until='ACTIVE')
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "Instance pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings, dedicated_vcpus))

-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "Instance should be pinned but is unpinned")
-
-        flavor_b = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor_b = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                       extra_specs=self.shared_cpu_policy)
         self.resize_server(server['id'], flavor_b['id'])
         cpu_pinnings = self.get_server_cpu_pinning(server['id'])
@@ -198,7 +258,7 @@ class CPUPolicyTest(BasePinningTest):
                           'Resize not available.')
     def test_resize_unpinned_server_to_pinned(self):
         """Ensure resizing an instance to pinned actually applies pinning."""
-        flavor_a = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor_a = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                       extra_specs=self.shared_cpu_policy)
         server = self.create_test_server(flavor=flavor_a['id'],
                                          wait_until='ACTIVE')
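The new assertions above all follow one pattern: the guest's vCPU-to-pCPU
pinning map must land inside the host's cpu_dedicated_set, and two pinned
guests must not overlap. A toy illustration with made-up data:

# Toy illustration of the subset/disjoint checks used above; data is made up.
dedicated_set = {4, 5, 6, 7}          # host cpu_dedicated_set
cpu_pinnings_a = {0: 4, 1: 5}         # vCPU -> pCPU for instance A
cpu_pinnings_b = {0: 6, 1: 7}         # vCPU -> pCPU for instance B

assert set(cpu_pinnings_a.values()).issubset(dedicated_set)
assert set(cpu_pinnings_b.values()).issubset(dedicated_set)
assert set(cpu_pinnings_a.values()).isdisjoint(set(cpu_pinnings_b.values()))
print("both guests pinned inside the dedicated set, with no overlap")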
@@ -208,26 +268,32 @@ class CPUPolicyTest(BasePinningTest):
             len(cpu_pinnings), 0,
             "Instance should be unpinned but is pinned")

-        flavor_b = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor_b = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                       extra_specs=self.dedicated_cpu_policy)
         self.resize_server(server['id'], flavor_b['id'])
-        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "Resized instance should be pinned but is still unpinned")
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "After resize instance %s pinning %s should be a subset of "
+            "pinning range %s" % (server['id'], cpu_pinnings, dedicated_vcpus))

     def test_reboot_pinned_server(self):
         """Ensure pinning information is persisted after a reboot."""
-        flavor = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                     extra_specs=self.dedicated_cpu_policy)
         server = self.create_test_server(flavor=flavor['id'],
                                          wait_until='ACTIVE')
-        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "CPU pinning was not applied to new instance.")
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "Instance %s pinning %s should be a subset of "
+            "pinning range %s" % (server['id'], cpu_pinnings, dedicated_vcpus))

         self.reboot_server(server['id'], 'HARD')
         cpu_pinnings = self.get_server_cpu_pinning(server['id'])
@@ -235,8 +301,8 @@ class CPUPolicyTest(BasePinningTest):
         # we don't actually assert that the same pinning information is used
         # because that's not expected. We just care that _some_ pinning is in
         # effect
-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
             "Rebooted instance has lost its pinning information")


@@ -428,9 +494,7 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):
         # Determine the compute host the guest was scheduled to and gather
         # the cpu shared set from the host
         host = self.get_host_for_server(server['id'])
-        host_sm = clients.NovaServiceManager(host, 'nova-compute',
-                                             self.os_admin.services_client)
-        cpu_shared_set = host_sm.get_cpu_shared_set()
+        cpu_shared_set = self._get_host_cpu_shared_set(host)

         # Gather the emulator threads from the server
         emulator_threads = \
@@ -506,8 +570,10 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):

         # Create a flavor using the isolate threads_policy and then launch
         # an instance with the flavor
-        flavor = self.create_flavor(threads_policy='isolate',
-                                    vcpus=(self.dedicated_cpus_per_numa - 1))
+        flavor = self.create_flavor(
+            threads_policy='isolate',
+            vcpus=(self.dedicated_cpus_per_numa - 1)
+        )
         server = self.create_test_server(flavor=flavor['id'],
                                          wait_until='ACTIVE')

@@ -520,9 +586,7 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):
         # Determine the compute host the guest was scheduled to and gather
         # the cpu dedicated set from the host
         host = self.get_host_for_server(server['id'])
-        host_sm = clients.NovaServiceManager(host, 'nova-compute',
-                                             self.os_admin.services_client)
-        cpu_dedicated_set = host_sm.get_cpu_dedicated_set()
+        cpu_dedicated_set = self._get_host_cpu_dedicated_set(host)

         # Confirm the pinned cpus from the guest are part of the dedicated
         # range of the compute host it is scheduled to
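The _get_host_cpu_dedicated_set()/_get_host_cpu_shared_set() helpers that
replace NovaServiceManager above hand the raw cpu_dedicated_set/cpu_shared_set
strings from the nodes yaml to hardware.parse_cpu_spec(). A rough,
hypothetical approximation of what that gives the tests (this is not nova's
implementation and it ignores '^' exclusions):

# Rough approximation only -- not nova's parse_cpu_spec() implementation.
# Turns a spec string such as "4,5,6,7" or "0-3" into a set of host CPU ids.
def parse_cpu_spec_sketch(spec):
    cpus = set()
    for chunk in str(spec).split(','):
        chunk = chunk.strip()
        if not chunk:
            continue
        if '-' in chunk:
            start, end = (int(n) for n in chunk.split('-', 1))
            cpus.update(range(start, end + 1))
        else:
            cpus.add(int(chunk))
    return cpus

print(parse_cpu_spec_sketch("4,5,6,7"))  # {4, 5, 6, 7}
print(parse_cpu_spec_sketch("0-3"))      # {0, 1, 2, 3}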
diff --git a/whitebox_tempest_plugin/utils.py b/whitebox_tempest_plugin/utils.py
index 1e94afe4..e8d34b24 100644
--- a/whitebox_tempest_plugin/utils.py
+++ b/whitebox_tempest_plugin/utils.py
@@ -81,3 +81,12 @@ def get_host_details(host):
         with open(nodes_location, "r") as f:
             _nodes = yaml.safe_load(f)
     return _nodes.get(host)
+
+
+def get_all_hosts_details():
+    global _nodes
+    if _nodes is None:
+        nodes_location = CONF.whitebox.nodes_yaml
+        with open(nodes_location, "r") as f:
+            _nodes = yaml.safe_load(f)
+    return _nodes
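Usage sketch (assumes a configured tempest environment where
CONF.whitebox.nodes_yaml points at the rendered compute_nodes.yaml; the host
name below is illustrative). Both helpers share the module-level _nodes cache,
so the yaml file is read at most once per test run:

# Assumes tempest/whitebox config is loaded; host name below is illustrative.
from whitebox_tempest_plugin import utils as whitebox_utils

all_hosts = whitebox_utils.get_all_hosts_details()
for fqdn, details in all_hosts.items():
    print(fqdn, details.get('cpu_dedicated_set'), details.get('cpu_shared_set'))

# Per-host lookup goes through the same cached mapping.
one_host = whitebox_utils.get_host_details('compute-0.redhat.local')
print(one_host['services']['nova-compute']['config_path'])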