Move hosts back to freepool in case of failure

When reserving multiple hosts, if a host failed to be moved to the
reservation's host aggregate, all hosts previously moved into the same
aggregate were left in it instead of being moved back to the freepool.
This would result in failures to start future leases allocating any of
these hosts.

Change-Id: I007a9c7c644b6488a7a08e9ebf3029b15cb33b45
Closes-Bug: #1791733
Pierre Riteau 2019-02-12 17:17:09 +00:00
parent 7dc9a6a7bf
commit 3ef8b6638a
5 changed files with 87 additions and 38 deletions


@@ -174,11 +174,12 @@ class PhysicalHostPlugin(base.BasePlugin, nova.NovaClientWrapper):
         """Add the hosts in the pool."""
         host_reservation = db_api.host_reservation_get(resource_id)
         pool = nova.ReservationPool()
+        hosts = []
         for allocation in db_api.host_allocation_get_all_by_values(
                 reservation_id=host_reservation['reservation_id']):
             host = db_api.host_get(allocation['compute_host_id'])
-            pool.add_computehost(host_reservation['aggregate_id'],
-                                 host['service_name'])
+            hosts.append(host['service_name'])
+        pool.add_computehost(host_reservation['aggregate_id'], hosts)
 
     def before_end(self, resource_id):
         """Take an action before the end of a lease."""
@@ -665,16 +666,18 @@ class PhysicalHostPlugin(base.BasePlugin, nova.NovaClientWrapper):
                 str(min_hosts) + '-' + str(max_hosts),
                 dates_after['start_date'], dates_after['end_date'])
             if len(host_ids) >= min_hosts:
+                new_hosts = []
                 pool = nova.ReservationPool()
                 for host_id in host_ids:
                     db_api.host_allocation_create(
                         {'compute_host_id': host_id,
                          'reservation_id': reservation_id})
-                    if reservation_status == status.reservation.ACTIVE:
-                        # Add new host into the aggregate.
-                        new_host = db_api.host_get(host_id)
-                        pool.add_computehost(host_reservation['aggregate_id'],
-                                             new_host['service_name'])
+                    new_host = db_api.host_get(host_id)
+                    new_hosts.append(new_host['service_name'])
+                if reservation_status == status.reservation.ACTIVE:
+                    # Add new hosts into the aggregate.
+                    pool.add_computehost(host_reservation['aggregate_id'],
+                                         new_hosts)
             else:
                 raise manager_ex.NotEnoughHostsAvailable()
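
The plugin-side change is the same in both hunks: hostnames are collected first and handed to add_computehost in one call, so a mid-loop failure can no longer leave a partial set of hosts in the aggregate. A minimal sketch of the new calling convention (allocations, aggregate_id, and pool are illustrative stand-ins for the plugin's real context):

# Sketch only: gather every hostname before touching the aggregate.
hostnames = []
for allocation in allocations:
    host = db_api.host_get(allocation['compute_host_id'])
    hostnames.append(host['service_name'])

# One call for the whole group; if any host cannot be moved,
# add_computehost reverts the hosts it already moved and re-raises.
pool.add_computehost(aggregate_id, hostnames)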


@ -1423,8 +1423,7 @@ class PhysicalHostPluginTestCase(tests.TestCase):
'reservation_id': '706eb3bc-07ed-4383-be93-b32845ece672' 'reservation_id': '706eb3bc-07ed-4383-be93-b32845ece672'
} }
) )
add_computehost.assert_called_with( add_computehost.assert_called_with(1, ['host3_hostname'])
1, 'host3_hostname')
host_reservation_update.assert_called_with( host_reservation_update.assert_called_with(
'91253650-cc34-4c4f-bbe8-c943aa7d0c9b', '91253650-cc34-4c4f-bbe8-c943aa7d0c9b',
{'count_range': '1-3'} {'count_range': '1-3'}
@@ -1685,8 +1684,7 @@ class PhysicalHostPluginTestCase(tests.TestCase):
         self.fake_phys_plugin.on_start(u'04de74e8-193a-49d2-9ab8-cba7b49e45e8')
-        add_computehost.assert_called_with(
-            1, 'host1_hostname')
+        add_computehost.assert_called_with(1, ['host1_hostname'])
 
     def test_before_end_with_no_action(self):
         host_reservation_get = self.patch(self.db_api, 'host_reservation_get')


@@ -16,6 +16,7 @@ import uuid as uuidgen
 
 from keystoneauth1 import session
 from keystoneauth1 import token_endpoint
+import mock
 from novaclient import client as nova_client
 from novaclient import exceptions as nova_exceptions
 from novaclient.v2 import availability_zones
@@ -158,7 +159,7 @@ class ReservationPoolTestCase(tests.TestCase):
 
     def _patch_get_aggregate_from_name_or_id(self):
        def get_fake_aggregate(*args):
-            if self.freepool_name in args:
+            if self.freepool_name in args or self.fake_freepool.id in args:
                 return self.fake_freepool
             else:
                 return self.fake_aggregate
@ -286,7 +287,7 @@ class ReservationPoolTestCase(tests.TestCase):
check0 = self.nova.aggregates.add_host check0 = self.nova.aggregates.add_host
check0.assert_any_call(self.fake_aggregate.id, 'host3') check0.assert_any_call(self.fake_aggregate.id, 'host3')
check1 = self.nova.aggregates.remove_host check1 = self.nova.aggregates.remove_host
check1.assert_any_call(self.fake_aggregate.id, 'host3') check1.assert_any_call(self.fake_freepool.id, 'host3')
def test_add_computehost_with_host_id(self): def test_add_computehost_with_host_id(self):
# NOTE(sbauza): Freepool.hosts only contains names of hosts, not UUIDs # NOTE(sbauza): Freepool.hosts only contains names of hosts, not UUIDs
@@ -335,6 +336,24 @@ class ReservationPoolTestCase(tests.TestCase):
         check = self.nova.aggregates.add_host
         check.assert_called_once_with(self.fake_freepool.id, 'host2')
 
+    def test_add_computehost_revert(self):
+        self._patch_get_aggregate_from_name_or_id()
+        self.fake_freepool.hosts = ['host1', 'host2']
+        self.assertRaises(manager_exceptions.HostNotInFreePool,
+                          self.pool.add_computehost,
+                          'pool', ['host1', 'host2', 'host3'])
+        check0 = self.nova.aggregates.add_host
+        check0.assert_has_calls([mock.call(self.fake_aggregate.id, 'host1'),
+                                 mock.call(self.fake_aggregate.id, 'host2'),
+                                 mock.call('freepool', 'host1'),
+                                 mock.call('freepool', 'host2')])
+        check1 = self.nova.aggregates.remove_host
+        check1.assert_has_calls([mock.call(self.fake_freepool.id, 'host1'),
+                                 mock.call(self.fake_freepool.id, 'host2'),
+                                 mock.call(self.fake_aggregate.id, 'host1'),
+                                 mock.call(self.fake_aggregate.id, 'host2')])
+
     def test_remove_computehost_from_freepool(self):
         self._patch_get_aggregate_from_name_or_id()
         self.pool.remove_computehost(self.freepool_name, 'host3')


@@ -327,20 +327,24 @@ class ReservationPool(NovaClientWrapper):
         except manager_exceptions.AggregateNotFound:
             return []
 
-    def add_computehost(self, pool, host, stay_in=False):
-        """Add a compute host to an aggregate.
+    def add_computehost(self, pool, hosts, stay_in=False):
+        """Add compute host(s) to an aggregate.
 
-        The `host` must exist otherwise raise an error
-        and the `host` must be in the freepool.
+        Each host must exist and be in the freepool, otherwise raise an error.
 
         :param pool: Name or UUID of the pool to rattach the host
-        :param host: Name (not UUID) of the host to associate
-        :type host: str
+        :param hosts: Names (not UUID) of hosts to associate
+        :type hosts: str or list of str
 
         Return the related aggregate.
         Raise an aggregate exception if something wrong.
         """
+        if not isinstance(hosts, list):
+            hosts = [hosts]
+
+        added_hosts = []
+        removed_hosts = []
         agg = self.get_aggregate_from_name_or_id(pool)
 
         try:
@@ -348,26 +352,44 @@ class ReservationPool(NovaClientWrapper):
         except manager_exceptions.AggregateNotFound:
             raise manager_exceptions.NoFreePool()
 
-        if freepool_agg.id != agg.id and not stay_in:
-            if host not in freepool_agg.hosts:
-                raise manager_exceptions.HostNotInFreePool(
-                    host=host, freepool_name=freepool_agg.name)
-            LOG.info("removing host '%(host)s' from aggregate freepool "
-                     "%(name)s", {'host': host, 'name': freepool_agg.name})
-            try:
-                self.remove_computehost(freepool_agg.id, host)
-            except nova_exception.NotFound:
-                raise manager_exceptions.HostNotFound(host=host)
-
-        LOG.info("adding host '%(host)s' to aggregate %(id)s",
-                 {'host': host, 'id': agg.id})
-        try:
-            return self.nova.aggregates.add_host(agg.id, host)
-        except nova_exception.NotFound:
-            raise manager_exceptions.HostNotFound(host=host)
-        except nova_exception.Conflict as e:
-            raise manager_exceptions.AggregateAlreadyHasHost(
-                pool=pool, host=host, nova_exception=str(e))
+        try:
+            for host in hosts:
+                if freepool_agg.id != agg.id and not stay_in:
+                    if host not in freepool_agg.hosts:
+                        raise manager_exceptions.HostNotInFreePool(
+                            host=host, freepool_name=freepool_agg.name)
+                    LOG.info("removing host '%(host)s' from freepool "
+                             "aggregate %(name)s",
+                             {'host': host, 'name': freepool_agg.name})
+                    try:
+                        self.remove_computehost(freepool_agg.id, host)
+                        removed_hosts.append(host)
+                    except nova_exception.NotFound:
+                        raise manager_exceptions.HostNotFound(host=host)
+
+                LOG.info("adding host '%(host)s' to aggregate %(id)s",
+                         {'host': host, 'id': agg.id})
+                try:
+                    self.nova.aggregates.add_host(agg.id, host)
+                    added_hosts.append(host)
+                except nova_exception.NotFound:
+                    raise manager_exceptions.HostNotFound(host=host)
+                except nova_exception.Conflict as e:
+                    raise manager_exceptions.AggregateAlreadyHasHost(
+                        pool=pool, host=host, nova_exception=str(e))
+        except Exception as e:
+            if added_hosts:
+                LOG.warn('Removing hosts added to aggregate %s: %s',
+                         agg.id, added_hosts)
+                for host in added_hosts:
+                    self.nova.aggregates.remove_host(agg.id, host)
+            if removed_hosts:
+                LOG.warn('Adding hosts back to freepool: %s', removed_hosts)
+                for host in removed_hosts:
+                    self.nova.aggregates.add_host(freepool_agg.name, host)
+            raise e
+
+        return self.get_aggregate_from_name_or_id(pool)
 
     def remove_all_computehosts(self, pool):
         """Remove all compute hosts attached to an aggregate."""


@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    If a host fails to be moved to a host aggregate while starting a
+    reservation, any host previously moved into the same aggregate is now moved
+    back to the freepool. This helps to prevent failures to start future leases
+    allocating the same hosts.