diff --git a/nodepool/driver/__init__.py b/nodepool/driver/__init__.py index 5590fc71d..3e85b9148 100644 --- a/nodepool/driver/__init__.py +++ b/nodepool/driver/__init__.py @@ -195,6 +195,17 @@ class Provider(ProviderNotifications): """ pass + @abc.abstractmethod + def idle(self): + """Idle the provider + + This is called before stop(). Providers should use this as a signal + to idle themselves and stop performing any actions that may interfere + with a new version of this provider starting up. + + """ + pass + @abc.abstractmethod def join(self): """Wait for provider to finish diff --git a/nodepool/driver/kubernetes/provider.py b/nodepool/driver/kubernetes/provider.py index 1ad75c7e4..0c9f8dd5a 100644 --- a/nodepool/driver/kubernetes/provider.py +++ b/nodepool/driver/kubernetes/provider.py @@ -56,6 +56,9 @@ class KubernetesProvider(Provider, QuotaSupport): self.log.debug("Stopping") self.ready = False + def idle(self): + pass + def listNodes(self): servers = [] diff --git a/nodepool/driver/openshift/provider.py b/nodepool/driver/openshift/provider.py index 9c579a6d8..3894c5c81 100644 --- a/nodepool/driver/openshift/provider.py +++ b/nodepool/driver/openshift/provider.py @@ -52,6 +52,9 @@ class OpenshiftProvider(Provider, QuotaSupport): def stop(self): self.log.debug("Stopping") + def idle(self): + pass + def listNodes(self): servers = [] diff --git a/nodepool/driver/openstack/provider.py b/nodepool/driver/openstack/provider.py index 3d64cef9c..c5b189e89 100755 --- a/nodepool/driver/openstack/provider.py +++ b/nodepool/driver/openstack/provider.py @@ -69,6 +69,9 @@ class OpenStackProvider(Provider, QuotaSupport): self.running = False self._server_list_watcher_stop_event.set() + def idle(self): + pass + def join(self): self._server_list_watcher.join() diff --git a/nodepool/driver/statemachine.py b/nodepool/driver/statemachine.py index de62e982a..49bb4d77d 100644 --- a/nodepool/driver/statemachine.py +++ b/nodepool/driver/statemachine.py @@ -510,6 +510,9 @@ class StateMachineProvider(Provider, QuotaSupport): self.adapter.stop() self.log.debug("Stopped") + def idle(self): + pass + def join(self): self.log.debug("Joining") if self.state_machine_thread: diff --git a/nodepool/driver/static/provider.py b/nodepool/driver/static/provider.py index f9c279c20..d98586350 100644 --- a/nodepool/driver/static/provider.py +++ b/nodepool/driver/static/provider.py @@ -59,6 +59,9 @@ class StaticNodeProvider(Provider, QuotaSupport): # multiple threads (e.g. cleanup and deleted node worker). self._register_lock = threading.Lock() self._node_slots = {} # nodeTuple -> [node] + # Flag to indicates we need to stop processing state that could + # interfere with a newer versions of ourselves running. + self._idle = False def _getSlot(self, node): return self._node_slots[nodeTuple(node)].index(node) @@ -412,6 +415,9 @@ class StaticNodeProvider(Provider, QuotaSupport): def stop(self): self.log.debug("Stopping") + def idle(self): + self._idle = True + def poolNodes(self): return { nodeTuple(n): n @@ -437,6 +443,8 @@ class StaticNodeProvider(Provider, QuotaSupport): return True def cleanupLeakedResources(self): + if self._idle: + return with self._register_lock: self.getRegisteredNodes() for pool in self.provider.pools.values(): @@ -458,6 +466,9 @@ class StaticNodeProvider(Provider, QuotaSupport): ''' Re-register the deleted node. ''' + if self._idle: + return + # It's possible a deleted node no longer exists in our config, so # don't bother to reregister. node_tuple = nodeTuple(node) diff --git a/nodepool/driver/test/provider.py b/nodepool/driver/test/provider.py index a56cc9251..a242304b8 100644 --- a/nodepool/driver/test/provider.py +++ b/nodepool/driver/test/provider.py @@ -29,6 +29,9 @@ class TestProvider(Provider): def stop(self): pass + def idle(self): + pass + def join(self): pass diff --git a/nodepool/provider_manager.py b/nodepool/provider_manager.py index 06674e894..6195a2783 100644 --- a/nodepool/provider_manager.py +++ b/nodepool/provider_manager.py @@ -50,6 +50,9 @@ class ProviderManager(object): if old_config: oldmanager = old_config.provider_managers.get(p.name) if oldmanager and p != oldmanager.provider: + # Signal that actions not safe to run on both the old and + # new providers while we synchronize should cease to run. + oldmanager.idle() stop_managers.append(oldmanager) oldmanager = None if oldmanager: