Merge "Graceful error handling via workflow commands"
This commit is contained in:
commit
e8e8512f3f
@ -12,7 +12,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -40,12 +40,19 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'activeDirectory')].units[?(@.temp.instanceName and @.adminPassword and @.adminPassword != @.state.adminPassword)]"
|
||||
desc="Units of AD services which have got instances deployed but the local admin passwords not set yet">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -64,12 +71,19 @@
|
||||
<select path="adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'activeDirectory' and @.adminPassword and @.adminPassword != @.state.domainAdminPassword)].units[?(@.temp.instanceName and @.isMaster)]"
|
||||
desc="Deployed master-units of AD services for which the domain admin password is not set yet">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -88,6 +102,13 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set domain administrator password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -97,7 +118,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating Primary Domain Controller on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="CreatePrimaryDC">
|
||||
<send-command template="CreatePrimaryDC" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -121,12 +142,19 @@
|
||||
<parameter name="text">Primary Domain Controller created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to create a Primary DC on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'activeDirectory' and @.state.primaryDc and not @.state.primaryDcIp)].units[?(@.temp.instanceName and @.isMaster)]"
|
||||
desc="Master Units of AD services on which the Primary Domain Controller has been configured but DNS ip has not been asked for">
|
||||
<send-command template="AskDnsIp" result="ip">
|
||||
<send-command template="AskDnsIp" result="ip" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -138,12 +166,19 @@
|
||||
<select source="ip" path="0.Result.0"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable assign DNS IP on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type != 'activeDirectory')].units[?(@.state.domain and not @.domain)]"
|
||||
desc="Any non-AD services of the environment which has been part of the domain but needs to leave it">
|
||||
<send-command template="LeaveDomain">
|
||||
<send-command template="LeaveDomain" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id" source="unit"/>
|
||||
</parameter>
|
||||
@ -165,6 +200,13 @@
|
||||
</report>
|
||||
<set path="state.domain"><null/></set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) was unable to leave the domain due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -178,7 +220,7 @@
|
||||
</set>
|
||||
<rule desc="Domain controller exists with the assigned DNS IP">
|
||||
<parameter name="match">/$.services[?(@.type == 'activeDirectory' and @.domain == '<select path="domain"/>' and @.state.primaryDcIp)]</parameter>
|
||||
<send-command template="JoinDomain">
|
||||
<send-command template="JoinDomain" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id" source="unit"/>
|
||||
</parameter>
|
||||
@ -215,6 +257,13 @@
|
||||
<parameter name="text">Unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) has joined domain <select path="domain"/></parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) was unable to join the domain due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
</rule>
|
||||
@ -226,7 +275,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating Secondary Domain Controller on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="CreateSecondaryDC">
|
||||
<send-command template="CreateSecondaryDC" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -254,6 +303,17 @@
|
||||
<parameter name="text">Domain <select path="::domain"/> created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to create Secondary Domain Controller on unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<report entity="service" level="error">
|
||||
<parameter name="id"><select path="::id"/></parameter>
|
||||
<parameter name="text">Unable to create domain <select path="::domain"/></parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
</workflow>
|
||||
</workflow>
|
||||
|
@ -12,7 +12,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -40,12 +40,19 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type in ('webServerFarm', 'aspNetAppFarm'))].units[?(@.state.hostname and not @.temp.registeredWithLB)]"
|
||||
desc="Units of web-farms services which have a hostname assigned but are not registered with LB">
|
||||
<update-cf-stack template="LoadBalancer" result="outputs">
|
||||
<update-cf-stack template="LoadBalancer" result="outputs" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -57,12 +64,19 @@
|
||||
<set path="temp.registeredWithLB"><true/></set>
|
||||
<set path="::uri">http://<select source="outputs" path="LoadBalancerIP"/>:<select path="::loadBalancerPort"/></set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to create a Server Farm load balancer on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type in ('webServer', 'aspNetApp', 'webServerFarm', 'aspNetAppFarm') and @.adminPassword and @.adminPassword != @.state.adminPassword)].units[?(@.temp.instanceName)]"
|
||||
desc="Units of web services which have got an instance deployed but has not got a correct admin password ">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error='exception'>
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -81,6 +95,13 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -91,7 +112,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating IIS Web Server on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="InstallIIS">
|
||||
<send-command template="InstallIIS" error='exception'>
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -105,6 +126,13 @@
|
||||
<parameter name="text">IIS <select path="state.hostname"/> (<select path="name"/>) has started</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install IIS on <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -114,7 +142,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Deploying WebApp <select path="::name"/> on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="DeployWebApp">
|
||||
<send-command template="DeployWebApp" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -135,6 +163,13 @@
|
||||
<parameter name="text">WebApp <select path="::name"/> has been deployed on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy WebApp on <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
|
@ -43,7 +43,7 @@
|
||||
<parameter name="match">$[?(@.state.domain != '<select path="domain" source="ad"/>')]</parameter>
|
||||
<parameter name="desc">Units which are not part of the target domain but need to join</parameter>
|
||||
|
||||
<send-command template="JoinDomain">
|
||||
<send-command template="JoinDomain" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -82,8 +82,15 @@
|
||||
<parameter name="text">Unit <select path="state.hostname"/> (<select path="name"/>) has joined domain <select path="domain" source="ad"/></parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unit <select path="state.hostname"/> (<select path="name"/>) was unable to join the domain due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
</rule>
|
||||
|
||||
</workflow>
|
||||
</workflow>
|
||||
|
@ -13,7 +13,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -41,6 +41,13 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
@ -58,7 +65,7 @@
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and @.adminPassword and @.adminPassword != @.state.adminPassword)].units[?(@.temp.instanceName)]"
|
||||
desc="Units of SQL Server Cluster services which have got an instance deployed but has not got a correct admin password">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -77,12 +84,19 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.domain and not @.state.failoverClusterPrerequisitesInstalled)]"
|
||||
desc="Units of SQL Server Cluster services that are already joined AD domain">
|
||||
<send-command template="SqlServerCluster/FailoverClusterPrerequisites">
|
||||
<send-command template="SqlServerCluster/FailoverClusterPrerequisites" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -109,12 +123,19 @@
|
||||
<parameter name="text">Failover cluster prerequisites installed on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install prerequisites on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and not @.state.failoverClusterCreated)].units[?(@.state.failoverClusterPrerequisitesInstalled)]" limit="1"
|
||||
desc="First unit of SQL Server Cluster services that is already has failover cluster prerequisites installed">
|
||||
<send-command template="SqlServerCluster/FailoverCluster">
|
||||
<send-command template="SqlServerCluster/FailoverCluster" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -153,12 +174,19 @@
|
||||
<parameter name="text">Failover cluster created for SQL Server Cluster service (<select path="::name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="Service" level="error">
|
||||
<parameter name="id"><select path="::id"/></parameter>
|
||||
<parameter name="text">Unable to create failover cluster for SQL Server Service <select path="::name"/> due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and @.state.failoverClusterCreated and not @.state.agEnvironmentConfigured)].units[*]" limit="1"
|
||||
desc="First unit of SQL Server Cluster services that is already has failover cluster created">
|
||||
<send-command template="SqlServerCluster/ConfigureEnvironmentForAOAG">
|
||||
<send-command template="SqlServerCluster/ConfigureEnvironmentForAOAG" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -179,12 +207,19 @@
|
||||
<parameter name="text">Environment for AlwaysOn Availability Group of SQL Server Cluster service (<select path="::name"/>) configured</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="service" level="error">
|
||||
<parameter name="id"><select path="::id"/></parameter>
|
||||
<parameter name="text">Unable to configure the environment for AlwaysOn Availability Group of SQL Server Cluster service (<select path="::name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and @.state.agEnvironmentConfigured)].units[?(@.state.failoverClusterPrerequisitesInstalled and not @.state.sqlServerInstalled)]"
|
||||
desc="All units of SQL Server Cluster services that is already has environment configured">
|
||||
<send-command template="SqlServerCluster/InstallSqlServerForAOAG">
|
||||
<send-command template="SqlServerCluster/InstallSqlServerForAOAG" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -217,12 +252,19 @@
|
||||
<parameter name="text">SQL Server installed on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install SQL Server on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.sqlServerInstalled and not @.state.alwaysOnInitialized)]"
|
||||
desc="All units of SQL Server Cluster services that has SQL Server installed">
|
||||
<send-command template="SqlServerCluster/InitializeAlwaysOn">
|
||||
<send-command template="SqlServerCluster/InitializeAlwaysOn" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -255,12 +297,19 @@
|
||||
<parameter name="text">AlwaysOn AG initialized for <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to initialize AlwaysOn AG for <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.alwaysOnInitialized and not @.state.primaryReplicaInitialized)]"
|
||||
desc="All units of SQL Server Cluster services that has AlwaysOn initialized">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGPrimaryReplica">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGPrimaryReplica" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -308,12 +357,19 @@
|
||||
<parameter name="text">Primary replica for SQL Server AG initialized for <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to initialize primary replica for SQL Server AG for <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.primaryReplicaInitialized and not @.state.secondaryReplicaInitialized)]"
|
||||
desc="All units of SQL Server Cluster services that has primary replica initialized">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGSecondaryReplica">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGSecondaryReplica" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -346,7 +402,14 @@
|
||||
<parameter name="text">Secondary replica for SQL Server AG initialized for <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to initialize secondary replica for SQL Server AG for <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
</workflow>
|
||||
</workflow>
|
||||
|
@ -12,7 +12,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -40,12 +40,19 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlServer' and @.adminPassword and @.adminPassword != @.state.adminPassword)].units[?(@.temp.instanceName)]"
|
||||
desc="Units of SQL Server services which have got an instance deployed but has not got a correct admin password">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -64,6 +71,13 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -74,7 +88,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating MS SQL Server on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="InstallMsSqlServer">
|
||||
<send-command template="InstallMsSqlServer" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -99,6 +113,13 @@
|
||||
<parameter name="text">MS SQL Server <select path="state.hostname"/> (<select path="name"/>) has started</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install MS SQL Server on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
|
@ -77,7 +77,6 @@ class ConductorWorkflowService(service.Service):
|
||||
def _task_received(self, message):
|
||||
task = message.body or {}
|
||||
message_id = message.id
|
||||
reporter = None
|
||||
with self.create_rmq_client() as mq:
|
||||
try:
|
||||
log.info('Starting processing task {0}: {1}'.format(
|
||||
@ -96,7 +95,8 @@ class ConductorWorkflowService(service.Service):
|
||||
reporter)
|
||||
workflows.append(workflow)
|
||||
|
||||
while True:
|
||||
stop = False
|
||||
while not stop:
|
||||
try:
|
||||
while True:
|
||||
result = False
|
||||
@ -112,17 +112,18 @@ class ConductorWorkflowService(service.Service):
|
||||
log.debug("No pending commands found, "
|
||||
"seems like we are done")
|
||||
break
|
||||
if self.check_stop_requested(task):
|
||||
log.info("Workflow stop requested")
|
||||
stop = True
|
||||
except Exception as ex:
|
||||
reporter.report_generic(
|
||||
"Unexpected error has occurred", ex.message,
|
||||
'error')
|
||||
log.exception(ex)
|
||||
break
|
||||
command_dispatcher.close()
|
||||
except reporting.ReportedException as e:
|
||||
log.exception("Exception has occurred and was reported to API")
|
||||
except Exception as e:
|
||||
log.exception("Unexpected exception has occurred")
|
||||
if reporter:
|
||||
reporter.report_generic("Unexpected error has occurred",
|
||||
e.message, 'error')
|
||||
if stop:
|
||||
log.info("Workflow stopped by 'stop' command")
|
||||
finally:
|
||||
self.cleanup(task, reporter)
|
||||
result_msg = Message()
|
||||
@ -156,3 +157,9 @@ class ConductorWorkflowService(service.Service):
|
||||
if reporter:
|
||||
reporter.report_generic("Unexpected error has occurred",
|
||||
e.message, 'error')
|
||||
|
||||
def check_stop_requested(self, model):
    """Return the workflow stop flag stored in the task model.

    The flag is read from ``model['temp']['_stop_requested']``.

    :param model: task model (mapping); may be empty or ``None``.
    :returns: the stored ``_stop_requested`` value when present,
        otherwise ``False``.
    """
    # A missing model or a missing/empty/None 'temp' section means no
    # stop was requested; guard before the membership tests so that a
    # None value does not raise TypeError.
    temp = model['temp'] if model and 'temp' in model else None
    if temp and '_stop_requested' in temp:
        return temp['_stop_requested']
    return False
|
||||
|
@ -20,14 +20,35 @@ import string
|
||||
import time
|
||||
|
||||
import xml_code_engine
|
||||
from openstack.common import log as logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def update_cf_stack(engine, context, body, template, result=None, **kwargs):
|
||||
def update_cf_stack(engine, context, body, template, result=None, error=None,
|
||||
**kwargs):
|
||||
command_dispatcher = context['/commandDispatcher']
|
||||
|
||||
def callback(result_value):
|
||||
def callback(result_value, error_result=None):
|
||||
if result is not None:
|
||||
context[result] = result_value
|
||||
|
||||
if error_result is not None:
|
||||
if error is not None:
|
||||
context[error] = {
|
||||
'message': getattr(error_result, 'message', None),
|
||||
'strerror': getattr(error_result, 'strerror', None),
|
||||
'timestamp': time.time()
|
||||
}
|
||||
failure_handler = body.find('failure')
|
||||
if failure_handler is not None:
|
||||
log.warning("Handling exception in failure block")
|
||||
engine.evaluate_content(failure_handler, context)
|
||||
return
|
||||
else:
|
||||
log.error("No failure block found for exception")
|
||||
raise error_result
|
||||
|
||||
success_handler = body.find('success')
|
||||
if success_handler is not None:
|
||||
engine.evaluate_content(success_handler, context)
|
||||
|
@ -93,64 +93,64 @@ class HeatExecutor(CommandBase):
|
||||
self._delete_pending_list) > 0
|
||||
|
||||
def execute_pending(self):
    """Run the queued stack updates, then the queued stack deletes.

    Errors are no longer reported and re-raised here; the update step hands
    them to the per-command callbacks instead.

    :returns: the update step's result if it is truthy, otherwise the
        delete step's result.
    """
    # Both steps always run: ``or`` is applied to the already-computed
    # results, so a successful update never skips the pending deletes.
    updated = self._execute_pending_updates()
    deleted = self._execute_pending_deletes()
    return updated or deleted
|
||||
|
||||
def _execute_pending_updates(self):
    """Apply all queued stack templates and notify their callbacks.

    Every queued item's ``callback`` is invoked exactly once: with the
    stack outputs on success, or with ``(None, exception)`` when the Heat
    operation failed.

    :returns: ``False`` when nothing was queued, ``True`` otherwise.
    """
    if not self._update_pending_list:
        return False

    outs = None
    failure = None
    try:
        outs = self._push_pending_templates()
    except Exception as ex:
        # Keep our own reference: Python 3 unbinds ``ex`` when the
        # except clause ends.
        failure = ex

    pending_list = self._update_pending_list
    self._update_pending_list = []

    # Callbacks run outside the ``try`` on purpose. Previously an error
    # raised by a success callback was caught by the handler above after
    # the pending list had already been emptied, so it was silently lost.
    for item in pending_list:
        if failure is None:
            item['callback'](outs)
        else:
            item['callback'](None, failure)
    return True


def _push_pending_templates(self):
    """Merge the queued templates, create or update the stack and wait.

    :returns: the value returned by ``self._wait_state`` for the stack.
    """
    template, arguments = self._get_current_template()
    stack_exists = (template != {})
    # do not need to merge with the current stack because we are rebuilding
    # it from scratch on every deployment
    template, arguments = ({}, {})

    for t in self._update_pending_list:
        template = muranoconductor.helpers.merge_dicts(template,
                                                       t['template'])
        arguments = muranoconductor.helpers.merge_dicts(arguments,
                                                        t['arguments'],
                                                        max_levels=1)
    log.info(
        'Executing heat template {0} with arguments {1} on stack {2}'
        .format(anyjson.dumps(template), arguments, self._stack))

    if stack_exists:
        self._heat_client.stacks.update(
            stack_id=self._stack,
            parameters=arguments,
            template=template)
        log.debug(
            'Waiting for the stack {0} to be update'.format(
                self._stack))
        outs = self._wait_state('UPDATE_COMPLETE')
        log.info('Stack {0} updated'.format(self._stack))
    else:
        self._heat_client.stacks.create(
            stack_name=self._stack,
            parameters=arguments,
            template=template)

        log.debug('Waiting for the stack {0} to be create'.format(
            self._stack))
        outs = self._wait_state('CREATE_COMPLETE')
        log.info('Stack {0} created'.format(self._stack))
    return outs
|
||||
|
||||
def _execute_pending_deletes(self):
|
||||
if not len(self._delete_pending_list):
|
||||
|
@ -18,7 +18,8 @@ class WindowsAgentExecutor(CommandBase):
|
||||
self._reporter = reporter
|
||||
rmqclient.declare(self._results_queue)
|
||||
|
||||
def execute(self, template, mappings, unit, service, callback):
|
||||
def execute(self, template, mappings, unit, service, callback,
|
||||
timeout=None):
|
||||
with open('data/templates/agent/%s.template' % template) as t_file:
|
||||
template_data = t_file.read()
|
||||
|
||||
@ -29,7 +30,8 @@ class WindowsAgentExecutor(CommandBase):
|
||||
queue = ('%s-%s-%s' % (self._stack, service, unit)).lower()
|
||||
self._pending_list.append({
|
||||
'id': msg_id,
|
||||
'callback': callback
|
||||
'callback': callback,
|
||||
'timeout': timeout
|
||||
})
|
||||
|
||||
msg = Message()
|
||||
@ -49,15 +51,53 @@ class WindowsAgentExecutor(CommandBase):
|
||||
|
||||
with self._rmqclient.open(self._results_queue) as subscription:
|
||||
while self.has_pending_commands():
|
||||
log.debug("Waiting for responses to be returned by the agent. "
|
||||
"%i total responses remain", len(self._pending_list))
|
||||
msg = subscription.get_message()
|
||||
msg.ack()
|
||||
msg_id = msg.id.lower()
|
||||
item, index = muranoconductor.helpers.find(
|
||||
lambda t: t['id'] == msg_id, self._pending_list)
|
||||
if item:
|
||||
self._pending_list.pop(index)
|
||||
item['callback'](msg.body)
|
||||
|
||||
# TODO: Add extended initialization timeout
|
||||
# By now, all the timeouts are defined by the command input
|
||||
# however, the first reply which we wait for being returned
|
||||
# from the unit may be delayed due to long unit initialization
|
||||
# and startup. So, for the uninitialized units we need to extend
|
||||
# the command's timeout with the initialization timeout
|
||||
timeout = self.get_max_timeout()
|
||||
if timeout:
|
||||
span_message = "for {0} seconds".format(timeout)
|
||||
else:
|
||||
span_message = 'infinitely'
|
||||
log.debug("Waiting %s for responses to be returned"
|
||||
" by the agent. %i total responses remain",
|
||||
span_message, len(self._pending_list))
|
||||
msg = subscription.get_message(timeout=timeout)
|
||||
if msg:
|
||||
msg.ack()
|
||||
msg_id = msg.id.lower()
|
||||
item, index = muranoconductor.helpers.find(
|
||||
lambda t: t['id'] == msg_id, self._pending_list)
|
||||
if item:
|
||||
self._pending_list.pop(index)
|
||||
item['callback'](msg.body)
|
||||
else:
|
||||
while self.has_pending_commands():
|
||||
item = self._pending_list.pop()
|
||||
item['callback'](AgentTimeoutException(timeout))
|
||||
return True
|
||||
|
||||
def get_max_timeout(self):
    """Return how long to wait for the pending commands' responses.

    :returns: ``None`` (i.e. wait infinitely) if at least one pending
        command has no timeout; otherwise the largest timeout among the
        pending commands, or ``0`` when nothing is pending.
    """
    timeouts = [item['timeout'] for item in self._pending_list]
    if any(value is None for value in timeouts):
        return None
    # The leading 0 keeps the result non-negative and keeps max() defined
    # for an empty pending list.
    return max([0] + timeouts)
|
||||
|
||||
|
||||
class AgentTimeoutException(Exception):
    """Signals that no response arrived from the agent within the timeout.

    ``message`` holds the human-readable text and ``timeout`` the timeout
    value the exception was created with.
    """

    def __init__(self, timeout):
        message = "Unable to receive any response from the agent" \
                  " in {0} sec".format(timeout)
        # Initialise the base class so that str(exc), exc.args and logged
        # tracebacks carry the message instead of being empty.
        super(AgentTimeoutException, self).__init__(message)
        self.message = message
        self.timeout = timeout
|
||||
|
||||
|
||||
class UnhandledAgentException(Exception):
    """Signals agent errors that no failure handler took care of.

    ``errors`` keeps the error records passed to the constructor and
    ``message`` a human-readable summary built from them.
    """

    def __init__(self, errors):
        message = "An unhandled exception has " \
                  "occurred in the Agent: {0}".format(errors)
        # Initialise the base class so that str(exc), exc.args and logged
        # tracebacks carry the message instead of being empty.
        super(UnhandledAgentException, self).__init__(message)
        self.message = message
        self.errors = errors
|
||||
|
@ -12,33 +12,77 @@
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from muranoconductor.commands.windows_agent import AgentTimeoutException
|
||||
from muranoconductor.commands.windows_agent import UnhandledAgentException
|
||||
|
||||
import xml_code_engine
|
||||
|
||||
from openstack.common import log as logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def send_command(engine, context, body, template, service, unit, mappings=None,
                 result=None, error=None, timeout=None, **kwargs):
    """Queue an agent command and wire up its success/failure handling.

    The command is handed to the dispatcher found at
    ``context['/commandDispatcher']`` under the name ``agent``. When the
    reply (or a timeout) arrives, the callback:

    * stores successful results in ``context[result]`` (if ``result`` is
      given) and evaluates the ``success`` child of ``body``, when there is
      at least one successful result;
    * stores error records in ``context[error]`` (if ``error`` is given) and
      evaluates the ``failure`` child of ``body``, when there is at least
      one error. Without a ``failure`` child the error is raised instead.

    :param timeout: optional timeout; a truthy value is converted with
        ``int()`` before being passed to the dispatcher.
    :raises AgentTimeoutException: from the callback, on an unhandled
        timeout.
    :raises UnhandledAgentException: from the callback, on other unhandled
        agent errors.
    """
    if not mappings:
        mappings = {}
    command_dispatcher = context['/commandDispatcher']
    if timeout:
        timeout = int(timeout)

    def callback(result_value):
        log.info(
            'Received result from {2} for {0}: {1}'.format(
                template, result_value, unit))
        ok, errors = _split_agent_result(result_value)

        if ok:
            if result is not None:
                context[result] = ok
            success_handler = body.find('success')
            if success_handler is not None:
                engine.evaluate_content(success_handler, context)
        if errors:
            if error is not None:
                context[error] = errors
            failure_handler = body.find('failure')
            if failure_handler is not None:
                engine.evaluate_content(failure_handler, context)
            else:
                log.error("No failure block found for exception")
                # Nobody handles the error in the workflow: surface it.
                if isinstance(result_value, AgentTimeoutException):
                    raise result_value
                else:
                    raise UnhandledAgentException(errors)

    command_dispatcher.execute(
        name='agent', template=template, mappings=mappings,
        unit=unit, service=service, callback=callback, timeout=timeout)


def _split_agent_result(result_value):
    """Split an agent reply (or timeout) into (ok results, error records)."""
    ok = []
    errors = []
    if isinstance(result_value, AgentTimeoutException):
        errors.append({
            'type': "timeout",
            'messages': [result_value.message],
            'timeout': result_value.timeout
        })
    elif result_value['IsException']:
        # The whole reply is an exception; 'Result' is concatenated as text.
        msg = "A general exception has occurred in the Agent: " + \
              result_value['Result']
        errors.append({
            'type': "general",
            'messages': [msg],
        })
    else:
        # Otherwise 'Result' is iterated as a list of per-item results.
        for res in result_value['Result']:
            if res['IsException']:
                errors.append({
                    'type': 'inner',
                    'messages': res['Result']
                })
            else:
                ok.append(res)
    return ok, errors
|
||||
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(send_command, "send-command")
|
||||
|
@ -203,6 +203,13 @@ class Workflow(object):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
def _stop_func(context, body, engine, **kwargs):
    """Handle the workflow ``stop`` command by raising the stop flag.

    Sets ``temp._stop_requested`` to ``True`` on ``context['/dataSource']``,
    creating the ``temp`` section first when it is missing. ``body`` and
    ``engine`` are accepted for the command-function signature but unused.
    """
    data_source = context['/dataSource']
    if 'temp' not in data_source:
        data_source['temp'] = {}

    data_source['temp']['_stop_requested'] = True
|
||||
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._rule_func, 'rule')
|
||||
@ -216,6 +223,9 @@ xml_code_engine.XmlCodeEngine.register_function(
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._select_func, 'select')
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._stop_func, 'stop')
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._select_all_func, 'select-all')
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user