Graceful error handling via workflow commands
Workflow rules can now define "failure" blocks to handle the various errors and exceptions that occur while executing actions. These blocks are passed to the 'update-cf-stack' and 'send-command' functions, so that their callbacks can invoke them to handle errors and exceptions. The actual error and exception data may be passed via the context, in the same way as the result is passed to "success" handlers. If the "failure" block is omitted, the global exception is raised, which interrupts the workflow execution and reports an unhandled error to the API at the error level. To gracefully stop the workflow execution without throwing an exception, a "failure" block may define a '<stop/>' command, which interrupts the execution after the end of the current loop without throwing any exceptions. These changes make it possible to handle exceptions and unexpected states while interacting with Heat, as well as agent-side exceptions delivered to the conductor from the Agent. The same approach also works for handling a timeout while waiting for a task result from the Agent. To support timeouts, a 'timeout' parameter must be passed to the 'send-command' function. If it is omitted, the timeout is considered to be infinite. The workflows have been updated with failure blocks on all the commands. These blocks contain error-level reporting and a <stop/> command to interrupt the flow. No timeouts were set in the workflows, so the timeout feature is currently inactive for the existing workflows (as the 'initialization timeout' concept needs to be introduced). Change-Id: Ia791d4656463240ed197bcd90b9d9eae648270af
This commit is contained in:
parent
4a0d31c09e
commit
8203a6ac07
@ -12,7 +12,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -40,12 +40,19 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'activeDirectory')].units[?(@.temp.instanceName and @.adminPassword and @.adminPassword != @.state.adminPassword)]"
|
||||
desc="Units of AD services which have got instances deployed but the local admin passwords not set yet">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -64,12 +71,19 @@
|
||||
<select path="adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'activeDirectory' and @.adminPassword and @.adminPassword != @.state.domainAdminPassword)].units[?(@.temp.instanceName and @.isMaster)]"
|
||||
desc="Deployed master-units of AD services for which the domain admin password is not set yet">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -88,6 +102,13 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set domain administrator password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -97,7 +118,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating Primary Domain Controller on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="CreatePrimaryDC">
|
||||
<send-command template="CreatePrimaryDC" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -121,12 +142,19 @@
|
||||
<parameter name="text">Primary Domain Controller created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to create a Primary DC on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'activeDirectory' and @.state.primaryDc and not @.state.primaryDcIp)].units[?(@.temp.instanceName and @.isMaster)]"
|
||||
desc="Master Units of AD services on which the Primary Domain Controller has been configured but DNS ip has not been asked for">
|
||||
<send-command template="AskDnsIp" result="ip">
|
||||
<send-command template="AskDnsIp" result="ip" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -138,12 +166,19 @@
|
||||
<select source="ip" path="0.Result.0"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable assign DNS IP on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type != 'activeDirectory')].units[?(@.state.domain and not @.domain)]"
|
||||
desc="Any non-AD services of the environment which has been part of the domain but needs to leave it">
|
||||
<send-command template="LeaveDomain">
|
||||
<send-command template="LeaveDomain" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id" source="unit"/>
|
||||
</parameter>
|
||||
@ -165,6 +200,13 @@
|
||||
</report>
|
||||
<set path="state.domain"><null/></set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) was unable to leave the domain due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -178,7 +220,7 @@
|
||||
</set>
|
||||
<rule desc="Domain controller exists with the assigned DNS IP">
|
||||
<parameter name="match">/$.services[?(@.type == 'activeDirectory' and @.domain == '<select path="domain"/>' and @.state.primaryDcIp)]</parameter>
|
||||
<send-command template="JoinDomain">
|
||||
<send-command template="JoinDomain" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id" source="unit"/>
|
||||
</parameter>
|
||||
@ -212,6 +254,13 @@
|
||||
<parameter name="text">Unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) has joined domain <select path="domain"/></parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) was unable to join the domain due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
</rule>
|
||||
@ -223,7 +272,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating Secondary Domain Controller on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="CreateSecondaryDC">
|
||||
<send-command template="CreateSecondaryDC" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -251,6 +300,17 @@
|
||||
<parameter name="text">Domain <select path="::domain"/> created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to create Secondary Domain Controller on unit <select path="state.hostname" source="unit"/> (<select path="name" source="unit"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<report entity="service" level="error">
|
||||
<parameter name="id"><select path="::id"/></parameter>
|
||||
<parameter name="text">Unable to create domain <select path="::domain"/></parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
</workflow>
|
@ -12,7 +12,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -40,12 +40,19 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type in ('webServerFarm', 'aspNetAppFarm'))].units[?(@.state.hostname and not @.temp.registeredWithLB)]"
|
||||
desc="Units of web-farms services which have a hostname assigned but are not registered with LB">
|
||||
<update-cf-stack template="LoadBalancer" result="outputs">
|
||||
<update-cf-stack template="LoadBalancer" result="outputs" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -57,12 +64,19 @@
|
||||
<set path="temp.registeredWithLB"><true/></set>
|
||||
<set path="::uri">http://<select source="outputs" path="LoadBalancerIP"/>:<select path="::loadBalancerPort"/></set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to create a Server Farm load balancer on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type in ('webServer', 'aspNetApp', 'webServerFarm', 'aspNetAppFarm') and @.adminPassword and @.adminPassword != @.state.adminPassword)].units[?(@.temp.instanceName)]"
|
||||
desc="Units of web services which have got an instance deployed but has not got a correct admin password ">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error='exception'>
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -81,6 +95,13 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -91,7 +112,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating IIS Web Server on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="InstallIIS">
|
||||
<send-command template="InstallIIS" error='exception'>
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -105,6 +126,13 @@
|
||||
<parameter name="text">IIS <select path="state.hostname"/> (<select path="name"/>) has started</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install IIS on <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -114,7 +142,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Deploying WebApp <select path="::name"/> on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="DeployWebApp">
|
||||
<send-command template="DeployWebApp" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -135,6 +163,13 @@
|
||||
<parameter name="text">WebApp <select path="::name"/> has been deployed on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy WebApp on <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
|
@ -43,7 +43,7 @@
|
||||
<parameter name="match">$[?(@.state.domain != '<select path="domain" source="ad"/>')]</parameter>
|
||||
<parameter name="desc">Units which are not part of the target domain but need to join</parameter>
|
||||
|
||||
<send-command template="JoinDomain">
|
||||
<send-command template="JoinDomain" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -79,6 +79,13 @@
|
||||
<parameter name="text">Unit <select path="state.hostname"/> (<select path="name"/>) has joined domain <select path="domain" source="ad"/></parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unit <select path="state.hostname"/> (<select path="name"/>) was unable to join the domain due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
</rule>
|
||||
|
@ -13,7 +13,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -41,6 +41,13 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
@ -58,7 +65,7 @@
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and @.adminPassword and @.adminPassword != @.state.adminPassword)].units[?(@.temp.instanceName)]"
|
||||
desc="Units of SQL Server Cluster services which have got an instance deployed but has not got a correct admin password">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -77,12 +84,19 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.domain and not @.state.failoverClusterPrerequisitesInstalled)]"
|
||||
desc="Units of SQL Server Cluster services that are already joined AD domain">
|
||||
<send-command template="SqlServerCluster/FailoverClusterPrerequisites">
|
||||
<send-command template="SqlServerCluster/FailoverClusterPrerequisites" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -109,12 +123,19 @@
|
||||
<parameter name="text">Failover cluster prerequisites installed on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install prerequisites on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and not @.state.failoverClusterCreated)].units[?(@.state.failoverClusterPrerequisitesInstalled)]" limit="1"
|
||||
desc="First unit of SQL Server Cluster services that is already has failover cluster prerequisites installed">
|
||||
<send-command template="SqlServerCluster/FailoverCluster">
|
||||
<send-command template="SqlServerCluster/FailoverCluster" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -147,12 +168,19 @@
|
||||
<parameter name="text">Failover cluster created for SQL Server Cluster service (<select path="::name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="Service" level="error">
|
||||
<parameter name="id"><select path="::id"/></parameter>
|
||||
<parameter name="text">Unable to create failover cluster for SQL Server Service <select path="::name"/> due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and @.state.failoverClusterCreated and not @.state.agEnvironmentConfigured)].units[*]" limit="1"
|
||||
desc="First unit of SQL Server Cluster services that is already has failover cluster created">
|
||||
<send-command template="SqlServerCluster/ConfigureEnvironmentForAOAG">
|
||||
<send-command template="SqlServerCluster/ConfigureEnvironmentForAOAG" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -188,12 +216,19 @@
|
||||
<parameter name="text">Environment for AlwaysOn Availability Group of SQL Server Cluster service (<select path="::name"/>) configured</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="service" level="error">
|
||||
<parameter name="id"><select path="::id"/></parameter>
|
||||
<parameter name="text">Unable to configure the environment for AlwaysOn Availability Group of SQL Server Cluster service (<select path="::name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer' and @.state.agEnvironmentConfigured)].units[?(@.state.failoverClusterPrerequisitesInstalled and not @.state.sqlServerInstalled)]"
|
||||
desc="All units of SQL Server Cluster services that is already has environment configured">
|
||||
<send-command template="SqlServerCluster/InstallSqlServerForAOAG">
|
||||
<send-command template="SqlServerCluster/InstallSqlServerForAOAG" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -220,12 +255,19 @@
|
||||
<parameter name="text">SQL Server installed on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install SQL Server on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.sqlServerInstalled and not @.state.alwaysOnInitialized)]"
|
||||
desc="All units of SQL Server Cluster services that has SQL Server installed">
|
||||
<send-command template="SqlServerCluster/InitializeAlwaysOn">
|
||||
<send-command template="SqlServerCluster/InitializeAlwaysOn" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -258,12 +300,19 @@
|
||||
<parameter name="text">AlwaysOn AG initialized for <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to initialize AlwaysOn AG for <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.alwaysOnInitialized and not @.state.primaryReplicaInitialized)]"
|
||||
desc="All units of SQL Server Cluster services that has AlwaysOn initialized">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGPrimaryReplica">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGPrimaryReplica" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -311,12 +360,19 @@
|
||||
<parameter name="text">Primary replica for SQL Server AG initialized for <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to initialize primary replica for SQL Server AG for <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlClusterServer')].units[?(@.state.primaryReplicaInitialized and not @.state.secondaryReplicaInitialized)]"
|
||||
desc="All units of SQL Server Cluster services that has primary replica initialized">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGSecondaryReplica">
|
||||
<send-command template="SqlServerCluster/InitializeAOAGSecondaryReplica" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -349,6 +405,13 @@
|
||||
<parameter name="text">Secondary replica for SQL Server AG initialized for <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to initialize secondary replica for SQL Server AG for <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating instance <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<update-cf-stack template="Windows">
|
||||
<update-cf-stack template="Windows" error="exception">
|
||||
<parameter name="mappings">
|
||||
<map>
|
||||
<mapping name="instanceName"><select path="state.hostname"/></mapping>
|
||||
@ -40,12 +40,19 @@
|
||||
<parameter name="text">Instance <select path="state.hostname"/> (<select path="name"/>) created</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to deploy instance <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="message" default="unknown Heat error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</update-cf-stack>
|
||||
</rule>
|
||||
|
||||
<rule match="$.services[?(@.type == 'msSqlServer' and @.adminPassword and @.adminPassword != @.state.adminPassword)].units[?(@.temp.instanceName)]"
|
||||
desc="Units of SQL Server services which have got an instance deployed but has not got a correct admin password">
|
||||
<send-command template="SetPassword">
|
||||
<send-command template="SetPassword" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -64,6 +71,13 @@
|
||||
<select path="::adminPassword"/>
|
||||
</set>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to set admin password on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
@ -74,7 +88,7 @@
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Creating MS SQL Server on unit <select path="state.hostname"/> (<select path="name"/>)</parameter>
|
||||
</report>
|
||||
<send-command template="InstallMsSqlServer">
|
||||
<send-command template="InstallMsSqlServer" error="exception">
|
||||
<parameter name="unit">
|
||||
<select path="id"/>
|
||||
</parameter>
|
||||
@ -99,6 +113,13 @@
|
||||
<parameter name="text">MS SQL Server <select path="state.hostname"/> (<select path="name"/>) has started</parameter>
|
||||
</report>
|
||||
</success>
|
||||
<failure>
|
||||
<report entity="unit" level="error">
|
||||
<parameter name="id"><select path="id"/></parameter>
|
||||
<parameter name="text">Unable to install MS SQL Server on unit <select path="state.hostname"/> (<select path="name"/>) due to <select source="exception" path="0.messages.0" default="unknown Agent error"/> </parameter>
|
||||
</report>
|
||||
<stop/>
|
||||
</failure>
|
||||
</send-command>
|
||||
</rule>
|
||||
|
||||
|
@ -77,7 +77,6 @@ class ConductorWorkflowService(service.Service):
|
||||
def _task_received(self, message):
|
||||
task = message.body or {}
|
||||
message_id = message.id
|
||||
reporter = None
|
||||
with self.create_rmq_client() as mq:
|
||||
try:
|
||||
log.info('Starting processing task {0}: {1}'.format(
|
||||
@ -96,7 +95,8 @@ class ConductorWorkflowService(service.Service):
|
||||
reporter)
|
||||
workflows.append(workflow)
|
||||
|
||||
while True:
|
||||
stop = False
|
||||
while not stop:
|
||||
try:
|
||||
while True:
|
||||
result = False
|
||||
@ -112,17 +112,18 @@ class ConductorWorkflowService(service.Service):
|
||||
log.debug("No pending commands found, "
|
||||
"seems like we are done")
|
||||
break
|
||||
if self.check_stop_requested(task):
|
||||
log.info("Workflow stop requested")
|
||||
stop = True
|
||||
except Exception as ex:
|
||||
reporter.report_generic(
|
||||
"Unexpected error has occurred", ex.message,
|
||||
'error')
|
||||
log.exception(ex)
|
||||
break
|
||||
command_dispatcher.close()
|
||||
except reporting.ReportedException as e:
|
||||
log.exception("Exception has occurred and was reported to API")
|
||||
except Exception as e:
|
||||
log.exception("Unexpected exception has occurred")
|
||||
if reporter:
|
||||
reporter.report_generic("Unexpected error has occurred",
|
||||
e.message, 'error')
|
||||
if stop:
|
||||
log.info("Workflow stopped by 'stop' command")
|
||||
finally:
|
||||
self.cleanup(task, reporter)
|
||||
result_msg = Message()
|
||||
@ -156,3 +157,9 @@ class ConductorWorkflowService(service.Service):
|
||||
if reporter:
|
||||
reporter.report_generic("Unexpected error has occurred",
|
||||
e.message, 'error')
|
||||
|
||||
def check_stop_requested(self, model):
    """Return the stop flag stored at ``model['temp']['_stop_requested']``.

    :param model: mapping to inspect (the caller passes the task body).
    :returns: the stored ``_stop_requested`` value when present, otherwise
        ``False``.
    """
    temp = model['temp'] if 'temp' in model else None
    # A missing, None or empty 'temp' section means nothing was requested;
    # guarding here avoids a TypeError on the membership test when the
    # section is present but set to None.
    if not temp:
        return False
    if '_stop_requested' in temp:
        return temp['_stop_requested']
    return False
|
||||
|
@ -20,14 +20,35 @@ import string
|
||||
import time
|
||||
|
||||
import xml_code_engine
|
||||
from openstack.common import log as logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def update_cf_stack(engine, context, body, template, result=None, **kwargs):
|
||||
def update_cf_stack(engine, context, body, template, result=None, error=None,
|
||||
**kwargs):
|
||||
command_dispatcher = context['/commandDispatcher']
|
||||
|
||||
def callback(result_value):
|
||||
def callback(result_value, error_result=None):
|
||||
if result is not None:
|
||||
context[result] = result_value
|
||||
|
||||
if error_result is not None:
|
||||
if error is not None:
|
||||
context[error] = {
|
||||
'message': getattr(error_result, 'message', None),
|
||||
'strerror': getattr(error_result, 'strerror', None),
|
||||
'timestamp': time.time()
|
||||
}
|
||||
failure_handler = body.find('failure')
|
||||
if failure_handler is not None:
|
||||
log.warning("Handling exception in failure block")
|
||||
engine.evaluate_content(failure_handler, context)
|
||||
return
|
||||
else:
|
||||
log.error("No failure block found for exception")
|
||||
raise error_result
|
||||
|
||||
success_handler = body.find('success')
|
||||
if success_handler is not None:
|
||||
engine.evaluate_content(success_handler, context)
|
||||
|
@ -93,64 +93,64 @@ class HeatExecutor(CommandBase):
|
||||
self._delete_pending_list) > 0
|
||||
|
||||
def execute_pending(self):
|
||||
try:
|
||||
r1 = self._execute_pending_updates()
|
||||
r2 = self._execute_pending_deletes()
|
||||
except Exception as e:
|
||||
self._reporter.report_generic("Unable to execute Heat command",
|
||||
e.message, "error")
|
||||
trace = sys.exc_info()[2]
|
||||
raise ReportedException(e.message), None, trace
|
||||
r1 = self._execute_pending_updates()
|
||||
r2 = self._execute_pending_deletes()
|
||||
return r1 or r2
|
||||
|
||||
def _execute_pending_updates(self):
|
||||
if not len(self._update_pending_list):
|
||||
return False
|
||||
|
||||
template, arguments = self._get_current_template()
|
||||
stack_exists = (template != {})
|
||||
try:
|
||||
template, arguments = self._get_current_template()
|
||||
stack_exists = (template != {})
|
||||
# no need to merge with the current stack because we are rebuilding it
|
||||
# from scratch on every deployment
|
||||
template, arguments = ({}, {})
|
||||
|
||||
# do not need to merge with current stack cause we rebuilding it from
|
||||
# scratch on every deployment
|
||||
template, arguments = ({}, {})
|
||||
for t in self._update_pending_list:
|
||||
template = muranoconductor.helpers.merge_dicts(template,
|
||||
t['template'])
|
||||
arguments = muranoconductor.helpers.merge_dicts(arguments,
|
||||
t['arguments'],
|
||||
max_levels=1)
|
||||
log.info(
|
||||
'Executing heat template {0} with arguments {1} on stack {2}'
|
||||
.format(anyjson.dumps(template), arguments, self._stack))
|
||||
|
||||
for t in self._update_pending_list:
|
||||
template = muranoconductor.helpers.merge_dicts(
|
||||
template, t['template'])
|
||||
arguments = muranoconductor.helpers.merge_dicts(
|
||||
arguments, t['arguments'], max_levels=1)
|
||||
if stack_exists:
|
||||
self._heat_client.stacks.update(
|
||||
stack_id=self._stack,
|
||||
parameters=arguments,
|
||||
template=template)
|
||||
log.debug(
|
||||
'Waiting for the stack {0} to be update'.format(
|
||||
self._stack))
|
||||
outs = self._wait_state('UPDATE_COMPLETE')
|
||||
log.info('Stack {0} updated'.format(self._stack))
|
||||
else:
|
||||
self._heat_client.stacks.create(
|
||||
stack_name=self._stack,
|
||||
parameters=arguments,
|
||||
template=template)
|
||||
|
||||
log.info(
|
||||
'Executing heat template {0} with arguments {1} on stack {2}'
|
||||
.format(anyjson.dumps(template), arguments, self._stack))
|
||||
log.debug('Waiting for the stack {0} to be create'.format(
|
||||
self._stack))
|
||||
outs = self._wait_state('CREATE_COMPLETE')
|
||||
log.info('Stack {0} created'.format(self._stack))
|
||||
|
||||
if stack_exists:
|
||||
self._heat_client.stacks.update(
|
||||
stack_id=self._stack,
|
||||
parameters=arguments,
|
||||
template=template)
|
||||
log.debug(
|
||||
'Waiting for the stack {0} to be update'.format(self._stack))
|
||||
outs = self._wait_state('UPDATE_COMPLETE')
|
||||
log.info('Stack {0} updated'.format(self._stack))
|
||||
else:
|
||||
self._heat_client.stacks.create(
|
||||
stack_name=self._stack,
|
||||
parameters=arguments,
|
||||
template=template)
|
||||
pending_list = self._update_pending_list
|
||||
self._update_pending_list = []
|
||||
|
||||
log.debug('Waiting for the stack {0} to be create'.format(
|
||||
self._stack))
|
||||
outs = self._wait_state('CREATE_COMPLETE')
|
||||
log.info('Stack {0} created'.format(self._stack))
|
||||
|
||||
pending_list = self._update_pending_list
|
||||
self._update_pending_list = []
|
||||
|
||||
for item in pending_list:
|
||||
item['callback'](outs)
|
||||
|
||||
return True
|
||||
for item in pending_list:
|
||||
item['callback'](outs)
|
||||
return True
|
||||
except Exception as ex:
|
||||
pending_list = self._update_pending_list
|
||||
self._update_pending_list = []
|
||||
for item in pending_list:
|
||||
item['callback'](None, ex)
|
||||
return True
|
||||
|
||||
def _execute_pending_deletes(self):
|
||||
if not len(self._delete_pending_list):
|
||||
|
@ -18,7 +18,8 @@ class WindowsAgentExecutor(CommandBase):
|
||||
self._reporter = reporter
|
||||
rmqclient.declare(self._results_queue)
|
||||
|
||||
def execute(self, template, mappings, unit, service, callback):
|
||||
def execute(self, template, mappings, unit, service, callback,
|
||||
timeout=None):
|
||||
with open('data/templates/agent/%s.template' % template) as t_file:
|
||||
template_data = t_file.read()
|
||||
|
||||
@ -29,7 +30,8 @@ class WindowsAgentExecutor(CommandBase):
|
||||
queue = ('%s-%s-%s' % (self._stack, service, unit)).lower()
|
||||
self._pending_list.append({
|
||||
'id': msg_id,
|
||||
'callback': callback
|
||||
'callback': callback,
|
||||
'timeout': timeout
|
||||
})
|
||||
|
||||
msg = Message()
|
||||
@ -49,15 +51,53 @@ class WindowsAgentExecutor(CommandBase):
|
||||
|
||||
with self._rmqclient.open(self._results_queue) as subscription:
|
||||
while self.has_pending_commands():
|
||||
log.debug("Waiting for responses to be returned by the agent. "
|
||||
"%i total responses remain", len(self._pending_list))
|
||||
msg = subscription.get_message()
|
||||
msg.ack()
|
||||
msg_id = msg.id.lower()
|
||||
item, index = muranoconductor.helpers.find(
|
||||
lambda t: t['id'] == msg_id, self._pending_list)
|
||||
if item:
|
||||
self._pending_list.pop(index)
|
||||
item['callback'](msg.body)
|
||||
|
||||
# TODO: Add extended initialization timeout
|
||||
# For now, all the timeouts are defined by the command input
|
||||
# however, the first reply which we wait for being returned
|
||||
# from the unit may be delayed due to long unit initialization
|
||||
# and startup. So, for the uninitialized units we need to extend
|
||||
# the command's timeout with the initialization timeout
|
||||
timeout = self.get_max_timeout()
|
||||
if timeout:
|
||||
span_message = "for {0} seconds".format(timeout)
|
||||
else:
|
||||
span_message = 'infinitely'
|
||||
log.debug("Waiting %s for responses to be returned"
|
||||
" by the agent. %i total responses remain",
|
||||
span_message, len(self._pending_list))
|
||||
msg = subscription.get_message(timeout=timeout)
|
||||
if msg:
|
||||
msg.ack()
|
||||
msg_id = msg.id.lower()
|
||||
item, index = muranoconductor.helpers.find(
|
||||
lambda t: t['id'] == msg_id, self._pending_list)
|
||||
if item:
|
||||
self._pending_list.pop(index)
|
||||
item['callback'](msg.body)
|
||||
else:
|
||||
while self.has_pending_commands():
|
||||
item = self._pending_list.pop()
|
||||
item['callback'](AgentTimeoutException(timeout))
|
||||
return True
|
||||
|
||||
def get_max_timeout(self):
    """Compute the wait timeout covering every pending command.

    :returns: None as soon as a pending item without a timeout is met
        (the wait is then unbounded); otherwise the largest 'timeout'
        value among the pending items, never less than 0.
    """
    longest = 0
    for pending in self._pending_list:
        limit = pending['timeout']
        if limit is None:
            # A single unbounded command makes the whole wait unbounded.
            return None
        if limit > longest:
            longest = limit
    return longest
|
||||
|
||||
|
||||
class AgentTimeoutException(Exception):
    """Signals that no agent response was received within the timeout.

    Attributes:
        message: human-readable description including the timeout value.
        timeout: the timeout value (in seconds) that has expired.
    """

    def __init__(self, timeout):
        message = ("Unable to receive any response from the agent"
                   " in {0} sec".format(timeout))
        # Initialise the base class so that str(exc) and exc.args carry
        # the text; otherwise logs and tracebacks show an empty message.
        super(AgentTimeoutException, self).__init__(message)
        self.message = message
        self.timeout = timeout
|
||||
|
||||
|
||||
class UnhandledAgentException(Exception):
    """Signals agent-side errors for which no handler was available.

    Attributes:
        message: human-readable description that embeds ``errors``.
        errors: the error data passed to the constructor, kept as is.
    """

    def __init__(self, errors):
        message = ("An unhandled exception has "
                   "occurred in the Agent: {0}".format(errors))
        # Initialise the base class so that str(exc) and exc.args carry
        # the text; otherwise logs and tracebacks show an empty message.
        super(UnhandledAgentException, self).__init__(message)
        self.message = message
        self.errors = errors
|
||||
|
@ -12,33 +12,77 @@
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from muranoconductor.commands.windows_agent import AgentTimeoutException
|
||||
from muranoconductor.commands.windows_agent import UnhandledAgentException
|
||||
|
||||
import xml_code_engine
|
||||
|
||||
from openstack.common import log as logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def send_command(engine, context, body, template, service, unit, mappings=None,
|
||||
result=None, **kwargs):
|
||||
result=None, error=None, timeout=None, **kwargs):
|
||||
if not mappings:
|
||||
mappings = {}
|
||||
command_dispatcher = context['/commandDispatcher']
|
||||
if timeout:
|
||||
timeout = int(timeout)
|
||||
|
||||
def callback(result_value):
|
||||
log.info(
|
||||
'Received result from {2} for {0}: {1}'.format(
|
||||
template, result_value, unit))
|
||||
if result is not None:
|
||||
context[result] = result_value['Result']
|
||||
ok = []
|
||||
errors = []
|
||||
if isinstance(result_value, AgentTimeoutException):
|
||||
errors.append({
|
||||
'type': "timeout",
|
||||
'messages': [result_value.message],
|
||||
'timeout': result_value.timeout
|
||||
})
|
||||
else:
|
||||
if result_value['IsException']:
|
||||
msg = "A general exception has occurred in the Agent: " + \
|
||||
result_value['Result']
|
||||
errors.append({
|
||||
'type': "general",
|
||||
'messages': [msg],
|
||||
})
|
||||
|
||||
success_handler = body.find('success')
|
||||
if success_handler is not None:
|
||||
engine.evaluate_content(success_handler, context)
|
||||
else:
|
||||
for res in result_value['Result']:
|
||||
if res['IsException']:
|
||||
errors.append({
|
||||
'type': 'inner',
|
||||
'messages': res['Result']
|
||||
})
|
||||
else:
|
||||
ok.append(res)
|
||||
|
||||
if ok:
|
||||
if result is not None:
|
||||
context[result] = ok
|
||||
success_handler = body.find('success')
|
||||
if success_handler is not None:
|
||||
engine.evaluate_content(success_handler, context)
|
||||
if errors:
|
||||
if error is not None:
|
||||
context[error] = errors
|
||||
failure_handler = body.find('failure')
|
||||
if failure_handler is not None:
|
||||
engine.evaluate_content(failure_handler, context)
|
||||
else:
|
||||
log.error("No failure block found for exception")
|
||||
if isinstance(result_value, AgentTimeoutException):
|
||||
raise result_value
|
||||
else:
|
||||
raise UnhandledAgentException(errors)
|
||||
|
||||
command_dispatcher.execute(
|
||||
name='agent', template=template, mappings=mappings,
|
||||
unit=unit, service=service, callback=callback)
|
||||
unit=unit, service=service, callback=callback, timeout=timeout)
|
||||
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(send_command, "send-command")
|
||||
|
@ -203,6 +203,13 @@ class Workflow(object):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _stop_func(context, body, engine, **kwargs):
|
||||
if not 'temp' in context['/dataSource']:
|
||||
context['/dataSource']['temp'] = {}
|
||||
|
||||
context['/dataSource']['temp']['_stop_requested'] = True
|
||||
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._rule_func, 'rule')
|
||||
@ -216,6 +223,9 @@ xml_code_engine.XmlCodeEngine.register_function(
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._select_func, 'select')
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._stop_func, 'stop')
|
||||
|
||||
xml_code_engine.XmlCodeEngine.register_function(
|
||||
Workflow._select_all_func, 'select-all')
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user