diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java index cfaed92..74035ef 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/ExnConnector.java @@ -94,6 +94,8 @@ public class ExnConnector { public final SyncedPublisher scaleOut; /** The scaleIn endpoint. */ public final SyncedPublisher scaleIn; + /** The deleteCluster endpoint. */ + public final SyncedPublisher deleteCluster; /** * Create a connection to ActiveMQ via the exn middleware, and set up the @@ -119,6 +121,7 @@ public class ExnConnector { deployApplication = new SyncedPublisher("deployApplication", "eu.nebulouscloud.exn.sal.cluster.deployapplication", true, true); scaleOut = new SyncedPublisher("scaleOut", "eu.nebulouscloud.exn.sal.cluster.scaleout", true, true); scaleIn = new SyncedPublisher("scaleIn", "eu.nebulouscloud.exn.sal.cluster.scalein", true, true); + deleteCluster = new SyncedPublisher("deleteCluster", "eu.nebulouscloud.exn.sal.cluster.delete", true, true); conn = new Connector("optimiser_controller", callback, @@ -131,7 +134,8 @@ public class ExnConnector { deployCluster, deployApplication, scaleOut, - scaleIn), + scaleIn, + deleteCluster), List.of( new Consumer("ui_app_messages", app_creation_channel, new AppCreationMessageHandler(), true, true), @@ -481,7 +485,10 @@ public class ExnConnector { } /** - * Deploy a cluster created by {@link #defineCluster}. + * Deploy a cluster created by {@link #defineCluster}. Note that the call + * will return before the cluster is ready, i.e., {@link #getCluster} must + * be checked before trying to call {@link #labelNodes} or {@link + * #deployApplication}. * * @param appID The application's id, used for logging only. * @param clusterName The name of the cluster. 
@@ -566,7 +573,6 @@ public class ExnConnector { * @return true if the call was successful, false otherwise. */ public boolean scaleIn(String appID, List superfluousNodes) { - // NOTE: not yet defined in // https://openproject.nebulouscloud.eu/projects/nebulous-collaboration-hub/wiki/deployment-manager-sal-1#specification-of-endpoints-being-developed ArrayNode body = mapper.createArrayNode(); superfluousNodes.forEach(nodeName -> body.add(nodeName)); @@ -584,5 +590,20 @@ public class ExnConnector { return payload.asBoolean(); } + /** + * Delete a cluster created by {@link #defineCluster}. + * + * @param appID The application's id, used for logging only. + * @param clusterName The name of the cluster. + * @return true if the cluster was successfully deleted, false otherwise. + */ + public boolean deleteCluster(String appID, String clusterName) { + // https://openproject.nebulouscloud.eu/projects/nebulous-collaboration-hub/wiki/deployment-manager-sal-1#specification-of-endpoints-being-developed + Map msg = Map.of("metaData", + Map.of("user", "admin", "clusterName", clusterName)); + Map response = deleteCluster.sendSync(msg, appID, null, false); + JsonNode payload = extractPayloadFromExnResponse(response, appID, "deleteCluster"); + return payload.asBoolean(); + } } diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java index 4764db5..fad37b7 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import eu.nebulouscloud.exn.core.Publisher; import lombok.Getter; import lombok.Setter; +import lombok.Synchronized; import lombok.extern.slf4j.Slf4j; import static net.logstash.logback.argument.StructuredArguments.keyValue; 
@@ -60,6 +61,36 @@ public class NebulousApp { */ @Getter private String clusterName; + /** + * The application status. + * + *

NEW: The application has been created from the GUI and is waiting + * for the performance indicators. + * + *

READY: The application is ready for deployment. + * + *

DEPLOYING: The application is being deployed or redeployed. + * + *

SOLVER_WAITING: The application is deployed, we're waiting for the + * solver to be ready so we can send AMPL and performance indicators. + * + *

RUNNING: The application is running, and under redeployment. + * + *

FAILED: The application is in an invalid state: one or more messages + * could not be parsed, or deployment or redeployment failed. + */ + public enum State { + NEW, + READY, + DEPLOYING, + SOLVER_WAITING, + RUNNING, + FAILED; + } + + @Getter + private State state; + // ---------------------------------------- // App message parsing stuff @@ -114,37 +145,38 @@ public class NebulousApp { * to 1, each subsequent redeployment increases by 1. This value is used * to name node instances generated during that deployment. */ - @Getter @Setter + @Getter private int deployGeneration = 0; /** - * Map of component name to node name(s) deployed for that component. - * Component names are defined in the KubeVela file. We assume that - * component names stay constant during redeployment, i.e., once an - * application is deployed, its KubeVela file will not change. + * Unmodifiable map of component name to node name(s) deployed for that + * component. Component names are defined in the KubeVela file. We + * assume that component names stay constant during redeployment, i.e., + * once an application is deployed, its KubeVela file will not change. * * Note that this map does not include the master node, since this is not * specified in KubeVela. */ @Getter - private Map> componentNodeNames = new HashMap<>(); + private Map> componentNodeNames = Map.of(); /** - * Map from node name to deployed edge or BYON node candidate. We keep - * track of assigned edge candidates, since we do not want to - * doubly-assign edge nodes. We also store the node name, so we can - * "free" the edge candidate when the current component gets redeployed - * and lets go of its edge node. (We do not track cloud node candidates - * since these can be instantiated multiple times.) + * Unmodifiable map from node name to deployed edge or BYON node + * candidate. We keep track of assigned edge candidates, since we do not + * want to doubly-assign edge nodes. 
We also store the node name, so we + * can "free" the edge candidate when the current component gets + * redeployed and lets go of its edge node. (We do not track cloud node + * candidates since these can be instantiated multiple times.) */ @Getter - private Map nodeEdgeCandidates = new HashMap<>(); - /** Map of component name to its requirements, as currently deployed. - * Each replica of a component has identical requirements. */ - @Getter @Setter - private Map> componentRequirements = new HashMap<>(); - /** Map of component name to its replica count, as currently deployed. */ - @Getter @Setter - private Map componentReplicaCounts = new HashMap<>(); + private Map nodeEdgeCandidates = Map.of(); + /** Unmodifiable map of component name to its requirements, as currently + * deployed. Each replica of a component has identical requirements. */ + @Getter + private Map> componentRequirements = Map.of(); + /** Unmodifiable map of component name to its replica count, as currently + * deployed. */ + @Getter + private Map componentReplicaCounts = Map.of(); /** When an app gets deployed, this is where we send the AMPL file */ private Publisher ampl_message_channel; @@ -153,15 +185,15 @@ public class NebulousApp { // private boolean deployed = false; /** The KubeVela as it was most recently sent to the app's controller. */ - @Getter @Setter + @Getter private JsonNode deployedKubevela; /** For each KubeVela component, the number of deployed nodes. All nodes - * will be identical wrt machine type etc. */ - @Getter @Setter - private Map deployedNodeCounts; - /** For each KubeVela component, the requirements for its node(s). */ - @Getter @Setter - private Map> deployedNodeRequirements; + * will be identical wrt machine type etc. Unmodifiable map. */ + @Getter + private Map deployedNodeCounts = Map.of(); + /** For each KubeVela component, the requirements for its node(s). Unmodifiable map. 
*/ + @Getter + private Map> deployedNodeRequirements = Map.of(); /** * The EXN connector for this class. At the moment all apps share the @@ -183,6 +215,7 @@ public class NebulousApp { public NebulousApp(JsonNode app_message, ObjectNode kubevela, ExnConnector exnConnector) { this.UUID = app_message.at(uuid_path).textValue(); this.name = app_message.at(name_path).textValue(); + this.state = State.READY; this.clusterName = NebulousApps.calculateUniqueClusterName(this.UUID); this.originalAppMessage = app_message; this.originalKubevela = kubevela; @@ -286,6 +319,46 @@ public class NebulousApp { } } + /** + * Set the state from READY to DEPLOYING, and increment the generation. + * + * @return false if deployment could not be started, true otherwise. + */ + @Synchronized + public boolean setStateDeploying() { + if (state != State.READY) { + return false; + } else { + state = State.DEPLOYING; + deployGeneration++; + return true; + } + } + /** Set state from DEPLOYING to RUNNING and update app cluster information. + * @return false if not in state deploying, otherwise true. */ + @Synchronized + public boolean setStateDeploymentFinished(Map> componentRequirements, Map nodeCounts, Map> componentNodeNames, Map nodeEdgeCandidates, JsonNode deployedKubevela) { + if (state != State.DEPLOYING) { + return false; + } else { + // We keep all state read-only so we cannot modify the app object + // before we know deployment is successful + this.componentRequirements = Map.copyOf(componentRequirements); + this.componentReplicaCounts = Map.copyOf(nodeCounts); + this.componentNodeNames = Map.copyOf(componentNodeNames); + this.deployedKubevela = deployedKubevela; + this.nodeEdgeCandidates = Map.copyOf(nodeEdgeCandidates); + state = State.RUNNING; + return true; + } + } + + /** Set state unconditionally to FAILED. No more state changes will be + * possible once the state is set to FAILED. 
*/ + @Synchronized public void setStateFailed() { + state = State.FAILED; + } + /** Utility function to parse a KubeVela string. Can be used from jshell. */ public static JsonNode readKubevelaString(String kubevela) throws JsonMappingException, JsonProcessingException { return yamlMapper.readTree(kubevela); @@ -391,6 +464,11 @@ public class NebulousApp { /** * Calculate AMPL file and send it off to the solver. + * + *

TODO: this should be done once from a message handler that listens + * for an incoming "solver ready" message + * + *

TODO: also send performance indicators to solver here */ public void sendAMPL() { String ampl = AMPLGenerator.generateAMPL(this); diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java index 1be402f..8a3c394 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java @@ -21,8 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator; - import lombok.extern.slf4j.Slf4j; import static net.logstash.logback.argument.StructuredArguments.keyValue; @@ -145,6 +143,36 @@ public class NebulousAppDeployer { .allMatch(node -> node.get("state").asText().equals("Finished")); } + /** + * Wait until all nodes in cluster are in state "Finished". + * + *

Note: Cluster deployment includes provisioning and booting VMs, + * installing various software packages, bringing up a Kubernetes cluster + * and installing the NebulOuS runtime. This can take some minutes. + */ + private static boolean waitForClusterDeploymentFinished(ExnConnector conn, String clusterName, String appUUID) { + // TODO: find out what state node(s) or the whole cluster are in when + // cluster start fails, and return false in that case. + JsonNode clusterState = conn.getCluster(clusterName); + while (clusterState == null || !isClusterDeploymentFinished(clusterState)) { + log.info("Waiting for cluster deployment to finish...", + keyValue("appId", appUUID), keyValue("clusterName", clusterName), + keyValue("clusterState", clusterState)); + try { + Thread.sleep(10000); + } catch (InterruptedException e1) { + // ignore + } + // TODO: distinguish between clusterState==null because SAL hasn't + // set up its datastructures yet, and clusterState==null because + // the call to getCluster failed. In the latter case we want to + // abort (because someone has deleted the cluster), in the former + // case we want to continue. + clusterState = conn.getCluster(clusterName); + } + return true; + } + /** * Given a KubeVela file, extract node requirements, create the job, start * its nodes and submit KubeVela. @@ -152,45 +180,52 @@ public class NebulousAppDeployer { *

NOTE: this method modifies the NebulousApp object state, storing * various facts about the deployed cluster. * - *

NOTE: this method is under reconstruction, pending the new - * endpoints. - * * @param app The NebulOuS app object. * @param kubevela the KubeVela file to deploy. */ public static void deployApplication(NebulousApp app, JsonNode kubevela) { String appUUID = app.getUUID(); String clusterName = app.getClusterName(); + if (!app.setStateDeploying()) { + // TODO: wait until we got the performance indicators from Marta + log.error("Trying to deploy app that is in state {} (should be READY), aborting deployment", + app.getState().name(), + keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + app.setStateFailed(); + return; + } // The application name is typed in by the user, and is used // internally by SAL as an unquoted filename in a generated shell // script. It shouldn't be this way but it is what it is. String safeAppName = app.getName().replaceAll("[^a-zA-Z0-9-_]", "_"); ExnConnector conn = app.getExnConnector(); - log.info("Starting initial deployment for application", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + log.info("Starting initial deployment for application", + keyValue("appId", appUUID), keyValue("clusterName", clusterName)); - int deployGeneration = app.getDeployGeneration() + 1; - app.setDeployGeneration(deployGeneration); // The overall flow: // - // 1. Extract node requirements and node counts from the KubeVela - // definition. - // 2. Ask resource broker for node candidates for all components and the - // controller. - // 3. Select node candidates, making sure to only select edge nodes - // once. - // 4. Create a SAL cluster. - // 5. Deploy the SAL cluster. - // 6. Add node affinity traits to the KubeVela file. - // 7. Deploy the SAL application. - // 8. Store cluster state (deployed KubeVela file, etc.) in - // NebulousApp object. + // - Extract node requirements and node counts from the KubeVela + // definition. 
+ // - Rewrite KubeVela: remove performance requirements, add affinity + // traits + // - Ask resource broker for node candidates for all components and the + // controller. + // - Select node candidates, making sure to only select edge nodes + // once. + // - Create a SAL cluster. + // - Deploy the SAL cluster. + // - Add node affinity traits to the KubeVela file. + // - Deploy the SAL application. + // - Store cluster state (deployed KubeVela file, etc.) in + // NebulousApp object. // ------------------------------------------------------------ - // 1. Extract node requirements + // Extract node requirements Map> componentRequirements = KubevelaAnalyzer.getClampedRequirements(kubevela); Map nodeCounts = KubevelaAnalyzer.getNodeCount(kubevela); List controllerRequirements = getControllerRequirements(appUUID); + // HACK: do this only when cloud id = nrec componentRequirements.forEach( (k, reqs) -> reqs.add(new AttributeRequirement("location", "name", RequirementOperator.EQ, "bgo"))); @@ -198,49 +233,68 @@ public class NebulousAppDeployer { Main.logFile("component-counts-" + appUUID + ".txt", nodeCounts); Main.logFile("controller-requirements-" + appUUID + ".txt", controllerRequirements); - // ---------------------------------------- - // 2. 
Find node candidates + // ------------------------------------------------------------ + // Rewrite KubeVela + JsonNode rewritten = createDeploymentKubevela(kubevela); + String rewritten_kubevela = "---\n# Did not manage to create rewritten KubeVela"; + try { + rewritten_kubevela = yamlMapper.writeValueAsString(rewritten); + } catch (JsonProcessingException e) { + log.error("Failed to convert KubeVela to YAML; this should never happen", + keyValue("appId", appUUID), keyValue("clusterName", clusterName), e); + app.setStateFailed(); + return; + } + Main.logFile("rewritten-kubevela-" + appUUID + ".yaml", rewritten_kubevela); - // TODO: filter by app resources (check enabled: true in resources array) + // ---------------------------------------- + // Find node candidates + + // TODO: filter by app resources / cloud? (check enabled: true in resources array) List controllerCandidates = conn.findNodeCandidates(controllerRequirements, appUUID); if (controllerCandidates.isEmpty()) { - log.error("Could not find node candidates for requirements: {}", + log.error("Could not find node candidates for requirements: {}, aborting deployment", controllerRequirements, keyValue("appId", appUUID), keyValue("clusterName", clusterName)); - // Continue here while we don't really deploy - // return; + app.setStateFailed(); + return; } Map> componentCandidates = new HashMap<>(); for (Map.Entry> e : componentRequirements.entrySet()) { String nodeName = e.getKey(); List requirements = e.getValue(); - // TODO: filter by app resources (check enabled: true in resources array) + // TODO: filter by app resources / cloud? 
(check enabled: true in resources array) List candidates = conn.findNodeCandidates(requirements, appUUID); if (candidates.isEmpty()) { - log.error("Could not find node candidates for for node {}, requirements: {}", nodeName, requirements, + log.error("Could not find node candidates for for node {}, requirements: {}, aborting deployment", nodeName, requirements, keyValue("appId", appUUID), keyValue("clusterName", clusterName)); - // Continue here while we don't really deploy - // return; + app.setStateFailed(); + return; } componentCandidates.put(nodeName, candidates); } // ------------------------------------------------------------ - // 3. Select node candidates + // Select node candidates + + Map nodeEdgeCandidates = new HashMap<>(app.getNodeEdgeCandidates()); // Controller node log.info("Deciding on controller node candidate", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); - String masterNodeName = "n" + clusterName.toLowerCase() + "-masternode"; // safe because all component node names end with a number + // Take care to only use lowercase, numbers, starting with letter + String masterNodeName = "m" + clusterName.toLowerCase() + "-master"; NodeCandidate masterNodeCandidate = null; if (controllerCandidates.size() > 0) { masterNodeCandidate = controllerCandidates.get(0); if (Set.of(NodeCandidateTypeEnum.BYON, NodeCandidateTypeEnum.EDGE) .contains(masterNodeCandidate.getNodeCandidateType())) { // Mark this candidate as already chosen - app.getNodeEdgeCandidates().put(masterNodeName, masterNodeCandidate); + nodeEdgeCandidates.put(masterNodeName, masterNodeCandidate); } } else { - log.error("Empty node candidate list for controller, continuing without creating node", + log.error("Empty node candidate list for controller, aborting deployment", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + app.setStateFailed(); + return; } // Component nodes @@ -255,47 +309,57 @@ public class NebulousAppDeployer { // 
ExnConnector.createCluster // - Each node name and its label (nodeLabels), for // ExnConnector.labelNodes + Map> componentNodeNames = new HashMap<>(); for (Map.Entry> e : componentRequirements.entrySet()) { String componentName = e.getKey(); int numberOfNodes = nodeCounts.get(componentName); Set nodeNames = new HashSet<>(); List candidates = componentCandidates.get(componentName); if (candidates.size() == 0) { - log.error("Empty node candidate list for component {}, continuing without creating node", componentName, + log.error("Empty node candidate list for component {}, aborting deployment", componentName, keyValue("appId", appUUID), keyValue("clusterName", clusterName)); - continue; + app.setStateFailed(); + return; } for (int nodeNumber = 1; nodeNumber <= numberOfNodes; nodeNumber++) { - String nodeName = createNodeName(clusterName, componentName, deployGeneration, nodeNumber); + String nodeName = createNodeName(clusterName, componentName, app.getDeployGeneration(), nodeNumber); NodeCandidate candidate = candidates.stream() - .filter(each -> !app.getNodeEdgeCandidates().values().contains(each)) + .filter(each -> !nodeEdgeCandidates.values().contains(each)) .findFirst() .orElse(null); if (candidate == null) { - log.error("No available node candidate for node {} of component {}", nodeNumber, componentName, + log.error("No available node candidate for node {} of component {}, aborting deployment", nodeNumber, componentName, keyValue("appId", appUUID), keyValue("clusterName", clusterName)); - continue; + app.setStateFailed(); + return; } if (Set.of(NodeCandidateTypeEnum.BYON, NodeCandidateTypeEnum.EDGE).contains(candidate.getNodeCandidateType())) { - app.getNodeEdgeCandidates().put(nodeName, candidate); + nodeEdgeCandidates.put(nodeName, candidate); } clusterNodes.put(nodeName, candidate); nodeLabels.addObject().put(nodeName, "nebulouscloud.eu/" + componentName + "=yes"); nodeNames.add(nodeName); } - app.getComponentNodeNames().put(componentName, nodeNames); + // 
Collected in a local map; stored in the app only once deployment succeeds + componentNodeNames.put(componentName, nodeNames); } - Main.logFile("nodenames-" + appUUID + ".txt", app.getComponentNodeNames()); + Main.logFile("nodenames-" + appUUID + ".txt", componentNodeNames); Main.logFile("master-nodecandidate-" + appUUID + ".txt", masterNodeCandidate); Main.logFile("component-nodecandidates-" + appUUID + ".txt", clusterNodes); try { Main.logFile("component-labels-" + appUUID + ".txt", mapper.writeValueAsString(nodeLabels)); } catch (JsonProcessingException e1) { - // ignore; the labelNodes method will report the same error later + log.error("Internal error: could not convert node labels to string (this should never happen), aborting deployment", + keyValue("appId", appUUID), keyValue("clusterName", clusterName), e1); + app.setStateFailed(); + return; } + // TODO: send performance indicators (for monitoring system, which + // needs it before cluster creation) + // ------------------------------------------------------------ - // 4. 
Create cluster + // Create cluster ObjectNode cluster = mapper.createObjectNode(); cluster.put("name", clusterName) @@ -315,68 +379,52 @@ public class NebulousAppDeployer { }); ObjectNode environment = cluster.withObject("/env-var"); environment.put("APPLICATION_ID", appUUID); + // TODO: add other environment variables, also from app creation + // message (it has an "env" array) log.info("Calling defineCluster", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); boolean defineClusterSuccess = conn.defineCluster(appUUID, clusterName, cluster); if (!defineClusterSuccess) { - log.error("Call to defineCluster failed, blindly continuing...", - keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + log.error("Call to defineCluster failed for message body {}, aborting deployment", + cluster, keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + app.setStateFailed(); + return; } // ------------------------------------------------------------ - // 5. Deploy cluster + // Deploy cluster log.info("Calling deployCluster", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); boolean deployClusterSuccess = conn.deployCluster(appUUID, clusterName); if (!deployClusterSuccess) { - log.error("Call to deployCluster failed, blindly continuing...", - keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + log.error("Call to deployCluster failed, trying to delete cluster and aborting deployment", + keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + app.setStateFailed(); + conn.deleteCluster(appUUID, clusterName); + return; } - JsonNode clusterState = conn.getCluster(clusterName); - while (clusterState == null || !isClusterDeploymentFinished(clusterState)) { - // Cluster deployment includes provisioning and booting VMs, - // installing various software packages, bringing up a Kubernetes - // cluster and installing the NebulOuS runtime. This can take - // some minutes. 
- log.info("Waiting for cluster deployment to finish...", - keyValue("appId", appUUID), keyValue("clusterName", clusterName), - keyValue("clusterState", clusterState)); - try { - Thread.sleep(10000); - } catch (InterruptedException e1) { - // ignore - } - // TODO: distinguish between clusterState==null because SAL hasn't - // set up its datastructures yet, and clusterState==null because - // the call to getCluster failed. In the latter case we want to - // abort (because someone has deleted the cluster), in the former - // case we want to continue. - clusterState = conn.getCluster(clusterName); + if (!waitForClusterDeploymentFinished(conn, clusterName, appUUID)) { + log.error("Error while waiting for deployCluster to finish, trying to delete cluster and aborting deployment", + keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + app.setStateFailed(); + conn.deleteCluster(appUUID, clusterName); + return; } + log.info("Cluster deployment finished, continuing with app deployment", - keyValue("appId", appUUID), keyValue("clusterName", clusterName), - keyValue("clusterState", clusterState)); + keyValue("appId", appUUID), keyValue("clusterName", clusterName)); log.info("Calling labelCluster", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); boolean labelClusterSuccess = conn.labelNodes(appUUID, clusterName, nodeLabels); if (!labelClusterSuccess) { - log.error("Call to deployCluster failed, blindly continuing...", + log.error("Call to labelNodes failed, trying to delete cluster and aborting deployment", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + app.setStateFailed(); + conn.deleteCluster(appUUID, clusterName); + return; } // ------------------------------------------------------------ - // 6. 
Rewrite KubeVela - JsonNode rewritten = createDeploymentKubevela(kubevela); - String rewritten_kubevela = "---\n# Did not manage to create rewritten KubeVela"; - try { - rewritten_kubevela = yamlMapper.writeValueAsString(rewritten); - } catch (JsonProcessingException e) { - log.error("Failed to convert KubeVela to YAML; this should never happen", - keyValue("appId", appUUID), keyValue("clusterName", clusterName), e); - } - Main.logFile("rewritten-kubevela-" + appUUID + ".yaml", rewritten_kubevela); - - // ------------------------------------------------------------ - // 7. Deploy application + // Deploy application log.info("Calling deployApplication", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); long proActiveJobID = conn.deployApplication(appUUID, clusterName, safeAppName, rewritten_kubevela); @@ -385,19 +433,16 @@ public class NebulousAppDeployer { if (proActiveJobID == 0) { // 0 means conversion from long has failed (because of an invalid // response), OR a ProActive job id of 0. - log.warn("Job ID = 0, this means that deployApplication has probably failed.", + log.error("DeployApplication ProActive job ID = 0, deployApplication has probably failed; aborting deployment.", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); + app.setStateFailed(); + conn.deleteCluster(appUUID, clusterName); + return; } - // ------------------------------------------------------------ - // 8. 
Update NebulousApp state + // Update NebulousApp state - // TODO: send out AMPL (must be done after deployCluster, once we know - // how to pass the application id into the fresh cluster) - - app.setComponentRequirements(componentRequirements); - app.setComponentReplicaCounts(nodeCounts); - app.setDeployedKubevela(rewritten); + app.setStateDeploymentFinished(componentRequirements, nodeCounts, componentNodeNames, nodeEdgeCandidates, rewritten); log.info("App deployment finished.", keyValue("appId", appUUID), keyValue("clusterName", clusterName)); } @@ -417,11 +462,10 @@ public class NebulousAppDeployer { public static void redeployApplication(NebulousApp app, ObjectNode kubevela) { String appUUID = app.getUUID(); String clusterName = app.getClusterName(); - int deployGeneration = app.getDeployGeneration() + 1; ExnConnector conn = app.getExnConnector(); - app.setDeployGeneration(deployGeneration); + app.setStateDeploying(); - log.info("Starting redeployment generation {}", deployGeneration, + log.info("Starting redeployment generation {}", app.getDeployGeneration(), keyValue("appId", appUUID), keyValue("clusterName", clusterName)); // The overall flow: // @@ -476,7 +520,7 @@ public class NebulousAppDeployer { continue; } for (int nodeNumber = 1; nodeNumber <= nAdd; nodeNumber++) { - String nodeName = createNodeName(clusterName, componentName, deployGeneration, nodeNumber); + String nodeName = createNodeName(clusterName, componentName, app.getDeployGeneration(), nodeNumber); NodeCandidate candidate = candidates.stream() .filter(each -> !app.getNodeEdgeCandidates().values().contains(each)) .findFirst() @@ -532,7 +576,7 @@ public class NebulousAppDeployer { continue; } for (int nodeNumber = 1; nodeNumber <= componentReplicaCounts.get(componentName); nodeNumber++) { - String nodeName = createNodeName(clusterName, componentName, deployGeneration, nodeNumber); + String nodeName = createNodeName(clusterName, componentName, app.getDeployGeneration(), nodeNumber); 
NodeCandidate candidate = candidates.stream() .filter(each -> !app.getNodeEdgeCandidates().values().contains(each)) .findFirst()