Abort cleanly when deployment fails

- Introduce (first version of) state machine in NebulousApp.
- Make various app state maps read-only so we never have a half-deployed
  state; instead, we update the app object atomically at the end.
- Add deleteCluster endpoint.
- Delete cluster when any deployment step fails.

Change-Id: Ib05934035a808373e001937d614f3191aa926f1b
This commit is contained in:
Rudi Schlatte 2024-04-11 20:15:22 +02:00
parent 4b6d479ae5
commit ab6375304d
3 changed files with 274 additions and 131 deletions

View File

@ -94,6 +94,8 @@ public class ExnConnector {
public final SyncedPublisher scaleOut;
/** The scaleIn endpoint. */
public final SyncedPublisher scaleIn;
/** The deleteCluster endpoint. */
public final SyncedPublisher deleteCluster;
/**
* Create a connection to ActiveMQ via the exn middleware, and set up the
@ -119,6 +121,7 @@ public class ExnConnector {
deployApplication = new SyncedPublisher("deployApplication", "eu.nebulouscloud.exn.sal.cluster.deployapplication", true, true);
scaleOut = new SyncedPublisher("scaleOut", "eu.nebulouscloud.exn.sal.cluster.scaleout", true, true);
scaleIn = new SyncedPublisher("scaleIn", "eu.nebulouscloud.exn.sal.cluster.scalein", true, true);
// Publisher key matches the field name, like the sibling publishers above
// (it previously reused the "deployCluster" key by copy-paste).
deleteCluster = new SyncedPublisher("deleteCluster", "eu.nebulouscloud.exn.sal.cluster.delete", true, true);
conn = new Connector("optimiser_controller",
callback,
@ -131,7 +134,8 @@ public class ExnConnector {
deployCluster,
deployApplication,
scaleOut,
scaleIn),
scaleIn,
deleteCluster),
List.of(
new Consumer("ui_app_messages", app_creation_channel,
new AppCreationMessageHandler(), true, true),
@ -481,7 +485,10 @@ public class ExnConnector {
}
/**
* Deploy a cluster created by {@link #defineCluster}.
* Deploy a cluster created by {@link #defineCluster}. Note that the call
* will return before the cluster is ready, i.e., {@link #getCluster} must
* be checked before trying to call {@link #labelNodes} or {@link
* #deployApplication}.
*
* @param appID The application's id, used for logging only.
* @param clusterName The name of the cluster.
@ -566,7 +573,6 @@ public class ExnConnector {
* @return true if the call was successful, false otherwise.
*/
public boolean scaleIn(String appID, List<String> superfluousNodes) {
// NOTE: not yet defined in
// https://openproject.nebulouscloud.eu/projects/nebulous-collaboration-hub/wiki/deployment-manager-sal-1#specification-of-endpoints-being-developed
ArrayNode body = mapper.createArrayNode();
superfluousNodes.forEach(nodeName -> body.add(nodeName));
@ -584,5 +590,20 @@ public class ExnConnector {
return payload.asBoolean();
}
/**
 * Ask SAL to delete a cluster that was created by {@link #defineCluster}.
 *
 * <p>Sends a synchronous request on the {@code deleteCluster} publisher
 * and interprets the payload of the response as a boolean.
 *
 * @param appID The application's id, used for logging only.
 * @param clusterName The name of the cluster.
 * @return true if the cluster was successfully deleted, false otherwise.
 */
public boolean deleteCluster(String appID, String clusterName) {
    // https://openproject.nebulouscloud.eu/projects/nebulous-collaboration-hub/wiki/deployment-manager-sal-1#specification-of-endpoints-being-developed
    Map<String, Object> metaData = Map.of("user", "admin", "clusterName", clusterName);
    Map<String, Object> request = Map.of("metaData", metaData);
    Map<String, Object> reply = deleteCluster.sendSync(request, appID, null, false);
    return extractPayloadFromExnResponse(reply, appID, "deleteCluster").asBoolean();
}
}

View File

@ -13,6 +13,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import eu.nebulouscloud.exn.core.Publisher;
import lombok.Getter;
import lombok.Setter;
import lombok.Synchronized;
import lombok.extern.slf4j.Slf4j;
import static net.logstash.logback.argument.StructuredArguments.keyValue;
@ -60,6 +61,36 @@ public class NebulousApp {
*/
@Getter private String clusterName;
/**
 * The application status.  Each constant documents when the application
 * is in that state.
 */
public enum State {
    /** The application has been created from the GUI and is waiting for
     * the performance indicators. */
    NEW,
    /** The application is ready for deployment. */
    READY,
    /** The application is being deployed or redeployed. */
    DEPLOYING,
    /** The application is deployed, we're waiting for the solver to be
     * ready so we can send AMPL and performance indicators. */
    SOLVER_WAITING,
    /** The application is deployed and running; this state is entered
     * when a deployment has finished successfully. */
    RUNNING,
    /** The application is in an invalid state: one or more messages could
     * not be parsed, or deployment or redeployment failed. */
    FAILED
}
@Getter
private State state;
// ----------------------------------------
// App message parsing stuff
@ -114,37 +145,38 @@ public class NebulousApp {
* to 1, each subsequent redeployment increases by 1. This value is used
* to name node instances generated during that deployment.
*/
@Getter @Setter
@Getter
private int deployGeneration = 0;
/**
* Map of component name to node name(s) deployed for that component.
* Component names are defined in the KubeVela file. We assume that
* component names stay constant during redeployment, i.e., once an
* application is deployed, its KubeVela file will not change.
* Unmodifiable map of component name to node name(s) deployed for that
* component. Component names are defined in the KubeVela file. We
* assume that component names stay constant during redeployment, i.e.,
* once an application is deployed, its KubeVela file will not change.
*
* Note that this map does not include the master node, since this is not
* specified in KubeVela.
*/
@Getter
private Map<String, Set<String>> componentNodeNames = new HashMap<>();
private Map<String, Set<String>> componentNodeNames = Map.of();
/**
* Map from node name to deployed edge or BYON node candidate. We keep
* track of assigned edge candidates, since we do not want to
* doubly-assign edge nodes. We also store the node name, so we can
* "free" the edge candidate when the current component gets redeployed
* and lets go of its edge node. (We do not track cloud node candidates
* since these can be instantiated multiple times.)
* Unmodifiable map from node name to deployed edge or BYON node
* candidate. We keep track of assigned edge candidates, since we do not
* want to doubly-assign edge nodes. We also store the node name, so we
* can "free" the edge candidate when the current component gets
* redeployed and lets go of its edge node. (We do not track cloud node
* candidates since these can be instantiated multiple times.)
*/
@Getter
private Map<String, NodeCandidate> nodeEdgeCandidates = new HashMap<>();
/** Map of component name to its requirements, as currently deployed.
* Each replica of a component has identical requirements. */
@Getter @Setter
private Map<String, List<Requirement>> componentRequirements = new HashMap<>();
/** Map of component name to its replica count, as currently deployed. */
@Getter @Setter
private Map<String, Integer> componentReplicaCounts = new HashMap<>();
private Map<String, NodeCandidate> nodeEdgeCandidates = Map.of();
/** Unmodifiable map of component name to its requirements, as currently
* deployed. Each replica of a component has identical requirements. */
@Getter
private Map<String, List<Requirement>> componentRequirements = Map.of();
/** Unmodifiable map of component name to its replica count, as currently
* deployed. */
@Getter
private Map<String, Integer> componentReplicaCounts = Map.of();
/** When an app gets deployed, this is where we send the AMPL file */
private Publisher ampl_message_channel;
@ -153,15 +185,15 @@ public class NebulousApp {
// private boolean deployed = false;
/** The KubeVela as it was most recently sent to the app's controller. */
@Getter @Setter
@Getter
private JsonNode deployedKubevela;
/** For each KubeVela component, the number of deployed nodes. All nodes
* will be identical wrt machine type etc. */
@Getter @Setter
private Map<String, Integer> deployedNodeCounts;
/** For each KubeVela component, the requirements for its node(s). */
@Getter @Setter
private Map<String, List<Requirement>> deployedNodeRequirements;
* will be identical wrt machine type etc. Unmodifiable map. */
@Getter
private Map<String, Integer> deployedNodeCounts = Map.of();
/** For each KubeVela component, the requirements for its node(s). Unmodifiable map. */
@Getter
private Map<String, List<Requirement>> deployedNodeRequirements = Map.of();
/**
* The EXN connector for this class. At the moment all apps share the
@ -183,6 +215,7 @@ public class NebulousApp {
public NebulousApp(JsonNode app_message, ObjectNode kubevela, ExnConnector exnConnector) {
this.UUID = app_message.at(uuid_path).textValue();
this.name = app_message.at(name_path).textValue();
this.state = State.READY;
this.clusterName = NebulousApps.calculateUniqueClusterName(this.UUID);
this.originalAppMessage = app_message;
this.originalKubevela = kubevela;
@ -286,6 +319,46 @@ public class NebulousApp {
}
}
/**
 * Begin a deployment: switch the state from READY to DEPLOYING and
 * increment the deploy generation.  In any other state nothing is
 * changed.
 *
 * @return true if the app was in state READY and is now DEPLOYING, false
 *  if deployment could not be started.
 */
@Synchronized
public boolean setStateDeploying() {
    if (state == State.READY) {
        state = State.DEPLOYING;
        deployGeneration++;
        return true;
    }
    return false;
}
/**
 * Set state from DEPLOYING to RUNNING and update app cluster information.
 *
 * <p>All map arguments are snapshotted with {@link Map#copyOf} before any
 * field of the app object is written.  {@code Map.copyOf} throws {@link
 * NullPointerException} for a null map or a map containing null keys or
 * values; taking all snapshots first means that such a failure leaves
 * the app object completely unchanged instead of half-updated.
 *
 * <p>Note that the copies are shallow: the maps themselves are
 * unmodifiable, the lists and sets stored in them are shared with the
 * caller.
 *
 * @param componentRequirements map of component name to its requirements.
 * @param nodeCounts map of component name to its replica count.
 * @param componentNodeNames map of component name to the names of the
 *  nodes deployed for it.
 * @param nodeEdgeCandidates map of node name to the edge or BYON node
 *  candidate assigned to it.
 * @param deployedKubevela the KubeVela as it was sent for deployment.
 * @return false if not in state deploying, otherwise true.
 */
@Synchronized
public boolean setStateDeploymentFinished(Map<String, List<Requirement>> componentRequirements, Map<String, Integer> nodeCounts, Map<String, Set<String>> componentNodeNames, Map<String, NodeCandidate> nodeEdgeCandidates, JsonNode deployedKubevela) {
    if (state != State.DEPLOYING) {
        return false;
    }
    // We keep all state read-only so we cannot modify the app object
    // before we know deployment is successful.  Take every snapshot
    // first: if one of them throws, no field has been touched yet.
    Map<String, List<Requirement>> requirementsSnapshot = Map.copyOf(componentRequirements);
    Map<String, Integer> replicaCountsSnapshot = Map.copyOf(nodeCounts);
    Map<String, Set<String>> nodeNamesSnapshot = Map.copyOf(componentNodeNames);
    Map<String, NodeCandidate> edgeCandidatesSnapshot = Map.copyOf(nodeEdgeCandidates);

    // Nothing below can throw, so the update is all-or-nothing.
    this.componentRequirements = requirementsSnapshot;
    this.componentReplicaCounts = replicaCountsSnapshot;
    this.componentNodeNames = nodeNamesSnapshot;
    this.deployedKubevela = deployedKubevela;
    this.nodeEdgeCandidates = edgeCandidatesSnapshot;
    state = State.RUNNING;
    return true;
}
/**
 * Set state unconditionally to FAILED.  No more state changes will be
 * possible once the state is set to FAILED.
 *
 * <p>This method takes the same lock as the other state transitions
 * ({@link #setStateDeploying}, {@link #setStateDeploymentFinished}).
 * Without it, a transition that has already passed its state check could
 * overwrite FAILED afterwards, breaking the guarantee above.
 */
@Synchronized
public void setStateFailed() {
    state = State.FAILED;
}
/** Utility function to parse a KubeVela string. Can be used from jshell. */
public static JsonNode readKubevelaString(String kubevela) throws JsonMappingException, JsonProcessingException {
return yamlMapper.readTree(kubevela);
@ -391,6 +464,11 @@ public class NebulousApp {
/**
* Calculate AMPL file and send it off to the solver.
*
* <p> TODO: this should be done once from a message handler that listens
* for an incoming "solver ready" message
*
* <p> TODO: also send performance indicators to solver here
*/
public void sendAMPL() {
String ampl = AMPLGenerator.generateAMPL(this);

View File

@ -21,8 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator;
import lombok.extern.slf4j.Slf4j;
import static net.logstash.logback.argument.StructuredArguments.keyValue;
@ -145,6 +143,36 @@ public class NebulousAppDeployer {
.allMatch(node -> node.get("state").asText().equals("Finished"));
}
/**
 * Wait until all nodes in cluster are in state "Finished".
 *
 * <p>Note: Cluster deployment includes provisioning and booting VMs,
 * installing various software packages, bringing up a Kubernetes cluster
 * and installing the NebulOuS runtime. This can take some minutes.
 *
 * <p>The cluster state is polled via {@link ExnConnector#getCluster}
 * every 10 seconds.  If the waiting thread is interrupted, its interrupt
 * flag is restored and the method gives up and returns false, so that
 * the caller can abort the deployment.
 *
 * @param conn the connector used to query the cluster state.
 * @param clusterName the name of the cluster to wait for.
 * @param appUUID the application's id, used for logging only.
 * @return true once the cluster deployment has finished, false if
 *  waiting was interrupted.
 */
private static boolean waitForClusterDeploymentFinished(ExnConnector conn, String clusterName, String appUUID) {
    final long pollIntervalMillis = 10000;
    // TODO: find out what state node(s) or the whole cluster are in when
    // cluster start fails, and return false in that case.
    while (true) {
        // TODO: distinguish between clusterState==null because SAL hasn't
        // set up its datastructures yet, and clusterState==null because
        // the call to getCluster failed. In the latter case we want to
        // abort (because someone has deleted the cluster), in the former
        // case we want to continue.
        JsonNode clusterState = conn.getCluster(clusterName);
        if (clusterState != null && isClusterDeploymentFinished(clusterState)) {
            return true;
        }
        log.info("Waiting for cluster deployment to finish...",
            keyValue("appId", appUUID), keyValue("clusterName", clusterName),
            keyValue("clusterState", clusterState));
        try {
            Thread.sleep(pollIntervalMillis);
        } catch (InterruptedException e) {
            // An interrupt is a request to stop; swallowing it would keep
            // this unbounded loop alive.  Restore the flag for callers
            // further up the stack and report failure.
            Thread.currentThread().interrupt();
            log.warn("Interrupted while waiting for cluster deployment to finish, giving up",
                keyValue("appId", appUUID), keyValue("clusterName", clusterName));
            return false;
        }
    }
}
/**
* Given a KubeVela file, extract node requirements, create the job, start
* its nodes and submit KubeVela.
@ -152,45 +180,52 @@ public class NebulousAppDeployer {
* <p>NOTE: this method modifies the NebulousApp object state, storing
* various facts about the deployed cluster.
*
* <p>NOTE: this method is under reconstruction, pending the new
* endpoints.
*
* @param app The NebulOuS app object.
* @param kubevela the KubeVela file to deploy.
*/
public static void deployApplication(NebulousApp app, JsonNode kubevela) {
String appUUID = app.getUUID();
String clusterName = app.getClusterName();
if (!app.setStateDeploying()) {
// TODO: wait until we got the performance indicators from Marta
log.error("Trying to deploy app that is in state {} (should be READY), aborting deployment",
app.getState().name(),
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
return;
}
// The application name is typed in by the user, and is used
// internally by SAL as an unquoted filename in a generated shell
// script. It shouldn't be this way but it is what it is.
String safeAppName = app.getName().replaceAll("[^a-zA-Z0-9-_]", "_");
ExnConnector conn = app.getExnConnector();
log.info("Starting initial deployment for application", keyValue("appId", appUUID), keyValue("clusterName", clusterName));
log.info("Starting initial deployment for application",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
int deployGeneration = app.getDeployGeneration() + 1;
app.setDeployGeneration(deployGeneration);
// The overall flow:
//
// 1. Extract node requirements and node counts from the KubeVela
// definition.
// 2. Ask resource broker for node candidates for all components and the
// controller.
// 3. Select node candidates, making sure to only select edge nodes
// once.
// 4. Create a SAL cluster.
// 5. Deploy the SAL cluster.
// 6. Add node affinity traits to the KubeVela file.
// 7. Deploy the SAL application.
// 8. Store cluster state (deployed KubeVela file, etc.) in
// NebulousApp object.
// - Extract node requirements and node counts from the KubeVela
// definition.
// - Rewrite KubeVela: remove performance requirements, add affinity
// traits
// - Ask resource broker for node candidates for all components and the
// controller.
// - Select node candidates, making sure to only select edge nodes
// once.
// - Create a SAL cluster.
// - Deploy the SAL cluster.
// - Add node affinity traits to the KubeVela file.
// - Deploy the SAL application.
// - Store cluster state (deployed KubeVela file, etc.) in
// NebulousApp object.
// ------------------------------------------------------------
// 1. Extract node requirements
// Extract node requirements
Map<String, List<Requirement>> componentRequirements = KubevelaAnalyzer.getClampedRequirements(kubevela);
Map<String, Integer> nodeCounts = KubevelaAnalyzer.getNodeCount(kubevela);
List<Requirement> controllerRequirements = getControllerRequirements(appUUID);
// HACK: do this only when cloud id = nrec
componentRequirements.forEach(
(k, reqs) -> reqs.add(new AttributeRequirement("location", "name", RequirementOperator.EQ, "bgo")));
@ -198,49 +233,68 @@ public class NebulousAppDeployer {
Main.logFile("component-counts-" + appUUID + ".txt", nodeCounts);
Main.logFile("controller-requirements-" + appUUID + ".txt", controllerRequirements);
// ----------------------------------------
// 2. Find node candidates
// ------------------------------------------------------------
// Rewrite KubeVela
JsonNode rewritten = createDeploymentKubevela(kubevela);
String rewritten_kubevela = "---\n# Did not manage to create rewritten KubeVela";
try {
rewritten_kubevela = yamlMapper.writeValueAsString(rewritten);
} catch (JsonProcessingException e) {
log.error("Failed to convert KubeVela to YAML; this should never happen",
keyValue("appId", appUUID), keyValue("clusterName", clusterName), e);
app.setStateFailed();
return;
}
Main.logFile("rewritten-kubevela-" + appUUID + ".yaml", rewritten_kubevela);
// TODO: filter by app resources (check enabled: true in resources array)
// ----------------------------------------
// Find node candidates
// TODO: filter by app resources / cloud? (check enabled: true in resources array)
List<NodeCandidate> controllerCandidates = conn.findNodeCandidates(controllerRequirements, appUUID);
if (controllerCandidates.isEmpty()) {
log.error("Could not find node candidates for requirements: {}",
log.error("Could not find node candidates for requirements: {}, aborting deployment",
controllerRequirements, keyValue("appId", appUUID), keyValue("clusterName", clusterName));
// Continue here while we don't really deploy
// return;
app.setStateFailed();
return;
}
Map<String, List<NodeCandidate>> componentCandidates = new HashMap<>();
for (Map.Entry<String, List<Requirement>> e : componentRequirements.entrySet()) {
String nodeName = e.getKey();
List<Requirement> requirements = e.getValue();
// TODO: filter by app resources (check enabled: true in resources array)
// TODO: filter by app resources / cloud? (check enabled: true in resources array)
List<NodeCandidate> candidates = conn.findNodeCandidates(requirements, appUUID);
if (candidates.isEmpty()) {
log.error("Could not find node candidates for for node {}, requirements: {}", nodeName, requirements,
log.error("Could not find node candidates for for node {}, requirements: {}, aborting deployment", nodeName, requirements,
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
// Continue here while we don't really deploy
// return;
app.setStateFailed();
return;
}
componentCandidates.put(nodeName, candidates);
}
// ------------------------------------------------------------
// 3. Select node candidates
// Select node candidates
Map<String, NodeCandidate> nodeEdgeCandidates = new HashMap<>(app.getNodeEdgeCandidates());
// Controller node
log.info("Deciding on controller node candidate", keyValue("appId", appUUID), keyValue("clusterName", clusterName));
String masterNodeName = "n" + clusterName.toLowerCase() + "-masternode"; // safe because all component node names end with a number
// Take care to only use lowercase, numbers, starting with letter
String masterNodeName = "m" + clusterName.toLowerCase() + "-master";
NodeCandidate masterNodeCandidate = null;
if (controllerCandidates.size() > 0) {
masterNodeCandidate = controllerCandidates.get(0);
if (Set.of(NodeCandidateTypeEnum.BYON, NodeCandidateTypeEnum.EDGE)
.contains(masterNodeCandidate.getNodeCandidateType())) {
// Mark this candidate as already chosen
app.getNodeEdgeCandidates().put(masterNodeName, masterNodeCandidate);
nodeEdgeCandidates.put(masterNodeName, masterNodeCandidate);
}
} else {
log.error("Empty node candidate list for controller, continuing without creating node",
log.error("Empty node candidate list for controller, aborting deployment",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
return;
}
// Component nodes
@ -255,47 +309,57 @@ public class NebulousAppDeployer {
// ExnConnector.createCluster
// - Each node name and its label (nodeLabels), for
// ExnConnector.labelNodes
Map<String, Set<String>> componentNodeNames = new HashMap<>();
for (Map.Entry<String, List<Requirement>> e : componentRequirements.entrySet()) {
String componentName = e.getKey();
int numberOfNodes = nodeCounts.get(componentName);
Set<String> nodeNames = new HashSet<>();
List<NodeCandidate> candidates = componentCandidates.get(componentName);
if (candidates.size() == 0) {
log.error("Empty node candidate list for component {}, continuing without creating node", componentName,
log.error("Empty node candidate list for component {}, aborting deployment", componentName,
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
continue;
app.setStateFailed();
return;
}
for (int nodeNumber = 1; nodeNumber <= numberOfNodes; nodeNumber++) {
String nodeName = createNodeName(clusterName, componentName, deployGeneration, nodeNumber);
String nodeName = createNodeName(clusterName, componentName, app.getDeployGeneration(), nodeNumber);
NodeCandidate candidate = candidates.stream()
.filter(each -> !app.getNodeEdgeCandidates().values().contains(each))
.filter(each -> !nodeEdgeCandidates.values().contains(each))
.findFirst()
.orElse(null);
if (candidate == null) {
log.error("No available node candidate for node {} of component {}", nodeNumber, componentName,
log.error("No available node candidate for node {} of component {}, aborting deployment", nodeNumber, componentName,
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
continue;
app.setStateFailed();
return;
}
if (Set.of(NodeCandidateTypeEnum.BYON, NodeCandidateTypeEnum.EDGE).contains(candidate.getNodeCandidateType())) {
app.getNodeEdgeCandidates().put(nodeName, candidate);
nodeEdgeCandidates.put(nodeName, candidate);
}
clusterNodes.put(nodeName, candidate);
nodeLabels.addObject().put(nodeName, "nebulouscloud.eu/" + componentName + "=yes");
nodeNames.add(nodeName);
}
app.getComponentNodeNames().put(componentName, nodeNames);
// XXX TODO do not directly mutate this value
componentNodeNames.put(componentName, nodeNames);
}
Main.logFile("nodenames-" + appUUID + ".txt", app.getComponentNodeNames());
Main.logFile("nodenames-" + appUUID + ".txt", componentNodeNames);
Main.logFile("master-nodecandidate-" + appUUID + ".txt", masterNodeCandidate);
Main.logFile("component-nodecandidates-" + appUUID + ".txt", clusterNodes);
try {
Main.logFile("component-labels-" + appUUID + ".txt", mapper.writeValueAsString(nodeLabels));
} catch (JsonProcessingException e1) {
// ignore; the labelNodes method will report the same error later
log.error("Internal error: could not convert node labels to string (this should never happen), aborting deployment",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
return;
}
// TODO: send performance indicators (for monitoring system, which
// needs it before cluster creation)
// ------------------------------------------------------------
// 4. Create cluster
// Create cluster
ObjectNode cluster = mapper.createObjectNode();
cluster.put("name", clusterName)
@ -315,68 +379,52 @@ public class NebulousAppDeployer {
});
ObjectNode environment = cluster.withObject("/env-var");
environment.put("APPLICATION_ID", appUUID);
// TODO: add other environment variables, also from app creation
// message (it has an "env" array)
log.info("Calling defineCluster", keyValue("appId", appUUID), keyValue("clusterName", clusterName));
boolean defineClusterSuccess = conn.defineCluster(appUUID, clusterName, cluster);
if (!defineClusterSuccess) {
log.error("Call to defineCluster failed, blindly continuing...",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
log.error("Call to defineCluster failed for message body {}, aborting deployment",
cluster, keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
return;
}
// ------------------------------------------------------------
// 5. Deploy cluster
// Deploy cluster
log.info("Calling deployCluster", keyValue("appId", appUUID), keyValue("clusterName", clusterName));
boolean deployClusterSuccess = conn.deployCluster(appUUID, clusterName);
if (!deployClusterSuccess) {
log.error("Call to deployCluster failed, blindly continuing...",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
log.error("Call to deployCluster failed, trying to delete cluster and aborting deployment",
cluster, keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
conn.deleteCluster(appUUID, clusterName);
return;
}
JsonNode clusterState = conn.getCluster(clusterName);
while (clusterState == null || !isClusterDeploymentFinished(clusterState)) {
// Cluster deployment includes provisioning and booting VMs,
// installing various software packages, bringing up a Kubernetes
// cluster and installing the NebulOuS runtime. This can take
// some minutes.
log.info("Waiting for cluster deployment to finish...",
keyValue("appId", appUUID), keyValue("clusterName", clusterName),
keyValue("clusterState", clusterState));
try {
Thread.sleep(10000);
} catch (InterruptedException e1) {
// ignore
}
// TODO: distinguish between clusterState==null because SAL hasn't
// set up its datastructures yet, and clusterState==null because
// the call to getCluster failed. In the latter case we want to
// abort (because someone has deleted the cluster), in the former
// case we want to continue.
clusterState = conn.getCluster(clusterName);
if (!waitForClusterDeploymentFinished(conn, clusterName, appUUID)) {
log.error("Error while waiting for deployCluster to finish, trying to delete cluster and aborting deployment",
cluster, keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
conn.deleteCluster(appUUID, clusterName);
return;
}
log.info("Cluster deployment finished, continuing with app deployment",
keyValue("appId", appUUID), keyValue("clusterName", clusterName),
keyValue("clusterState", clusterState));
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
log.info("Calling labelCluster", keyValue("appId", appUUID), keyValue("clusterName", clusterName));
boolean labelClusterSuccess = conn.labelNodes(appUUID, clusterName, nodeLabels);
if (!labelClusterSuccess) {
log.error("Call to deployCluster failed, blindly continuing...",
log.error("Call to deployCluster failed, aborting deployment",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
conn.deleteCluster(appUUID, clusterName);
return;
}
// ------------------------------------------------------------
// 6. Rewrite KubeVela
JsonNode rewritten = createDeploymentKubevela(kubevela);
String rewritten_kubevela = "---\n# Did not manage to create rewritten KubeVela";
try {
rewritten_kubevela = yamlMapper.writeValueAsString(rewritten);
} catch (JsonProcessingException e) {
log.error("Failed to convert KubeVela to YAML; this should never happen",
keyValue("appId", appUUID), keyValue("clusterName", clusterName), e);
}
Main.logFile("rewritten-kubevela-" + appUUID + ".yaml", rewritten_kubevela);
// ------------------------------------------------------------
// 7. Deploy application
// Deploy application
log.info("Calling deployApplication", keyValue("appId", appUUID), keyValue("clusterName", clusterName));
long proActiveJobID = conn.deployApplication(appUUID, clusterName, safeAppName, rewritten_kubevela);
@ -385,19 +433,16 @@ public class NebulousAppDeployer {
if (proActiveJobID == 0) {
// 0 means conversion from long has failed (because of an invalid
// response), OR a ProActive job id of 0.
log.warn("Job ID = 0, this means that deployApplication has probably failed.",
log.error("DeployApplication ProActive job ID = 0, deployApplication has probably failed; aborting deployment.",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
app.setStateFailed();
conn.deleteCluster(appUUID, clusterName);
return;
}
// ------------------------------------------------------------
// 8. Update NebulousApp state
// Update NebulousApp state
// TODO: send out AMPL (must be done after deployCluster, once we know
// how to pass the application id into the fresh cluster)
app.setComponentRequirements(componentRequirements);
app.setComponentReplicaCounts(nodeCounts);
app.setDeployedKubevela(rewritten);
app.setStateDeploymentFinished(componentRequirements, nodeCounts, componentNodeNames, nodeEdgeCandidates, rewritten);
log.info("App deployment finished.",
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
}
@ -417,11 +462,10 @@ public class NebulousAppDeployer {
public static void redeployApplication(NebulousApp app, ObjectNode kubevela) {
String appUUID = app.getUUID();
String clusterName = app.getClusterName();
int deployGeneration = app.getDeployGeneration() + 1;
ExnConnector conn = app.getExnConnector();
app.setDeployGeneration(deployGeneration);
app.setStateDeploying();
log.info("Starting redeployment generation {}", deployGeneration,
log.info("Starting redeployment generation {}", app.getDeployGeneration(),
keyValue("appId", appUUID), keyValue("clusterName", clusterName));
// The overall flow:
//
@ -476,7 +520,7 @@ public class NebulousAppDeployer {
continue;
}
for (int nodeNumber = 1; nodeNumber <= nAdd; nodeNumber++) {
String nodeName = createNodeName(clusterName, componentName, deployGeneration, nodeNumber);
String nodeName = createNodeName(clusterName, componentName, app.getDeployGeneration(), nodeNumber);
NodeCandidate candidate = candidates.stream()
.filter(each -> !app.getNodeEdgeCandidates().values().contains(each))
.findFirst()
@ -532,7 +576,7 @@ public class NebulousAppDeployer {
continue;
}
for (int nodeNumber = 1; nodeNumber <= componentReplicaCounts.get(componentName); nodeNumber++) {
String nodeName = createNodeName(clusterName, componentName, deployGeneration, nodeNumber);
String nodeName = createNodeName(clusterName, componentName, app.getDeployGeneration(), nodeNumber);
NodeCandidate candidate = candidates.stream()
.filter(each -> !app.getNodeEdgeCandidates().values().contains(each))
.findFirst()