Skip to content

Commit 91f6827

Browse files
authored
Make waiting for cluster upgrades more resilient (#5133)
* Make waiting for cluster upgrades more resilient
1 parent 2a7f452 commit 91f6827

File tree

1 file changed

+51
-35
lines changed

1 file changed

+51
-35
lines changed

pkg/operator/operator.go

Lines changed: 51 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1039,51 +1039,67 @@ func (o *Operator) reconcileDeployment(cm *corev1.ConfigMap) (finalError error)
10391039
}
10401040

10411041
func (o *Operator) waitForClusterUpgrade(appID string, appSlug string) error {
1042-
ctx := context.Background()
1043-
1044-
kbClient, err := k8sutil.GetKubeClient(ctx)
1045-
if err != nil {
1046-
return errors.Wrap(err, "failed to get kube client")
1047-
}
10481042
logger.Infof("Waiting for cluster upgrade to finish")
1043+
10491044
for {
1050-
ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient)
1045+
done, err := o.reconcileClusterUpgrade(context.Background(), appID, appSlug)
10511046
if err != nil {
1052-
return errors.Wrap(err, "failed to wait for embedded cluster installation")
1053-
}
1054-
if embeddedcluster.InstallationSucceeded(ctx, ins) {
1055-
logger.Infof("Cluster upgrade succeeded")
1056-
if err := o.notifyClusterUpgradeSucceeded(ctx, kbClient, ins, appID); err != nil {
1057-
logger.Errorf("Failed to notify upgrade succeeded: %v", err)
1058-
}
1047+
logger.Errorf("Error reconciling cluster upgrade (retrying in 5s): %v", err)
1048+
} else if done {
10591049
return nil
10601050
}
1061-
if embeddedcluster.InstallationFailed(ctx, ins) {
1062-
logger.Infof("Cluster upgrade failed")
1063-
if err := o.notifyClusterUpgradeFailed(ctx, kbClient, ins, appID); err != nil {
1064-
logger.Errorf("Failed to notify upgrade failed: %v", err)
1065-
}
1066-
if err := upgradeservicetask.SetStatusUpgradeFailed(appSlug, ins.Status.Reason); err != nil {
1067-
return errors.Wrap(err, "failed to set task status to failed")
1068-
}
1069-
return nil // we try to deploy the app even if the cluster upgrade failed
1051+
1052+
time.Sleep(5 * time.Second)
1053+
}
1054+
}
1055+
1056+
func (o *Operator) reconcileClusterUpgrade(ctx context.Context, appID string, appSlug string) (bool, error) {
1057+
kbClient, err := k8sutil.GetKubeClient(ctx)
1058+
if err != nil {
1059+
return false, errors.Wrap(err, "failed to get kube client")
1060+
}
1061+
1062+
ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient)
1063+
if err != nil {
1064+
return false, errors.Wrap(err, "failed to wait for embedded cluster installation")
1065+
}
1066+
1067+
if embeddedcluster.InstallationSucceeded(ctx, ins) {
1068+
logger.Infof("Cluster upgrade succeeded")
1069+
if err := o.notifyClusterUpgradeSucceeded(ctx, kbClient, ins, appID); err != nil {
1070+
logger.Errorf("Failed to notify upgrade succeeded: %v", err)
10701071
}
1071-
msg := ins.Status.State
1072-
if checkInstallationConditionStatus(ins.Status, embeddedclusterv1beta1.ConditionTypeV2MigrationInProgress) == metav1.ConditionTrue {
1073-
msg = "V2MigrationInProgress"
1072+
return true, nil // succeeded
1073+
}
1074+
1075+
if embeddedcluster.InstallationFailed(ctx, ins) {
1076+
logger.Infof("Cluster upgrade failed")
1077+
if err := o.notifyClusterUpgradeFailed(ctx, kbClient, ins, appID); err != nil {
1078+
logger.Errorf("Failed to notify upgrade failed: %v", err)
10741079
}
1075-
if msg == "" {
1076-
// if the status was the same previously, do not overwrite the previous message with an empty one
1077-
taskStatus, taskMsg, _ := upgradeservicetask.GetStatus(appSlug)
1078-
if taskStatus == string(upgradeservicetask.StatusUpgradingCluster) {
1079-
msg = taskMsg
1080-
}
1080+
if err := upgradeservicetask.SetStatusUpgradeFailed(appSlug, ins.Status.Reason); err != nil {
1081+
return false, errors.Wrap(err, "failed to set task status to failed")
10811082
}
1082-
if err := upgradeservicetask.SetStatusUpgradingCluster(appSlug, msg); err != nil {
1083-
return errors.Wrap(err, "failed to set task status to upgrading cluster")
1083+
return true, nil // failed, but we try to deploy the app even if the cluster upgrade failed
1084+
}
1085+
1086+
msg := ins.Status.State
1087+
if checkInstallationConditionStatus(ins.Status, embeddedclusterv1beta1.ConditionTypeV2MigrationInProgress) == metav1.ConditionTrue {
1088+
msg = "V2MigrationInProgress"
1089+
}
1090+
if msg == "" {
1091+
// if the status was the same previously, do not overwrite the previous message with an empty one
1092+
taskStatus, taskMsg, _ := upgradeservicetask.GetStatus(appSlug)
1093+
if taskStatus == string(upgradeservicetask.StatusUpgradingCluster) {
1094+
msg = taskMsg
10841095
}
1085-
time.Sleep(5 * time.Second)
10861096
}
1097+
1098+
if err := upgradeservicetask.SetStatusUpgradingCluster(appSlug, msg); err != nil {
1099+
return false, errors.Wrap(err, "failed to set task status to upgrading cluster")
1100+
}
1101+
1102+
return false, nil // in progress
10871103
}
10881104

10891105
// notifyClusterUpgradeSucceeded sends a metrics event to the api that the upgrade succeeded.

0 commit comments

Comments
 (0)