@@ -24,6 +24,7 @@ import (
2424 "encoding/json"
2525 "fmt"
2626 "slices"
27+ "strconv"
2728 "strings"
2829 "time"
2930
@@ -543,6 +544,18 @@ func (r *helmTypeInstallStage) Handle(ctx context.Context) {
543544 r .setRequeueWithErr (err , "" )
544545 return
545546 } else if err == nil {
547+ // Check if the job is outdated (belongs to an older generation)
548+ if isJobOutdated (helmInstallJob , addon ) {
549+ r .reqCtx .Log .Info ("Deleting outdated install job" , "job" , key .Name , "jobGeneration" ,
550+ helmInstallJob .Annotations [AddonGeneration ], "addonGeneration" , addon .Generation )
551+ if err := r .reconciler .Delete (ctx , helmInstallJob ); client .IgnoreNotFound (err ) != nil {
552+ r .setRequeueWithErr (err , "" )
553+ return
554+ }
555+ r .setRequeueAfter (time .Second , "recreating install job for new generation" )
556+ return
557+ }
558+
546559 if helmInstallJob .Status .Succeeded > 0 {
547560 return
548561 }
@@ -710,6 +723,18 @@ func (r *helmTypeUninstallStage) Handle(ctx context.Context) {
710723 r .setRequeueWithErr (err , "" )
711724 return
712725 } else if err == nil {
726+ // Check if the job is outdated (belongs to an older generation)
727+ if isJobOutdated (helmUninstallJob , addon ) {
728+ r .reqCtx .Log .Info ("Deleting outdated uninstall job" , "job" , key .Name , "jobGeneration" ,
729+ helmUninstallJob .Annotations [AddonGeneration ], "addonGeneration" , addon .Generation )
730+ if err := r .reconciler .Delete (ctx , helmUninstallJob ); client .IgnoreNotFound (err ) != nil {
731+ r .setRequeueWithErr (err , "" )
732+ return
733+ }
734+ r .setRequeueAfter (time .Second , "recreating uninstall job for new generation" )
735+ return
736+ }
737+
713738 if helmUninstallJob .Status .Succeeded > 0 {
714739 r .reqCtx .Log .V (1 ).Info ("helm uninstall job succeed" , "job" , key )
715740 // TODO:
@@ -911,6 +936,17 @@ func createHelmJobProto(addon *extensionsv1alpha1.Addon) (*batchv1.Job, error) {
911936 }
912937 ttlSec := int32 (ttl .Seconds ())
913938 backoffLimit := int32 (3 )
939+
940+ // Set job timeout to prevent jobs from running indefinitely
941+ jobTimeout := time .Minute * 5
942+ if timeout := viper .GetString (constant .CfgAddonJobTimeout ); timeout != "" {
943+ var err error
944+ if jobTimeout , err = time .ParseDuration (timeout ); err != nil {
945+ return nil , err
946+ }
947+ }
948+ activeDeadlineSeconds := int64 (jobTimeout .Seconds ())
949+
914950 container := corev1.Container {
915951 Name : getJobMainContainerName (addon ),
916952 Image : viper .GetString (constant .KBToolsImage ),
@@ -940,10 +976,15 @@ func createHelmJobProto(addon *extensionsv1alpha1.Addon) (*batchv1.Job, error) {
940976 constant .AddonNameLabelKey : addon .Name ,
941977 constant .AppManagedByLabelKey : constant .AppName ,
942978 },
979+ Annotations : map [string ]string {
980+ // Add generation annotation to track which addon generation this job belongs to
981+ AddonGeneration : fmt .Sprintf ("%d" , addon .Generation ),
982+ },
943983 },
944984 Spec : batchv1.JobSpec {
945985 BackoffLimit : & backoffLimit ,
946986 TTLSecondsAfterFinished : & ttlSec ,
987+ ActiveDeadlineSeconds : & activeDeadlineSeconds ,
947988 Template : corev1.PodTemplateSpec {
948989 ObjectMeta : metav1.ObjectMeta {
949990 Labels : map [string ]string {
@@ -1075,6 +1116,26 @@ func getJobMainContainerName(addon *extensionsv1alpha1.Addon) string {
10751116 return strings .ToLower (string (addon .Spec .Type ))
10761117}
10771118
1119+ // isJobOutdated checks if the job belongs to an older generation of the addon
1120+ func isJobOutdated (job * batchv1.Job , addon * extensionsv1alpha1.Addon ) bool {
1121+ if job .Annotations == nil {
1122+ // Jobs without generation annotation are considered outdated
1123+ return true
1124+ }
1125+
1126+ jobGenStr , exists := job .Annotations [AddonGeneration ]
1127+ if ! exists {
1128+ return true
1129+ }
1130+
1131+ jobGen , err := strconv .ParseInt (jobGenStr , 10 , 64 )
1132+ if err != nil {
1133+ return true
1134+ }
1135+
1136+ return jobGen < addon .Generation
1137+ }
1138+
10781139func logFailedJobPodToCondError (ctx context.Context , stageCtx * stageCtx , addon * extensionsv1alpha1.Addon ,
10791140 jobName , reason string ) error {
10801141 podList := & corev1.PodList {}
0 commit comments