Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/steps/multi_stage/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func (s *multiStageTestStep) generateObservers(
type generatePodOptions struct {
IsObserver bool
enableSecretsStoreCSIDriver bool
phase string
}

func defaultGeneratePodOptions() *generatePodOptions {
Expand Down Expand Up @@ -147,6 +148,9 @@ func (s *multiStageTestStep) generatePods(
delete(pod.Labels, base_steps.ProwJobIdLabel)
pod.Annotations[base_steps.AnnotationSaveContainerLogs] = "true"
pod.Labels[MultiStageTestLabel] = s.name
if genPodOpts.phase != "" {
pod.Labels[MultiStageTestPhaseLabel] = genPodOpts.phase
}
needsKubeConfig := isKubeconfigNeeded(&step, genPodOpts)
if needsKubeConfig {
pod.Spec.ServiceAccountName = s.name
Expand Down
46 changes: 41 additions & 5 deletions pkg/steps/multi_stage/multi_stage.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ const (
const (
// MultiStageTestLabel is the label we use to mark a pod as part of a multi-stage test
MultiStageTestLabel = "ci.openshift.io/multi-stage-test"
// MultiStageTestPhaseLabel is the label we use to mark which phase (pre, test, post) a pod belongs to
MultiStageTestPhaseLabel = "ci.openshift.io/multi-stage-test-phase"
// ClusterProfileMountPath is where we mount the cluster profile in a pod
ClusterProfileMountPath = "/var/run/secrets/ci.openshift.io/cluster-profile"
// SecretMountPath is where we mount the shared dir secret
Expand Down Expand Up @@ -243,16 +245,50 @@ func (s *multiStageTestStep) run(ctx context.Context) error {
observerDone := make(chan struct{})
go s.runObservers(observerContext, ctx, observers, observerDone)
s.flags |= shortCircuit
if err := s.runSteps(ctx, "pre", s.pre, env, secretVolumes, secretVolumeMounts); err != nil {
errs = append(errs, fmt.Errorf("%q pre steps failed: %w", s.name, err))
} else if err := s.runSteps(ctx, "test", s.test, env, secretVolumes, secretVolumeMounts); err != nil {
errs = append(errs, fmt.Errorf("%q test steps failed: %w", s.name, err))

// Track if cancellation occurred during pre or test phases
cancelledDuringPreOrTest := false

// Run pre phase
preErr := s.runSteps(ctx, "pre", s.pre, env, secretVolumes, secretVolumeMounts)
if preErr != nil {
errs = append(errs, fmt.Errorf("%q pre steps failed: %w", s.name, preErr))
}
// Check if cancellation occurred during pre phase (check after runSteps returns)
if ctx.Err() != nil {
cancelledDuringPreOrTest = true
logrus.Warnf("Job was cancelled during pre phase for test %q", s.name)
} else if preErr == nil {
// Run test phase only if pre succeeded and wasn't cancelled
testErr := s.runSteps(ctx, "test", s.test, env, secretVolumes, secretVolumeMounts)
if testErr != nil {
errs = append(errs, fmt.Errorf("%q test steps failed: %w", s.name, testErr))
}
// Check if cancellation occurred during test phase
if ctx.Err() != nil {
cancelledDuringPreOrTest = true
logrus.Warnf("Job was cancelled during test phase for test %q", s.name)
}
}

s.cancelObserversContext(cancel) // signal to observers that we're tearing down
s.flags &= ^shortCircuit
if err := s.runSteps(context.Background(), "post", s.post, env, secretVolumes, secretVolumeMounts); err != nil {

// Always run post steps, even if the job was cancelled during pre or test phases.
// This ensures proper cleanup (e.g., deprovisioning resources) to prevent resource leaks.
// Use the original context by default to respect job deadlines/cancellation.
// Only switch to context.Background() if cancellation occurred during pre/test to ensure cleanup completes.
postCtx := ctx
if cancelledDuringPreOrTest {
postCtx = context.Background()
logrus.Infof("Job was cancelled during pre or test phase, running post steps with background context to ensure cleanup completes for test %q", s.name)
}
if err := s.runSteps(postCtx, "post", s.post, env, secretVolumes, secretVolumeMounts); err != nil {
errs = append(errs, fmt.Errorf("%q post steps failed: %w", s.name, err))
} else if cancelledDuringPreOrTest {
logrus.Infof("Post steps completed successfully after cancellation for test %q", s.name)
}

<-observerDone // wait for the observers to finish so we get their jUnit
return utilerrors.NewAggregate(errs)
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/steps/multi_stage/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func (s *multiStageTestStep) runSteps(
logrus.Infof("Running multi-stage phase %s", phase)
pods, bestEffortSteps, err := s.generatePods(steps, env, secretVolumes, secretVolumeMounts, &generatePodOptions{
enableSecretsStoreCSIDriver: s.enableSecretsStoreCSIDriver,
phase: phase,
})
if err != nil {
s.flags |= hasPrevErrs
Expand Down Expand Up @@ -75,7 +76,9 @@ func (s *multiStageTestStep) runSteps(
Output: err.Error(),
}
}
s.subLock.Lock()
s.subTests = append(s.subTests, testCase)
s.subLock.Unlock()
logrus.Infof("Step phase %s %s after %s.", phase, verb, duration.Truncate(time.Second))

return err
Expand Down