diff --git a/config/application.yml.example b/config/application.yml.example index f0004ec7..e8e7627c 100644 --- a/config/application.yml.example +++ b/config/application.yml.example @@ -40,6 +40,12 @@ oops: - "*/__MACOSX/*" - .DS_Store - "*/.DS_Store" + # Post-deploy health verification. When enabled, a deploy enters VERIFYING after the StatefulSet is + # applied and is only marked SUCCEEDED once the rollout is ready (catches ImagePullBackOff / not-ready + # pods). Set enabled=false to revert to marking SUCCEEDED immediately after apply. + health: + enabled: true + timeout: 5m pod-filesystem: max-download-size-bytes: 52428800 ingress: diff --git a/docker/application.yml.example b/docker/application.yml.example index 45cf8b55..40b97255 100644 --- a/docker/application.yml.example +++ b/docker/application.yml.example @@ -52,6 +52,12 @@ oops: - "*/__MACOSX/*" - .DS_Store - "*/.DS_Store" + # Post-deploy health verification. When enabled, a deploy enters VERIFYING after the StatefulSet is + # applied and is only marked SUCCEEDED once the rollout is ready (catches ImagePullBackOff / not-ready + # pods). Set enabled=false to revert to marking SUCCEEDED immediately after apply. + health: + enabled: true + timeout: 5m pod-filesystem: max-download-size-bytes: 52428800 feishu: diff --git a/src/main/java/com/github/wellch4n/oops/application/dto/DeploymentHealth.java b/src/main/java/com/github/wellch4n/oops/application/dto/DeploymentHealth.java new file mode 100644 index 00000000..2b74f86e --- /dev/null +++ b/src/main/java/com/github/wellch4n/oops/application/dto/DeploymentHealth.java @@ -0,0 +1,19 @@ +package com.github.wellch4n.oops.application.dto; + +/** + * Post-deploy health snapshot of an application's StatefulSet, used to drive the VERIFYING -> SUCCEEDED/ERROR + * transition. {@code rolloutComplete} means the new revision is fully ready; {@code failureReason} (when present) + * carries the first fatal pod condition (e.g. ImagePullBackOff) so verification can fail fast without waiting for + * the timeout. + */ +public record DeploymentHealth( + boolean workloadMissing, + boolean rolloutComplete, + Integer desiredReplicas, + Integer readyReplicas, + String failureReason +) { + public boolean hasFailure() { + return failureReason != null && !failureReason.isBlank(); + } +} diff --git a/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationListener.java b/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationListener.java index db05964d..ed739ee3 100644 --- a/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationListener.java +++ b/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationListener.java @@ -76,6 +76,7 @@ private String resolveTitle(PipelineNotificationType type) { case CREATED -> "发布任务已创建"; case BUILD_SUCCEEDED -> "构建成功"; case DEPLOYING -> "开始部署"; + case VERIFYING -> "验证部署中"; case SUCCEEDED -> "发布成功"; case FAILED -> "发布失败"; case STOPPED -> "发布已停止"; @@ -88,7 +89,7 @@ private ExternalMessageLevel resolveLevel(PipelineNotificationType type) { case FAILED -> ExternalMessageLevel.ERROR; case BUILD_SUCCEEDED -> ExternalMessageLevel.WARNING; case STOPPED -> ExternalMessageLevel.NEUTRAL; - case CREATED, DEPLOYING -> ExternalMessageLevel.INFO; + case CREATED, DEPLOYING, VERIFYING -> ExternalMessageLevel.INFO; }; } } diff --git a/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationType.java b/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationType.java index f9c91192..24ee9e8e 100644 --- a/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationType.java +++ b/src/main/java/com/github/wellch4n/oops/application/event/PipelineNotificationType.java @@ -4,6 +4,7 @@ public enum PipelineNotificationType { CREATED, BUILD_SUCCEEDED, DEPLOYING, + VERIFYING, SUCCEEDED, FAILED, STOPPED diff --git a/src/main/java/com/github/wellch4n/oops/application/port/ApplicationRuntimeGateway.java b/src/main/java/com/github/wellch4n/oops/application/port/ApplicationRuntimeGateway.java index 02432bd8..666f6f30 100644 --- a/src/main/java/com/github/wellch4n/oops/application/port/ApplicationRuntimeGateway.java +++ b/src/main/java/com/github/wellch4n/oops/application/port/ApplicationRuntimeGateway.java @@ -3,6 +3,7 @@ import com.github.wellch4n.oops.domain.application.ApplicationRuntimeSpec; import com.github.wellch4n.oops.domain.environment.Environment; import com.github.wellch4n.oops.application.dto.ApplicationPodStatusView; +import com.github.wellch4n.oops.application.dto.DeploymentHealth; import java.util.List; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; @@ -27,4 +28,11 @@ void applyRuntimeSpec(Environment environment, * if the workload does not exist. Used to highlight which pipeline's artifact is currently live. */ String findCurrentImage(Environment environment, String namespace, String applicationName); + + /** + * Post-deploy health snapshot: whether the StatefulSet rollout has converged onto the new revision and + * whether any pod is in a fatal waiting state (ImagePullBackOff / ErrImagePull / CrashLoopBackOff). + * Used by the scan job to drive the VERIFYING status to SUCCEEDED or ERROR. + */ + DeploymentHealth getDeploymentHealth(Environment environment, String namespace, String applicationName); } diff --git a/src/main/java/com/github/wellch4n/oops/application/port/repository/PipelineRepository.java b/src/main/java/com/github/wellch4n/oops/application/port/repository/PipelineRepository.java index 847ccf64..757c70b3 100644 --- a/src/main/java/com/github/wellch4n/oops/application/port/repository/PipelineRepository.java +++ b/src/main/java/com/github/wellch4n/oops/application/port/repository/PipelineRepository.java @@ -2,6 +2,7 @@ import com.github.wellch4n.oops.domain.delivery.Pipeline; import com.github.wellch4n.oops.domain.shared.PipelineStatus; +import java.time.LocalDateTime; import java.util.List; public interface PipelineRepository { @@ -33,5 +34,7 @@ boolean existsByNamespaceAndApplicationNameAndStatusIn( int updateStatusAndMessageIfMatch(String id, PipelineStatus expected, PipelineStatus target, String message); + int updateStatusAndDeadlineIfMatch(String id, PipelineStatus expected, PipelineStatus target, LocalDateTime deadline); + List query(String namespace, String applicationName); } diff --git a/src/main/java/com/github/wellch4n/oops/application/service/DeploymentService.java b/src/main/java/com/github/wellch4n/oops/application/service/DeploymentService.java index 402e459f..9e9f0d41 100644 --- a/src/main/java/com/github/wellch4n/oops/application/service/DeploymentService.java +++ b/src/main/java/com/github/wellch4n/oops/application/service/DeploymentService.java @@ -13,13 +13,11 @@ import com.github.wellch4n.oops.domain.shared.ApplicationSourceType; import com.github.wellch4n.oops.application.event.PipelineNotificationEvent; import com.github.wellch4n.oops.application.event.PipelineNotificationType; -import com.github.wellch4n.oops.domain.shared.PipelineStatus; import com.github.wellch4n.oops.shared.exception.BizException; import com.github.wellch4n.oops.application.dto.DeployCommand; import com.github.wellch4n.oops.application.dto.DeployStrategyParam; import com.github.wellch4n.oops.application.dto.GitDeployStrategyParam; import com.github.wellch4n.oops.application.dto.ZipDeployStrategyParam; -import java.util.List; import org.springframework.context.ApplicationEventPublisher; import org.springframework.stereotype.Service; @@ -66,7 +64,7 @@ public String deployApplication(String namespace, throw new BizException("Deploy strategy is required"); } deploymentConcurrencyPolicy.ensureNoActivePipeline(pipelineRepository.existsByNamespaceAndApplicationNameAndStatusIn( - namespace, applicationName, List.of(PipelineStatus.RUNNING, PipelineStatus.DEPLOYING) + namespace, applicationName, deploymentConcurrencyPolicy.activePipelineStatuses() )); Environment environment = requireEnvironment(request.environment()); diff --git a/src/main/java/com/github/wellch4n/oops/application/service/PipelineService.java b/src/main/java/com/github/wellch4n/oops/application/service/PipelineService.java index cd6ccb68..cb89e351 100644 --- a/src/main/java/com/github/wellch4n/oops/application/service/PipelineService.java +++ b/src/main/java/com/github/wellch4n/oops/application/service/PipelineService.java @@ -20,6 +20,8 @@ import com.github.wellch4n.oops.application.dto.LastSuccessfulPipelineDto; import com.github.wellch4n.oops.application.dto.Page; import com.github.wellch4n.oops.application.dto.PipelineDto; +import com.github.wellch4n.oops.infrastructure.config.PipelineHealthProperties; +import java.time.LocalDateTime; import java.util.*; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -46,6 +48,7 @@ public class PipelineService { private final PipelineLogGateway pipelineLogGateway; private final PipelineStateMachine pipelineStateMachine; private final DeploymentConcurrencyPolicy deploymentConcurrencyPolicy; + private final PipelineHealthProperties pipelineHealthProperties; public PipelineService(PipelineRepository pipelineRepository, EnvironmentService environmentService, ApplicationRepository applicationRepository, @@ -55,7 +58,8 @@ public PipelineService(PipelineRepository pipelineRepository, EnvironmentService PipelineJobGateway pipelineJobGateway, PipelineLogGateway pipelineLogGateway, PipelineStateMachine pipelineStateMachine, - DeploymentConcurrencyPolicy deploymentConcurrencyPolicy) { + DeploymentConcurrencyPolicy deploymentConcurrencyPolicy, + PipelineHealthProperties pipelineHealthProperties) { this.pipelineRepository = pipelineRepository; this.environmentService = environmentService; this.applicationRepository = applicationRepository; @@ -66,6 +70,7 @@ public PipelineService(PipelineRepository pipelineRepository, EnvironmentService this.pipelineLogGateway = pipelineLogGateway; this.pipelineStateMachine = pipelineStateMachine; this.deploymentConcurrencyPolicy = deploymentConcurrencyPolicy; + this.pipelineHealthProperties = pipelineHealthProperties; } public Page getPipelines(String namespace, String applicationName, String environment, Integer page, Integer size) { @@ -145,7 +150,7 @@ public Boolean deployPipeline(String namespace, String applicationName, String i } pipelineStateMachine.ensureManualDeployable(pipeline.getStatus()); deploymentConcurrencyPolicy.ensureNoActivePipeline(pipelineRepository.existsByNamespaceAndApplicationNameAndStatusIn( - namespace, applicationName, List.of(PipelineStatus.RUNNING, PipelineStatus.DEPLOYING) + namespace, applicationName, deploymentConcurrencyPolicy.activePipelineStatuses() )); pipelineStateMachine.ensureCanTransition(PipelineStatus.BUILD_SUCCEEDED, PipelineStatus.DEPLOYING); @@ -171,12 +176,7 @@ public Boolean deployPipeline(String namespace, String applicationName, String i artifactDeploymentExecutor.deploy(pipeline, application, environment, runtimeSpec, healthCheck, serviceConfig); - pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); - pipelineRepository.updateStatusIfMatch(pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); - pipeline.markSucceeded(); - eventPublisher.publishEvent(PipelineNotificationEvent.of( - pipeline, PipelineNotificationType.SUCCEEDED, "应用已经成功发布。" - )); + completeDeployPhase(pipeline, "正在验证新版本是否就绪…", "应用已经成功发布。"); } catch (Exception e) { pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.ERROR); String message = StringUtils.defaultIfBlank(e.getMessage(), "发布任务执行失败,请查看日志。"); @@ -205,7 +205,7 @@ public String rollback(String namespace, String applicationName, String targetPi } deploymentConcurrencyPolicy.ensureNoActivePipeline(pipelineRepository.existsByNamespaceAndApplicationNameAndStatusIn( - namespace, applicationName, List.of(PipelineStatus.RUNNING, PipelineStatus.DEPLOYING) + namespace, applicationName, deploymentConcurrencyPolicy.activePipelineStatuses() )); Pipeline rollbackPipeline = pipelineRepository.save(Pipeline.rollback(source, operatorUserId)); @@ -236,12 +236,7 @@ public String rollback(String namespace, String applicationName, String targetPi artifactDeploymentExecutor.deploy(rollbackPipeline, application, environment, runtimeSpec, healthCheck, serviceConfig); - pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); - pipelineRepository.updateStatusIfMatch(rollbackPipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); - rollbackPipeline.markSucceeded(); - eventPublisher.publishEvent(PipelineNotificationEvent.of( - rollbackPipeline, PipelineNotificationType.SUCCEEDED, "回滚已成功。" - )); + completeDeployPhase(rollbackPipeline, "正在验证回滚版本是否就绪…", "回滚已成功。"); } catch (Exception e) { pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.ERROR); String message = StringUtils.defaultIfBlank(e.getMessage(), "回滚任务执行失败,请查看日志。"); @@ -285,6 +280,31 @@ public Boolean stopPipeline(String namespace, String applicationName, String id) return true; } + /** + * Completes the deploy phase after the artifact has been applied. When health verification is enabled, + * the pipeline moves to VERIFYING with a deadline and the scan job later decides SUCCEEDED/ERROR. When + * disabled, it is marked SUCCEEDED immediately (legacy behavior). + */ + private void completeDeployPhase(Pipeline pipeline, String verifyingDetail, String succeededDetail) { + if (pipelineHealthProperties.isEnabled()) { + LocalDateTime deadline = LocalDateTime.now().plus(pipelineHealthProperties.getTimeout()); + pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING); + pipelineRepository.updateStatusAndDeadlineIfMatch( + pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING, deadline); + pipeline.markVerifying(deadline); + eventPublisher.publishEvent(PipelineNotificationEvent.of( + pipeline, PipelineNotificationType.VERIFYING, verifyingDetail + )); + return; + } + pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); + pipelineRepository.updateStatusIfMatch(pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); + pipeline.markSucceeded(); + eventPublisher.publishEvent(PipelineNotificationEvent.of( + pipeline, PipelineNotificationType.SUCCEEDED, succeededDetail + )); + } + private Environment requireEnvironment(String environmentName) { Environment environment = environmentService.getEnvironment(environmentName); if (environment == null) { diff --git a/src/main/java/com/github/wellch4n/oops/domain/delivery/DeploymentConcurrencyPolicy.java b/src/main/java/com/github/wellch4n/oops/domain/delivery/DeploymentConcurrencyPolicy.java index 222bafa5..e4ab4382 100644 --- a/src/main/java/com/github/wellch4n/oops/domain/delivery/DeploymentConcurrencyPolicy.java +++ b/src/main/java/com/github/wellch4n/oops/domain/delivery/DeploymentConcurrencyPolicy.java @@ -1,9 +1,21 @@ package com.github.wellch4n.oops.domain.delivery; +import com.github.wellch4n.oops.domain.shared.PipelineStatus; import com.github.wellch4n.oops.shared.exception.BizException; +import java.util.List; public class DeploymentConcurrencyPolicy { + private static final List ACTIVE_PIPELINE_STATUSES = List.of( + PipelineStatus.RUNNING, + PipelineStatus.DEPLOYING, + PipelineStatus.VERIFYING + ); + + public List activePipelineStatuses() { + return ACTIVE_PIPELINE_STATUSES; + } + public void ensureNoActivePipeline(boolean activePipelineExists) { if (activePipelineExists) { throw new BizException("Application is being deployed"); diff --git a/src/main/java/com/github/wellch4n/oops/domain/delivery/Pipeline.java b/src/main/java/com/github/wellch4n/oops/domain/delivery/Pipeline.java index b39b3d52..9481fa5f 100644 --- a/src/main/java/com/github/wellch4n/oops/domain/delivery/Pipeline.java +++ b/src/main/java/com/github/wellch4n/oops/domain/delivery/Pipeline.java @@ -5,6 +5,7 @@ import com.github.wellch4n.oops.domain.shared.DeployMode; import com.github.wellch4n.oops.domain.shared.PipelineStatus; import com.github.wellch4n.oops.domain.shared.PipelineTriggerType; +import java.time.LocalDateTime; import lombok.Data; import lombok.EqualsAndHashCode; @@ -26,6 +27,7 @@ public class Pipeline extends BaseAggregateRoot { private String message; private PipelineTriggerType triggerType; private String rollbackFromPipelineId; + private LocalDateTime verifyDeadline; public static Pipeline initialize( String namespace, @@ -86,6 +88,19 @@ public void markDeploying() { transitionTo(PipelineStatus.DEPLOYING); } + /** + * Enters post-deploy health verification. The artifact has been applied to the cluster but the rollout + * may not yet be ready; {@code verifyDeadline} bounds how long the scan job will wait before failing. + */ + public void markVerifying(LocalDateTime verifyDeadline) { + this.verifyDeadline = verifyDeadline; + transitionTo(PipelineStatus.VERIFYING); + } + + public boolean isVerifyTimedOut(LocalDateTime now) { + return verifyDeadline != null && now.isAfter(verifyDeadline); + } + public void markSucceeded() { transitionTo(PipelineStatus.SUCCEEDED); } diff --git a/src/main/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachine.java b/src/main/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachine.java index 6cd31771..8dd26ed2 100644 --- a/src/main/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachine.java +++ b/src/main/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachine.java @@ -32,10 +32,15 @@ public class PipelineStateMachine { PipelineStatus.STOPPED )); ALLOWED_TRANSITIONS.put(PipelineStatus.DEPLOYING, EnumSet.of( + PipelineStatus.VERIFYING, PipelineStatus.SUCCEEDED, PipelineStatus.ERROR, PipelineStatus.STOPPED )); + ALLOWED_TRANSITIONS.put(PipelineStatus.VERIFYING, EnumSet.of( + PipelineStatus.SUCCEEDED, + PipelineStatus.ERROR + )); ALLOWED_TRANSITIONS.put(PipelineStatus.STOPPED, EnumSet.noneOf(PipelineStatus.class)); ALLOWED_TRANSITIONS.put(PipelineStatus.SUCCEEDED, EnumSet.noneOf(PipelineStatus.class)); ALLOWED_TRANSITIONS.put(PipelineStatus.ERROR, EnumSet.noneOf(PipelineStatus.class)); diff --git a/src/main/java/com/github/wellch4n/oops/domain/shared/PipelineStatus.java b/src/main/java/com/github/wellch4n/oops/domain/shared/PipelineStatus.java index 369d1869..ca5c7fae 100644 --- a/src/main/java/com/github/wellch4n/oops/domain/shared/PipelineStatus.java +++ b/src/main/java/com/github/wellch4n/oops/domain/shared/PipelineStatus.java @@ -5,7 +5,7 @@ * @date 2025/7/5 */ public enum PipelineStatus { - INITIALIZED, RUNNING, BUILD_SUCCEEDED, DEPLOYING, + INITIALIZED, RUNNING, BUILD_SUCCEEDED, DEPLOYING, VERIFYING, STOPPED, SUCCEEDED, ERROR } diff --git a/src/main/java/com/github/wellch4n/oops/infrastructure/config/PipelineHealthProperties.java b/src/main/java/com/github/wellch4n/oops/infrastructure/config/PipelineHealthProperties.java new file mode 100644 index 00000000..0da5a94e --- /dev/null +++ b/src/main/java/com/github/wellch4n/oops/infrastructure/config/PipelineHealthProperties.java @@ -0,0 +1,22 @@ +package com.github.wellch4n.oops.infrastructure.config; + +import java.time.Duration; +import lombok.Data; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Configuration; + +/** + * Post-deploy health verification settings. When {@code enabled}, a deploy transitions to VERIFYING after the + * StatefulSet is applied and is only marked SUCCEEDED once the rollout is ready; {@code timeout} bounds how long + * verification waits before failing. When disabled, a deploy is marked SUCCEEDED immediately after apply + * (legacy behavior). + */ +@Data +@Configuration +@ConfigurationProperties(prefix = "oops.pipeline.health") +public class PipelineHealthProperties { + + private boolean enabled = true; + + private Duration timeout = Duration.ofMinutes(5); +} diff --git a/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/KubernetesApplicationRuntimeGateway.java b/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/KubernetesApplicationRuntimeGateway.java index 24ba3776..b022f232 100644 --- a/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/KubernetesApplicationRuntimeGateway.java +++ b/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/KubernetesApplicationRuntimeGateway.java @@ -6,6 +6,7 @@ import com.github.wellch4n.oops.domain.application.ApplicationRuntimeSpec; import com.github.wellch4n.oops.domain.environment.Environment; import com.github.wellch4n.oops.application.dto.ApplicationPodStatusView; +import com.github.wellch4n.oops.application.dto.DeploymentHealth; import io.fabric8.kubernetes.api.model.Pod; import io.fabric8.kubernetes.api.model.Quantity; import io.fabric8.kubernetes.api.model.ResourceRequirementsBuilder; @@ -279,6 +280,62 @@ public String findCurrentImage(Environment environment, String namespace, String .orElseGet(() -> containers.isEmpty() ? null : containers.getFirst().getImage()); } + private static final java.util.Set FATAL_WAITING_REASONS = + java.util.Set.of("ImagePullBackOff", "ErrImagePull", "CrashLoopBackOff"); + + @Override + public DeploymentHealth getDeploymentHealth(Environment environment, String namespace, String applicationName) { + var client = clientPool.get(environment.getKubernetesApiServer()); + var statefulSet = client.apps().statefulSets() + .inNamespace(namespace) + .withName(applicationName) + .get(); + if (statefulSet == null) { + return new DeploymentHealth(true, false, null, null, null); + } + + Integer desiredReplicas = statefulSet.getSpec() != null ? statefulSet.getSpec().getReplicas() : null; + var status = statefulSet.getStatus(); + Integer readyReplicas = status != null ? status.getReadyReplicas() : null; + Integer updatedReplicas = status != null ? status.getUpdatedReplicas() : null; + Long generation = statefulSet.getMetadata() != null ? statefulSet.getMetadata().getGeneration() : null; + Long observedGeneration = status != null ? status.getObservedGeneration() : null; + + int desired = desiredReplicas == null ? 0 : desiredReplicas; + int ready = readyReplicas == null ? 0 : readyReplicas; + int updated = updatedReplicas == null ? 0 : updatedReplicas; + boolean generationObserved = generation == null + || (observedGeneration != null && observedGeneration >= generation); + boolean rolloutComplete = generationObserved && updated == desired && ready == desired; + + String failureReason = findFatalPodWaitingReason(client, namespace, applicationName); + + return new DeploymentHealth(false, rolloutComplete, desiredReplicas, readyReplicas, failureReason); + } + + private String findFatalPodWaitingReason(KubernetesClient client, String namespace, String applicationName) { + var pods = client.pods() + .inNamespace(namespace) + .withLabel("oops.type", OopsTypes.APPLICATION.name()) + .withLabel("oops.app.name", applicationName) + .list(); + for (Pod pod : pods.getItems()) { + if (pod.getStatus() == null || pod.getStatus().getContainerStatuses() == null) { + continue; + } + for (var containerStatus : pod.getStatus().getContainerStatuses()) { + var state = containerStatus.getState(); + if (state != null && state.getWaiting() != null) { + String reason = state.getWaiting().getReason(); + if (reason != null && FATAL_WAITING_REASONS.contains(reason)) { + return reason + " (" + pod.getMetadata().getName() + ")"; + } + } + } + } + return null; + } + private boolean hasResource(ApplicationRuntimeSpec.EnvironmentConfig runtimeSpec) { return StringUtils.isNotBlank(runtimeSpec.getCpuRequest()) || StringUtils.isNotBlank(runtimeSpec.getCpuLimit()) diff --git a/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/task/processor/StatefulSetProcessor.java b/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/task/processor/StatefulSetProcessor.java index 771ed8ab..65b9af17 100644 --- a/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/task/processor/StatefulSetProcessor.java +++ b/src/main/java/com/github/wellch4n/oops/infrastructure/kubernetes/task/processor/StatefulSetProcessor.java @@ -65,6 +65,19 @@ public void process(DeployContext ctx) { .withTimeoutSeconds(healthCheck.effectiveTimeoutSeconds()) .withFailureThreshold(healthCheck.effectiveFailureThreshold()) .endLivenessProbe(); + // Readiness probe from the same config: it gates Service traffic and, crucially, drives + // readyReplicas — which is what post-deploy verification (VERIFYING) keys off to decide a + // rollout is actually healthy rather than merely started. + containerBuilder.withNewReadinessProbe() + .withNewHttpGet() + .withPath(healthCheck.normalizedPath()) + .withNewPort(appPort) + .endHttpGet() + .withInitialDelaySeconds(healthCheck.effectiveInitialDelaySeconds()) + .withPeriodSeconds(healthCheck.effectivePeriodSeconds()) + .withTimeoutSeconds(healthCheck.effectiveTimeoutSeconds()) + .withFailureThreshold(healthCheck.effectiveFailureThreshold()) + .endReadinessProbe(); } StatefulSet statefulSet = new StatefulSetBuilder() diff --git a/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/Pipeline.java b/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/Pipeline.java index 31c7517e..039c07af 100644 --- a/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/Pipeline.java +++ b/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/Pipeline.java @@ -7,6 +7,7 @@ import jakarta.persistence.Entity; import jakarta.persistence.EnumType; import jakarta.persistence.Enumerated; +import java.time.LocalDateTime; import lombok.Data; import lombok.EqualsAndHashCode; @@ -48,6 +49,8 @@ public class Pipeline extends BaseDataObject { private String rollbackFromPipelineId; + private LocalDateTime verifyDeadline; + public String getName() { return String.format("%s-pipeline-%s", applicationName, getId()); } diff --git a/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelinePersistenceAdapter.java b/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelinePersistenceAdapter.java index efabfeef..713a5f54 100644 --- a/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelinePersistenceAdapter.java +++ b/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelinePersistenceAdapter.java @@ -97,6 +97,11 @@ public int updateStatusAndMessageIfMatch(String id, PipelineStatus expected, Pip return pipelineRepository.updateStatusAndMessageIfMatch(id, expected, target, message); } + @Override + public int updateStatusAndDeadlineIfMatch(String id, PipelineStatus expected, PipelineStatus target, java.time.LocalDateTime deadline) { + return pipelineRepository.updateStatusAndDeadlineIfMatch(id, expected, target, deadline); + } + @Override public List query(String namespace, String applicationName) { return PersistenceMapper.convertList(pipelineRepository.findAll((root, query, criteriaBuilder) -> { diff --git a/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelineRepository.java b/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelineRepository.java index f088075e..a2d0c61a 100644 --- a/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelineRepository.java +++ b/src/main/java/com/github/wellch4n/oops/infrastructure/persistence/jpa/PipelineRepository.java @@ -58,4 +58,12 @@ int updateStatusAndMessageIfMatch(@Param("id") String id, @Param("expected") PipelineStatus expected, @Param("target") PipelineStatus target, @Param("message") String message); + + @Modifying + @Transactional + @Query("update Pipeline p set p.status = :target, p.verifyDeadline = :deadline where p.id = :id and p.status = :expected") + int updateStatusAndDeadlineIfMatch(@Param("id") String id, + @Param("expected") PipelineStatus expected, + @Param("target") PipelineStatus target, + @Param("deadline") java.time.LocalDateTime deadline); } diff --git a/src/main/java/com/github/wellch4n/oops/infrastructure/scheduler/PipelineInstanceScanJob.java b/src/main/java/com/github/wellch4n/oops/infrastructure/scheduler/PipelineInstanceScanJob.java index 52483098..14a0b7fc 100644 --- a/src/main/java/com/github/wellch4n/oops/infrastructure/scheduler/PipelineInstanceScanJob.java +++ b/src/main/java/com/github/wellch4n/oops/infrastructure/scheduler/PipelineInstanceScanJob.java @@ -1,10 +1,13 @@ package com.github.wellch4n.oops.infrastructure.scheduler; +import com.github.wellch4n.oops.application.port.ApplicationRuntimeGateway; import com.github.wellch4n.oops.application.port.ArtifactDeploymentExecutor; import com.github.wellch4n.oops.application.port.PipelineJobGateway; import com.github.wellch4n.oops.application.port.PipelineJobStatus; import com.github.wellch4n.oops.application.port.repository.ApplicationRepository; import com.github.wellch4n.oops.application.port.repository.PipelineRepository; +import com.github.wellch4n.oops.application.dto.DeploymentHealth; +import com.github.wellch4n.oops.infrastructure.config.PipelineHealthProperties; import com.github.wellch4n.oops.domain.application.Application; import com.github.wellch4n.oops.domain.application.ApplicationRuntimeSpec; import com.github.wellch4n.oops.domain.delivery.Pipeline; @@ -15,6 +18,7 @@ import com.github.wellch4n.oops.domain.shared.DeployMode; import com.github.wellch4n.oops.domain.shared.PipelineStatus; import com.github.wellch4n.oops.application.service.EnvironmentService; +import java.time.LocalDateTime; import java.util.List; import org.apache.commons.lang3.StringUtils; import org.springframework.context.ApplicationEventPublisher; @@ -36,13 +40,17 @@ public class PipelineInstanceScanJob { private final PipelineJobGateway pipelineJobGateway; private final ArtifactDeploymentExecutor artifactDeploymentExecutor; private final PipelineStateMachine pipelineStateMachine; + private final ApplicationRuntimeGateway applicationRuntimeGateway; + private final PipelineHealthProperties pipelineHealthProperties; public PipelineInstanceScanJob(ApplicationRepository applicationRepository, PipelineRepository pipelineRepository, EnvironmentService environmentService, ApplicationEventPublisher eventPublisher, PipelineJobGateway pipelineJobGateway, ArtifactDeploymentExecutor artifactDeploymentExecutor, - PipelineStateMachine pipelineStateMachine) { + PipelineStateMachine pipelineStateMachine, + ApplicationRuntimeGateway applicationRuntimeGateway, + PipelineHealthProperties pipelineHealthProperties) { this.applicationRepository = applicationRepository; this.pipelineRepository = pipelineRepository; this.environmentService = environmentService; @@ -50,6 +58,8 @@ public PipelineInstanceScanJob(ApplicationRepository applicationRepository, this.pipelineJobGateway = pipelineJobGateway; this.artifactDeploymentExecutor = artifactDeploymentExecutor; this.pipelineStateMachine = pipelineStateMachine; + this.applicationRuntimeGateway = applicationRuntimeGateway; + this.pipelineHealthProperties = pipelineHealthProperties; } @Scheduled(fixedRate = 5000) @@ -111,14 +121,7 @@ public void scan() { applicationRuntimeSpecEnvironmentConfig, healthCheck, applicationServiceConfig ); - pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); - pipelineRepository.updateStatusIfMatch( - pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED - ); - pipeline.markSucceeded(); - eventPublisher.publishEvent(PipelineNotificationEvent.of( - pipeline, PipelineNotificationType.SUCCEEDED, "应用已经成功发布。" - )); + completeDeployPhase(pipeline); } else if (jobStatus == PipelineJobStatus.FAILED) { System.err.println("Error processing succeeded pipeline " + pipeline.getId()); pipelineStateMachine.ensureCanTransition(PipelineStatus.RUNNING, PipelineStatus.ERROR); @@ -150,6 +153,91 @@ public void scan() { } } } + + scanVerifyingPipelines(); + } + + /** + * Polls pipelines awaiting post-deploy health verification. Each VERIFYING pipeline is checked against the + * live StatefulSet rollout: a converged rollout marks it SUCCEEDED, a fatal pod state or an exceeded deadline + * marks it ERROR, and anything in between leaves it VERIFYING for the next tick. + */ + private void scanVerifyingPipelines() { + List verifyingPipelines = pipelineRepository.findAllByStatus(PipelineStatus.VERIFYING); + for (Pipeline pipeline : verifyingPipelines) { + try { + Environment environment = environmentService.getEnvironment(pipeline.getEnvironment()); + if (environment == null) { + throw new IllegalStateException("Environment not found: " + pipeline.getEnvironment()); + } + + DeploymentHealth health = applicationRuntimeGateway.getDeploymentHealth( + environment, pipeline.getNamespace(), pipeline.getApplicationName()); + + if (health.hasFailure()) { + failVerification(pipeline, "新版本部署失败:" + health.failureReason()); + } else if (!health.workloadMissing() && health.rolloutComplete()) { + succeedVerification(pipeline); + } else if (pipeline.isVerifyTimedOut(LocalDateTime.now())) { + failVerification(pipeline, "健康验证超时,新版本未在规定时间内就绪。"); + } + // otherwise: still rolling out, leave VERIFYING for the next tick + } catch (Exception e) { + System.out.println("Error verifying pipeline instance: " + e.getMessage()); + if (pipeline.isVerifyTimedOut(LocalDateTime.now())) { + failVerification(pipeline, "健康验证超时,新版本未在规定时间内就绪。"); + } + } + } + } + + private void succeedVerification(Pipeline pipeline) { + pipelineStateMachine.ensureCanTransition(PipelineStatus.VERIFYING, PipelineStatus.SUCCEEDED); + int updated = pipelineRepository.updateStatusIfMatch( + pipeline.getId(), PipelineStatus.VERIFYING, PipelineStatus.SUCCEEDED); + if (updated > 0) { + pipeline.markSucceeded(); + eventPublisher.publishEvent(PipelineNotificationEvent.of( + pipeline, PipelineNotificationType.SUCCEEDED, "应用已经成功发布。" + )); + } + } + + private void failVerification(Pipeline pipeline, String message) { + pipelineStateMachine.ensureCanTransition(PipelineStatus.VERIFYING, PipelineStatus.ERROR); + int updated = pipelineRepository.updateStatusAndMessageIfMatch( + pipeline.getId(), PipelineStatus.VERIFYING, PipelineStatus.ERROR, message); + if (updated > 0) { + pipeline.markFailed(message); + eventPublisher.publishEvent(PipelineNotificationEvent.of( + pipeline, PipelineNotificationType.FAILED, message + )); + } + } + + /** + * Completes the deploy phase after the artifact is applied: moves to VERIFYING (with a deadline) when health + * verification is enabled, otherwise marks SUCCEEDED immediately (legacy behavior). + */ + private void completeDeployPhase(Pipeline pipeline) { + if (pipelineHealthProperties.isEnabled()) { + LocalDateTime deadline = LocalDateTime.now().plus(pipelineHealthProperties.getTimeout()); + pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING); + pipelineRepository.updateStatusAndDeadlineIfMatch( + pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING, deadline); + pipeline.markVerifying(deadline); + eventPublisher.publishEvent(PipelineNotificationEvent.of( + pipeline, PipelineNotificationType.VERIFYING, "正在验证新版本是否就绪…" + )); + return; + } + pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); + pipelineRepository.updateStatusIfMatch( + pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED); + pipeline.markSucceeded(); + eventPublisher.publishEvent(PipelineNotificationEvent.of( + pipeline, PipelineNotificationType.SUCCEEDED, "应用已经成功发布。" + )); } private ApplicationRuntimeSpec.EnvironmentConfig resolveEnvironmentConfig(Application application, String environmentName) { diff --git a/src/main/resources/db/migration/V11__add_pipeline_verify_deadline.sql b/src/main/resources/db/migration/V11__add_pipeline_verify_deadline.sql new file mode 100644 index 00000000..9bc91c1c --- /dev/null +++ b/src/main/resources/db/migration/V11__add_pipeline_verify_deadline.sql @@ -0,0 +1,2 @@ +ALTER TABLE `pipeline` + ADD COLUMN `verify_deadline` datetime DEFAULT NULL; diff --git a/src/test/java/com/github/wellch4n/oops/application/service/PipelineHealthVerificationTests.java b/src/test/java/com/github/wellch4n/oops/application/service/PipelineHealthVerificationTests.java new file mode 100644 index 00000000..f5acbc45 --- /dev/null +++ b/src/test/java/com/github/wellch4n/oops/application/service/PipelineHealthVerificationTests.java @@ -0,0 +1,122 @@ +package com.github.wellch4n.oops.application.service; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.github.wellch4n.oops.application.port.ArtifactDeploymentExecutor; +import com.github.wellch4n.oops.application.port.PipelineJobGateway; +import com.github.wellch4n.oops.application.port.PipelineLogGateway; +import com.github.wellch4n.oops.application.port.repository.ApplicationRepository; +import com.github.wellch4n.oops.application.port.repository.PipelineRepository; +import com.github.wellch4n.oops.domain.application.Application; +import com.github.wellch4n.oops.domain.delivery.DeploymentConcurrencyPolicy; +import com.github.wellch4n.oops.domain.delivery.Pipeline; +import com.github.wellch4n.oops.domain.delivery.PipelineStateMachine; +import com.github.wellch4n.oops.domain.environment.Environment; +import com.github.wellch4n.oops.domain.shared.PipelineStatus; +import com.github.wellch4n.oops.infrastructure.config.PipelineHealthProperties; +import java.time.Duration; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.springframework.context.ApplicationEventPublisher; + +/** + * Covers the post-deploy health verification path: with verification enabled a deploy moves to VERIFYING with a + * deadline instead of straight to SUCCEEDED. The VERIFYING -> SUCCEEDED/ERROR decisions themselves live in the + * scan job and are exercised in PipelineHealthVerificationScanTests-style setups; here we pin the service-side + * transition. + */ +class PipelineHealthVerificationTests { + + private static final String NAMESPACE = "default"; + private static final String APP_NAME = "demo"; + private static final String ENV = "prod"; + private static final String SOURCE_ID = "source-pipeline-id"; + private static final String NEW_ID = "new-rollback-id"; + + private PipelineRepository pipelineRepository; + private EnvironmentService environmentService; + private ApplicationRepository applicationRepository; + private ArtifactDeploymentExecutor artifactDeploymentExecutor; + private PipelineService pipelineService; + + @BeforeEach + void setUp() { + pipelineRepository = Mockito.mock(PipelineRepository.class); + environmentService = Mockito.mock(EnvironmentService.class); + applicationRepository = Mockito.mock(ApplicationRepository.class); + UserService userService = Mockito.mock(UserService.class); + ApplicationEventPublisher eventPublisher = Mockito.mock(ApplicationEventPublisher.class); + artifactDeploymentExecutor = Mockito.mock(ArtifactDeploymentExecutor.class); + PipelineJobGateway pipelineJobGateway = Mockito.mock(PipelineJobGateway.class); + PipelineLogGateway pipelineLogGateway = Mockito.mock(PipelineLogGateway.class); + + PipelineHealthProperties healthProperties = new PipelineHealthProperties(); + healthProperties.setEnabled(true); + healthProperties.setTimeout(Duration.ofMinutes(5)); + + pipelineService = new PipelineService( + pipelineRepository, + environmentService, + applicationRepository, + userService, + eventPublisher, + artifactDeploymentExecutor, + pipelineJobGateway, + pipelineLogGateway, + PipelineStateMachine.getInstance(), + new DeploymentConcurrencyPolicy(), + healthProperties + ); + } + + private Pipeline succeededSource() { + Pipeline source = new Pipeline(); + source.setId(SOURCE_ID); + source.setNamespace(NAMESPACE); + source.setApplicationName(APP_NAME); + source.setEnvironment(ENV); + source.setArtifact("registry.example.com/demo:v1"); + source.setStatus(PipelineStatus.SUCCEEDED); + return source; + } + + @Test + void rollbackEntersVerifyingWhenHealthEnabled() { + when(pipelineRepository.findByNamespaceAndApplicationNameAndId(NAMESPACE, APP_NAME, SOURCE_ID)) + .thenReturn(succeededSource()); + when(pipelineRepository.save(any(Pipeline.class))).thenAnswer(invocation -> { + Pipeline saved = invocation.getArgument(0); + saved.setId(NEW_ID); + return saved; + }); + when(pipelineRepository.existsByNamespaceAndApplicationNameAndStatusIn(eq(NAMESPACE), eq(APP_NAME), anyList())) + .thenReturn(false); + when(pipelineRepository.updateStatusIfMatch(eq(NEW_ID), eq(PipelineStatus.INITIALIZED), eq(PipelineStatus.DEPLOYING))) + .thenReturn(1); + + Environment environment = new Environment(); + environment.setName(ENV); + when(environmentService.getEnvironment(ENV)).thenReturn(environment); + + Application application = new Application(); + application.setName(APP_NAME); + application.setNamespace(NAMESPACE); + when(applicationRepository.findAggregate(NAMESPACE, APP_NAME)).thenReturn(application); + + String resultId = pipelineService.rollback(NAMESPACE, APP_NAME, SOURCE_ID, "operator-1"); + + assertEquals(NEW_ID, resultId); + // With health verification on, the deploy phase ends in VERIFYING (with a deadline), NOT SUCCEEDED. + verify(pipelineRepository).updateStatusAndDeadlineIfMatch( + eq(NEW_ID), eq(PipelineStatus.DEPLOYING), eq(PipelineStatus.VERIFYING), any()); + verify(pipelineRepository, never()).updateStatusIfMatch( + eq(NEW_ID), eq(PipelineStatus.DEPLOYING), eq(PipelineStatus.SUCCEEDED)); + } +} diff --git a/src/test/java/com/github/wellch4n/oops/application/service/PipelineRollbackTests.java b/src/test/java/com/github/wellch4n/oops/application/service/PipelineRollbackTests.java index 1b1c27a5..85175260 100644 --- a/src/test/java/com/github/wellch4n/oops/application/service/PipelineRollbackTests.java +++ b/src/test/java/com/github/wellch4n/oops/application/service/PipelineRollbackTests.java @@ -24,6 +24,7 @@ import com.github.wellch4n.oops.domain.environment.Environment; import com.github.wellch4n.oops.domain.shared.PipelineStatus; import com.github.wellch4n.oops.domain.shared.PipelineTriggerType; +import com.github.wellch4n.oops.infrastructure.config.PipelineHealthProperties; import com.github.wellch4n.oops.shared.exception.BizException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -57,6 +58,11 @@ void setUp() { PipelineJobGateway pipelineJobGateway = org.mockito.Mockito.mock(PipelineJobGateway.class); PipelineLogGateway pipelineLogGateway = org.mockito.Mockito.mock(PipelineLogGateway.class); + // Health verification disabled: rollback completes straight to SUCCEEDED, keeping these tests + // focused on rollback mechanics. The VERIFYING path is covered by PipelineHealthVerificationTests. + PipelineHealthProperties healthProperties = new PipelineHealthProperties(); + healthProperties.setEnabled(false); + pipelineService = new PipelineService( pipelineRepository, environmentService, @@ -67,7 +73,8 @@ void setUp() { pipelineJobGateway, pipelineLogGateway, PipelineStateMachine.getInstance(), - new DeploymentConcurrencyPolicy() + new DeploymentConcurrencyPolicy(), + healthProperties ); } diff --git a/src/test/java/com/github/wellch4n/oops/domain/delivery/DeploymentConcurrencyPolicyTests.java b/src/test/java/com/github/wellch4n/oops/domain/delivery/DeploymentConcurrencyPolicyTests.java new file mode 100644 index 00000000..ec1147e5 --- /dev/null +++ b/src/test/java/com/github/wellch4n/oops/domain/delivery/DeploymentConcurrencyPolicyTests.java @@ -0,0 +1,26 @@ +package com.github.wellch4n.oops.domain.delivery; + +import static org.junit.jupiter.api.Assertions.assertIterableEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.github.wellch4n.oops.domain.shared.PipelineStatus; +import com.github.wellch4n.oops.shared.exception.BizException; +import java.util.List; +import org.junit.jupiter.api.Test; + +class DeploymentConcurrencyPolicyTests { + + private final DeploymentConcurrencyPolicy policy = new DeploymentConcurrencyPolicy(); + + @Test + void activeStatusesIncludeVerifying() { + assertIterableEquals( + List.of(PipelineStatus.RUNNING, PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING), + policy.activePipelineStatuses()); + } + + @Test + void rejectsWhenActivePipelineExists() { + assertThrows(BizException.class, () -> policy.ensureNoActivePipeline(true)); + } +} diff --git a/src/test/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachineTests.java b/src/test/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachineTests.java index 113f282f..a873db55 100644 --- a/src/test/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachineTests.java +++ b/src/test/java/com/github/wellch4n/oops/domain/delivery/PipelineStateMachineTests.java @@ -29,6 +29,26 @@ void stillAllowsNormalBuildPath() { PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED)); } + @Test + void allowsDeployingToVerifyingThenSucceeded() { + assertDoesNotThrow(() -> stateMachine.ensureCanTransition( + PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING)); + assertDoesNotThrow(() -> stateMachine.ensureCanTransition( + PipelineStatus.VERIFYING, PipelineStatus.SUCCEEDED)); + } + + @Test + void allowsVerifyingToError() { + assertDoesNotThrow(() -> stateMachine.ensureCanTransition( + PipelineStatus.VERIFYING, PipelineStatus.ERROR)); + } + + @Test + void rejectsVerifyingToStopped() { + assertThrows(BizException.class, () -> stateMachine.ensureCanTransition( + PipelineStatus.VERIFYING, PipelineStatus.STOPPED)); + } + @Test void rejectsIllegalTransitionFromInitialized() { assertThrows(BizException.class, () -> stateMachine.ensureCanTransition( diff --git a/src/test/java/com/github/wellch4n/oops/infrastructure/scheduler/PipelineVerificationScanTests.java b/src/test/java/com/github/wellch4n/oops/infrastructure/scheduler/PipelineVerificationScanTests.java new file mode 100644 index 00000000..a0687a65 --- /dev/null +++ b/src/test/java/com/github/wellch4n/oops/infrastructure/scheduler/PipelineVerificationScanTests.java @@ -0,0 +1,163 @@ +package com.github.wellch4n.oops.infrastructure.scheduler; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.github.wellch4n.oops.application.dto.DeploymentHealth; +import com.github.wellch4n.oops.application.port.ApplicationRuntimeGateway; +import com.github.wellch4n.oops.application.port.ArtifactDeploymentExecutor; +import com.github.wellch4n.oops.application.port.PipelineJobGateway; +import com.github.wellch4n.oops.application.port.repository.ApplicationRepository; +import com.github.wellch4n.oops.application.port.repository.PipelineRepository; +import com.github.wellch4n.oops.application.service.EnvironmentService; +import com.github.wellch4n.oops.domain.delivery.Pipeline; +import com.github.wellch4n.oops.domain.delivery.PipelineStateMachine; +import com.github.wellch4n.oops.domain.environment.Environment; +import com.github.wellch4n.oops.domain.shared.PipelineStatus; +import com.github.wellch4n.oops.infrastructure.config.PipelineHealthProperties; +import java.time.LocalDateTime; +import java.util.List; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.springframework.context.ApplicationEventPublisher; + +/** + * Exercises the scan job's VERIFYING decision logic: a converged rollout succeeds, a fatal pod state fails fast, + * an exceeded deadline times out, and an in-progress rollout is left untouched. + */ +class PipelineVerificationScanTests { + + private static final String NAMESPACE = "default"; + private static final String APP_NAME = "demo"; + private static final String ENV = "prod"; + private static final String PIPELINE_ID = "verifying-id"; + + private PipelineRepository pipelineRepository; + private EnvironmentService environmentService; + private ApplicationRuntimeGateway applicationRuntimeGateway; + private PipelineInstanceScanJob scanJob; + + @BeforeEach + void setUp() { + pipelineRepository = Mockito.mock(PipelineRepository.class); + environmentService = Mockito.mock(EnvironmentService.class); + applicationRuntimeGateway = Mockito.mock(ApplicationRuntimeGateway.class); + ApplicationRepository applicationRepository = Mockito.mock(ApplicationRepository.class); + ApplicationEventPublisher eventPublisher = Mockito.mock(ApplicationEventPublisher.class); + PipelineJobGateway pipelineJobGateway = Mockito.mock(PipelineJobGateway.class); + ArtifactDeploymentExecutor artifactDeploymentExecutor = Mockito.mock(ArtifactDeploymentExecutor.class); + + PipelineHealthProperties healthProperties = new PipelineHealthProperties(); + healthProperties.setEnabled(true); + + scanJob = new PipelineInstanceScanJob( + applicationRepository, + pipelineRepository, + environmentService, + eventPublisher, + pipelineJobGateway, + artifactDeploymentExecutor, + PipelineStateMachine.getInstance(), + applicationRuntimeGateway, + healthProperties + ); + + // No RUNNING pipelines; the build branch is a no-op for these tests. + when(pipelineRepository.findAllByStatus(PipelineStatus.RUNNING)).thenReturn(List.of()); + + Environment environment = new Environment(); + environment.setName(ENV); + when(environmentService.getEnvironment(ENV)).thenReturn(environment); + } + + private Pipeline verifyingPipeline(LocalDateTime deadline) { + Pipeline pipeline = new Pipeline(); + pipeline.setId(PIPELINE_ID); + pipeline.setNamespace(NAMESPACE); + pipeline.setApplicationName(APP_NAME); + pipeline.setEnvironment(ENV); + pipeline.setStatus(PipelineStatus.VERIFYING); + pipeline.setVerifyDeadline(deadline); + return pipeline; + } + + @Test + void convergedRolloutMarksSucceeded() { + when(pipelineRepository.findAllByStatus(PipelineStatus.VERIFYING)) + .thenReturn(List.of(verifyingPipeline(LocalDateTime.now().plusMinutes(5)))); + when(applicationRuntimeGateway.getDeploymentHealth(any(), eq(NAMESPACE), eq(APP_NAME))) + .thenReturn(new DeploymentHealth(false, true, 1, 1, null)); + when(pipelineRepository.updateStatusIfMatch(eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.SUCCEEDED))) + .thenReturn(1); + + scanJob.scan(); + + verify(pipelineRepository).updateStatusIfMatch(PIPELINE_ID, PipelineStatus.VERIFYING, PipelineStatus.SUCCEEDED); + } + + @Test + void fatalPodStateMarksErrorBeforeDeadline() { + when(pipelineRepository.findAllByStatus(PipelineStatus.VERIFYING)) + .thenReturn(List.of(verifyingPipeline(LocalDateTime.now().plusMinutes(5)))); + when(applicationRuntimeGateway.getDeploymentHealth(any(), eq(NAMESPACE), eq(APP_NAME))) + .thenReturn(new DeploymentHealth(false, false, 1, 0, "ImagePullBackOff (demo-0)")); + when(pipelineRepository.updateStatusAndMessageIfMatch(eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.ERROR), any())) + .thenReturn(1); + + scanJob.scan(); + + verify(pipelineRepository).updateStatusAndMessageIfMatch( + eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.ERROR), any()); + verify(pipelineRepository, never()).updateStatusIfMatch( + eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.SUCCEEDED)); + } + + @Test + void exceededDeadlineMarksError() { + when(pipelineRepository.findAllByStatus(PipelineStatus.VERIFYING)) + .thenReturn(List.of(verifyingPipeline(LocalDateTime.now().minusMinutes(1)))); + when(applicationRuntimeGateway.getDeploymentHealth(any(), eq(NAMESPACE), eq(APP_NAME))) + .thenReturn(new DeploymentHealth(false, false, 2, 1, null)); + when(pipelineRepository.updateStatusAndMessageIfMatch(eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.ERROR), any())) + .thenReturn(1); + + scanJob.scan(); + + verify(pipelineRepository).updateStatusAndMessageIfMatch( + eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.ERROR), any()); + } + + @Test + void exceededDeadlineAfterHealthQueryErrorMarksError() { + when(pipelineRepository.findAllByStatus(PipelineStatus.VERIFYING)) + .thenReturn(List.of(verifyingPipeline(LocalDateTime.now().minusMinutes(1)))); + when(applicationRuntimeGateway.getDeploymentHealth(any(), eq(NAMESPACE), eq(APP_NAME))) + .thenThrow(new IllegalStateException("Kubernetes API unavailable")); + when(pipelineRepository.updateStatusAndMessageIfMatch(eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.ERROR), any())) + .thenReturn(1); + + scanJob.scan(); + + verify(pipelineRepository).updateStatusAndMessageIfMatch( + eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.ERROR), any()); + } + + @Test + void inProgressRolloutLeavesVerifyingUntouched() { + when(pipelineRepository.findAllByStatus(PipelineStatus.VERIFYING)) + .thenReturn(List.of(verifyingPipeline(LocalDateTime.now().plusMinutes(5)))); + when(applicationRuntimeGateway.getDeploymentHealth(any(), eq(NAMESPACE), eq(APP_NAME))) + .thenReturn(new DeploymentHealth(false, false, 2, 1, null)); + + scanJob.scan(); + + verify(pipelineRepository, never()).updateStatusIfMatch( + eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.SUCCEEDED)); + verify(pipelineRepository, never()).updateStatusAndMessageIfMatch( + eq(PIPELINE_ID), eq(PipelineStatus.VERIFYING), eq(PipelineStatus.ERROR), any()); + } +} diff --git a/web/app/apps/[namespace]/[name]/pipelines/[pipelineId]/page.tsx b/web/app/apps/[namespace]/[name]/pipelines/[pipelineId]/page.tsx index 101f1a74..931cc044 100644 --- a/web/app/apps/[namespace]/[name]/pipelines/[pipelineId]/page.tsx +++ b/web/app/apps/[namespace]/[name]/pipelines/[pipelineId]/page.tsx @@ -82,13 +82,14 @@ const statusLabel: Record = { INITIALIZED: "apps.pipeline.status.INITIALIZED", RUNNING: "apps.pipeline.status.RUNNING", DEPLOYING: "apps.pipeline.status.DEPLOYING", + VERIFYING: "apps.pipeline.status.VERIFYING", SUCCEEDED: "apps.pipeline.status.SUCCEEDED", ERROR: "apps.pipeline.status.ERROR", STOPPED: "apps.pipeline.status.STOPPED", } function getStatusVariant(status: string): "default" | "secondary" | "destructive" | "outline" { - if (status === "RUNNING" || status === "DEPLOYING") return "default" + if (status === "RUNNING" || status === "DEPLOYING" || status === "VERIFYING") return "default" if (status === "SUCCEEDED") return "secondary" if (status === "ERROR" || status === "STOPPED") return "destructive" return "outline" diff --git a/web/app/help/docs/pipelines/page.tsx b/web/app/help/docs/pipelines/page.tsx index 0f73affe..e660a5dd 100644 --- a/web/app/help/docs/pipelines/page.tsx +++ b/web/app/help/docs/pipelines/page.tsx @@ -51,7 +51,7 @@ export default function PipelinesDocPage() { ]} /> - status 取值:RUNNINGBUILD_SUCCEEDEDDEPLOYINGSUCCEEDEDFAILEDSTOPPED。 + status 取值:RUNNINGBUILD_SUCCEEDEDDEPLOYINGVERIFYINGSUCCEEDEDERRORSTOPPED diff --git a/web/app/pipelines/columns.tsx b/web/app/pipelines/columns.tsx index 4b57edc6..295d076f 100644 --- a/web/app/pipelines/columns.tsx +++ b/web/app/pipelines/columns.tsx @@ -90,7 +90,7 @@ export const getPipelineColumns = ( cell: ({ row }) => { const status = row.original.status let variant: "default" | "secondary" | "destructive" | "outline" = "outline" - if (status === "RUNNING" || status === "DEPLOYING") variant = "default" + if (status === "RUNNING" || status === "DEPLOYING" || status === "VERIFYING") variant = "default" if (status === "SUCCEEDED") variant = "secondary" if (status === "ERROR" || status === "STOPPED") variant = "destructive" @@ -99,6 +99,7 @@ export const getPipelineColumns = ( INITIALIZED: "apps.pipeline.status.INITIALIZED", RUNNING: "apps.pipeline.status.RUNNING", DEPLOYING: "apps.pipeline.status.DEPLOYING", + VERIFYING: "apps.pipeline.status.VERIFYING", SUCCEEDED: "apps.pipeline.status.SUCCEEDED", ERROR: "apps.pipeline.status.ERROR", STOPPED: "apps.pipeline.status.STOPPED", diff --git a/web/lib/api/types.ts b/web/lib/api/types.ts index 5b246507..b75ab223 100644 --- a/web/lib/api/types.ts +++ b/web/lib/api/types.ts @@ -171,7 +171,7 @@ interface ApplicationContainerStatus { startedAt?: string | null } -type PipelineStatus = 'INITIALIZED' | 'RUNNING' | 'BUILD_SUCCEEDED' | 'DEPLOYING' | 'STOPPED' | 'SUCCEEDED' | 'ERROR' +type PipelineStatus = 'INITIALIZED' | 'RUNNING' | 'BUILD_SUCCEEDED' | 'DEPLOYING' | 'VERIFYING' | 'STOPPED' | 'SUCCEEDED' | 'ERROR' export type DeployMode = 'IMMEDIATE' | 'MANUAL' diff --git a/web/locales/en-US/apps.ts b/web/locales/en-US/apps.ts index 06c8ffb0..2c4f753b 100644 --- a/web/locales/en-US/apps.ts +++ b/web/locales/en-US/apps.ts @@ -235,6 +235,7 @@ const apps = { "apps.pipeline.status.INITIALIZED": "Initialized", "apps.pipeline.status.RUNNING": "Running", "apps.pipeline.status.DEPLOYING": "Deploying", + "apps.pipeline.status.VERIFYING": "Verifying", "apps.pipeline.status.SUCCEEDED": "Succeeded", "apps.pipeline.status.ERROR": "Failed", "apps.pipeline.status.STOPPED": "Stopped", diff --git a/web/locales/ja-JP/apps.ts b/web/locales/ja-JP/apps.ts index 3c70249c..3a8a6381 100644 --- a/web/locales/ja-JP/apps.ts +++ b/web/locales/ja-JP/apps.ts @@ -235,6 +235,7 @@ const apps = { "apps.pipeline.status.INITIALIZED": "初期化", "apps.pipeline.status.RUNNING": "実行中", "apps.pipeline.status.DEPLOYING": "デプロイ中", + "apps.pipeline.status.VERIFYING": "検証中", "apps.pipeline.status.SUCCEEDED": "成功", "apps.pipeline.status.ERROR": "失敗", "apps.pipeline.status.STOPPED": "停止しました", diff --git a/web/locales/zh-CN/apps.ts b/web/locales/zh-CN/apps.ts index c0794306..c8b44224 100644 --- a/web/locales/zh-CN/apps.ts +++ b/web/locales/zh-CN/apps.ts @@ -235,6 +235,7 @@ const apps = { "apps.pipeline.status.INITIALIZED": "初始化", "apps.pipeline.status.RUNNING": "运行中", "apps.pipeline.status.DEPLOYING": "发布中", + "apps.pipeline.status.VERIFYING": "验证中", "apps.pipeline.status.SUCCEEDED": "成功", "apps.pipeline.status.ERROR": "失败", "apps.pipeline.status.STOPPED": "已停止", diff --git a/web/locales/zh-TW/apps.ts b/web/locales/zh-TW/apps.ts index 3013dcc3..8bdcc23a 100644 --- a/web/locales/zh-TW/apps.ts +++ b/web/locales/zh-TW/apps.ts @@ -235,6 +235,7 @@ const apps = { "apps.pipeline.status.INITIALIZED": "初始化", "apps.pipeline.status.RUNNING": "運作中", "apps.pipeline.status.DEPLOYING": "發佈中", + "apps.pipeline.status.VERIFYING": "驗證中", "apps.pipeline.status.SUCCEEDED": "成功", "apps.pipeline.status.ERROR": "失敗", "apps.pipeline.status.STOPPED": "已停止",