Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions config/application.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ oops:
- "*/__MACOSX/*"
- .DS_Store
- "*/.DS_Store"
# Post-deploy health verification. When enabled, a deploy enters VERIFYING after the StatefulSet is
# applied and is only marked SUCCEEDED once the rollout is ready (catches ImagePullBackOff / not-ready
# pods). Set enabled=false to revert to marking SUCCEEDED immediately after apply.
health:
enabled: true
timeout: 5m
pod-filesystem:
max-download-size-bytes: 52428800
ingress:
Expand Down
6 changes: 6 additions & 0 deletions docker/application.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ oops:
- "*/__MACOSX/*"
- .DS_Store
- "*/.DS_Store"
# Post-deploy health verification. When enabled, a deploy enters VERIFYING after the StatefulSet is
# applied and is only marked SUCCEEDED once the rollout is ready (catches ImagePullBackOff / not-ready
# pods). Set enabled=false to revert to marking SUCCEEDED immediately after apply.
health:
enabled: true
timeout: 5m
pod-filesystem:
max-download-size-bytes: 52428800
feishu:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.github.wellch4n.oops.application.dto;

/**
 * Point-in-time health reading of an application's StatefulSet taken after a deploy. It is the input
 * for deciding whether a pipeline in VERIFYING should move to SUCCEEDED or ERROR.
 *
 * @param workloadMissing  whether the workload was reported as missing
 * @param rolloutComplete  whether the new revision is fully ready
 * @param desiredReplicas  desired replica count; boxed, so it may be {@code null}
 * @param readyReplicas    ready replica count; boxed, so it may be {@code null}
 * @param failureReason    first fatal pod condition (e.g. ImagePullBackOff) when one was seen, which lets
 *                         verification fail fast instead of waiting for the timeout; otherwise
 *                         {@code null} or blank
 */
public record DeploymentHealth(
        boolean workloadMissing,
        boolean rolloutComplete,
        Integer desiredReplicas,
        Integer readyReplicas,
        String failureReason
) {

    /**
     * Tells whether a fatal condition was recorded.
     *
     * @return {@code true} only when {@code failureReason} is non-null and contains at least one
     *         non-whitespace character
     */
    public boolean hasFailure() {
        if (failureReason == null) {
            return false;
        }
        return !failureReason.isBlank();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ private String resolveTitle(PipelineNotificationType type) {
case CREATED -> "发布任务已创建";
case BUILD_SUCCEEDED -> "构建成功";
case DEPLOYING -> "开始部署";
case VERIFYING -> "验证部署中";
case SUCCEEDED -> "发布成功";
case FAILED -> "发布失败";
case STOPPED -> "发布已停止";
Expand All @@ -88,7 +89,7 @@ private ExternalMessageLevel resolveLevel(PipelineNotificationType type) {
case FAILED -> ExternalMessageLevel.ERROR;
case BUILD_SUCCEEDED -> ExternalMessageLevel.WARNING;
case STOPPED -> ExternalMessageLevel.NEUTRAL;
case CREATED, DEPLOYING -> ExternalMessageLevel.INFO;
case CREATED, DEPLOYING, VERIFYING -> ExternalMessageLevel.INFO;
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ public enum PipelineNotificationType {
CREATED,
BUILD_SUCCEEDED,
DEPLOYING,
VERIFYING,
SUCCEEDED,
FAILED,
STOPPED
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.github.wellch4n.oops.domain.application.ApplicationRuntimeSpec;
import com.github.wellch4n.oops.domain.environment.Environment;
import com.github.wellch4n.oops.application.dto.ApplicationPodStatusView;
import com.github.wellch4n.oops.application.dto.DeploymentHealth;
import java.util.List;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;

Expand All @@ -27,4 +28,11 @@ void applyRuntimeSpec(Environment environment,
* if the workload does not exist. Used to highlight which pipeline's artifact is currently live.
*/
String findCurrentImage(Environment environment, String namespace, String applicationName);

/**
* Post-deploy health snapshot: whether the StatefulSet rollout has converged onto the new revision and
* whether any pod is in a fatal waiting state (ImagePullBackOff / ErrImagePull / CrashLoopBackOff).
* Used by the scan job to drive the VERIFYING status to SUCCEEDED or ERROR.
*
* @param environment environment whose workload is inspected
* @param namespace namespace of the application
* @param applicationName name of the application whose StatefulSet is inspected
* @return the health snapshot as a {@link DeploymentHealth}
*         (NOTE(review): whether a missing workload yields {@code workloadMissing=true} or an exception
*         is decided by the implementation, which is not visible here — confirm)
*/
DeploymentHealth getDeploymentHealth(Environment environment, String namespace, String applicationName);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.github.wellch4n.oops.domain.delivery.Pipeline;
import com.github.wellch4n.oops.domain.shared.PipelineStatus;
import java.time.LocalDateTime;
import java.util.List;

public interface PipelineRepository {
Expand Down Expand Up @@ -33,5 +34,7 @@ boolean existsByNamespaceAndApplicationNameAndStatusIn(

/**
* Conditional status update that also carries a message.
* NOTE(review): judging by the name and signature, this moves the pipeline {@code id} from
* {@code expected} to {@code target} and stores {@code message} only when the stored status still equals
* {@code expected}; the {@code int} is presumably the number of rows changed (0 = no match).
* Confirm against the implementation.
*/
int updateStatusAndMessageIfMatch(String id, PipelineStatus expected, PipelineStatus target, String message);

/**
* Conditional status update that also carries a deadline; PipelineService calls it with
* DEPLOYING -> VERIFYING and the verification deadline.
* NOTE(review): judging by the name and signature, the update is applied only when the stored status
* still equals {@code expected}; the {@code int} is presumably the number of rows changed (0 = no match).
* Confirm against the implementation.
*/
int updateStatusAndDeadlineIfMatch(String id, PipelineStatus expected, PipelineStatus target, LocalDateTime deadline);

/**
* Looks up pipelines by namespace and application name.
* NOTE(review): ordering and any additional filtering are decided by the implementation, which is not
* visible here — confirm before relying on them.
*/
List<Pipeline> query(String namespace, String applicationName);
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,11 @@
import com.github.wellch4n.oops.domain.shared.ApplicationSourceType;
import com.github.wellch4n.oops.application.event.PipelineNotificationEvent;
import com.github.wellch4n.oops.application.event.PipelineNotificationType;
import com.github.wellch4n.oops.domain.shared.PipelineStatus;
import com.github.wellch4n.oops.shared.exception.BizException;
import com.github.wellch4n.oops.application.dto.DeployCommand;
import com.github.wellch4n.oops.application.dto.DeployStrategyParam;
import com.github.wellch4n.oops.application.dto.GitDeployStrategyParam;
import com.github.wellch4n.oops.application.dto.ZipDeployStrategyParam;
import java.util.List;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.stereotype.Service;

Expand Down Expand Up @@ -66,7 +64,7 @@ public String deployApplication(String namespace,
throw new BizException("Deploy strategy is required");
}
deploymentConcurrencyPolicy.ensureNoActivePipeline(pipelineRepository.existsByNamespaceAndApplicationNameAndStatusIn(
namespace, applicationName, List.of(PipelineStatus.RUNNING, PipelineStatus.DEPLOYING)
namespace, applicationName, deploymentConcurrencyPolicy.activePipelineStatuses()
));

Environment environment = requireEnvironment(request.environment());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import com.github.wellch4n.oops.application.dto.LastSuccessfulPipelineDto;
import com.github.wellch4n.oops.application.dto.Page;
import com.github.wellch4n.oops.application.dto.PipelineDto;
import com.github.wellch4n.oops.infrastructure.config.PipelineHealthProperties;
import java.time.LocalDateTime;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
Expand All @@ -46,6 +48,7 @@ public class PipelineService {
private final PipelineLogGateway pipelineLogGateway;
private final PipelineStateMachine pipelineStateMachine;
private final DeploymentConcurrencyPolicy deploymentConcurrencyPolicy;
private final PipelineHealthProperties pipelineHealthProperties;

public PipelineService(PipelineRepository pipelineRepository, EnvironmentService environmentService,
ApplicationRepository applicationRepository,
Expand All @@ -55,7 +58,8 @@ public PipelineService(PipelineRepository pipelineRepository, EnvironmentService
PipelineJobGateway pipelineJobGateway,
PipelineLogGateway pipelineLogGateway,
PipelineStateMachine pipelineStateMachine,
DeploymentConcurrencyPolicy deploymentConcurrencyPolicy) {
DeploymentConcurrencyPolicy deploymentConcurrencyPolicy,
PipelineHealthProperties pipelineHealthProperties) {
this.pipelineRepository = pipelineRepository;
this.environmentService = environmentService;
this.applicationRepository = applicationRepository;
Expand All @@ -66,6 +70,7 @@ public PipelineService(PipelineRepository pipelineRepository, EnvironmentService
this.pipelineLogGateway = pipelineLogGateway;
this.pipelineStateMachine = pipelineStateMachine;
this.deploymentConcurrencyPolicy = deploymentConcurrencyPolicy;
this.pipelineHealthProperties = pipelineHealthProperties;
}

public Page<PipelineDto> getPipelines(String namespace, String applicationName, String environment, Integer page, Integer size) {
Expand Down Expand Up @@ -145,7 +150,7 @@ public Boolean deployPipeline(String namespace, String applicationName, String i
}
pipelineStateMachine.ensureManualDeployable(pipeline.getStatus());
deploymentConcurrencyPolicy.ensureNoActivePipeline(pipelineRepository.existsByNamespaceAndApplicationNameAndStatusIn(
namespace, applicationName, List.of(PipelineStatus.RUNNING, PipelineStatus.DEPLOYING)
namespace, applicationName, deploymentConcurrencyPolicy.activePipelineStatuses()
));
pipelineStateMachine.ensureCanTransition(PipelineStatus.BUILD_SUCCEEDED, PipelineStatus.DEPLOYING);

Expand All @@ -171,12 +176,7 @@ public Boolean deployPipeline(String namespace, String applicationName, String i

artifactDeploymentExecutor.deploy(pipeline, application, environment, runtimeSpec, healthCheck, serviceConfig);

pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED);
pipelineRepository.updateStatusIfMatch(pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED);
pipeline.markSucceeded();
eventPublisher.publishEvent(PipelineNotificationEvent.of(
pipeline, PipelineNotificationType.SUCCEEDED, "应用已经成功发布。"
));
completeDeployPhase(pipeline, "正在验证新版本是否就绪…", "应用已经成功发布。");
} catch (Exception e) {
pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.ERROR);
String message = StringUtils.defaultIfBlank(e.getMessage(), "发布任务执行失败,请查看日志。");
Expand Down Expand Up @@ -205,7 +205,7 @@ public String rollback(String namespace, String applicationName, String targetPi
}

deploymentConcurrencyPolicy.ensureNoActivePipeline(pipelineRepository.existsByNamespaceAndApplicationNameAndStatusIn(
namespace, applicationName, List.of(PipelineStatus.RUNNING, PipelineStatus.DEPLOYING)
namespace, applicationName, deploymentConcurrencyPolicy.activePipelineStatuses()
));

Pipeline rollbackPipeline = pipelineRepository.save(Pipeline.rollback(source, operatorUserId));
Expand Down Expand Up @@ -236,12 +236,7 @@ public String rollback(String namespace, String applicationName, String targetPi

artifactDeploymentExecutor.deploy(rollbackPipeline, application, environment, runtimeSpec, healthCheck, serviceConfig);

pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED);
pipelineRepository.updateStatusIfMatch(rollbackPipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED);
rollbackPipeline.markSucceeded();
eventPublisher.publishEvent(PipelineNotificationEvent.of(
rollbackPipeline, PipelineNotificationType.SUCCEEDED, "回滚已成功。"
));
completeDeployPhase(rollbackPipeline, "正在验证回滚版本是否就绪…", "回滚已成功。");
} catch (Exception e) {
pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.ERROR);
String message = StringUtils.defaultIfBlank(e.getMessage(), "回滚任务执行失败,请查看日志。");
Expand Down Expand Up @@ -285,6 +280,31 @@ public Boolean stopPipeline(String namespace, String applicationName, String id)
return true;
}

/**
* Completes the deploy phase after the artifact has been applied. When health verification is enabled,
* the pipeline moves to VERIFYING with a deadline and the scan job later decides SUCCEEDED/ERROR. When
* disabled, it is marked SUCCEEDED immediately (legacy behavior).
*/
private void completeDeployPhase(Pipeline pipeline, String verifyingDetail, String succeededDetail) {
if (pipelineHealthProperties.isEnabled()) {
LocalDateTime deadline = LocalDateTime.now().plus(pipelineHealthProperties.getTimeout());
pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING);
pipelineRepository.updateStatusAndDeadlineIfMatch(
pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.VERIFYING, deadline);
Comment thread
microbluey marked this conversation as resolved.
pipeline.markVerifying(deadline);
eventPublisher.publishEvent(PipelineNotificationEvent.of(
pipeline, PipelineNotificationType.VERIFYING, verifyingDetail
));
return;
}
pipelineStateMachine.ensureCanTransition(PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED);
pipelineRepository.updateStatusIfMatch(pipeline.getId(), PipelineStatus.DEPLOYING, PipelineStatus.SUCCEEDED);
pipeline.markSucceeded();
eventPublisher.publishEvent(PipelineNotificationEvent.of(
pipeline, PipelineNotificationType.SUCCEEDED, succeededDetail
));
}

private Environment requireEnvironment(String environmentName) {
Environment environment = environmentService.getEnvironment(environmentName);
if (environment == null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
package com.github.wellch4n.oops.domain.delivery;

import com.github.wellch4n.oops.domain.shared.PipelineStatus;
import com.github.wellch4n.oops.shared.exception.BizException;
import java.util.List;

public class DeploymentConcurrencyPolicy {

// Statuses that count as an in-flight pipeline for the same application. VERIFYING is included, so a
// deploy that is still being health-verified is treated as active as well.
private static final List<PipelineStatus> ACTIVE_PIPELINE_STATUSES = List.of(
PipelineStatus.RUNNING,
PipelineStatus.DEPLOYING,
PipelineStatus.VERIFYING
);

/**
* Returns the statuses that mark a pipeline as active. Callers pass this list to the repository's
* existence check before starting a deploy or rollback.
*
* @return the shared list built with {@code List.of}, which is unmodifiable
*/
public List<PipelineStatus> activePipelineStatuses() {
return ACTIVE_PIPELINE_STATUSES;
}

public void ensureNoActivePipeline(boolean activePipelineExists) {
if (activePipelineExists) {
throw new BizException("Application is being deployed");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.github.wellch4n.oops.domain.shared.DeployMode;
import com.github.wellch4n.oops.domain.shared.PipelineStatus;
import com.github.wellch4n.oops.domain.shared.PipelineTriggerType;
import java.time.LocalDateTime;
import lombok.Data;
import lombok.EqualsAndHashCode;

Expand All @@ -26,6 +27,7 @@ public class Pipeline extends BaseAggregateRoot {
private String message;
private PipelineTriggerType triggerType;
private String rollbackFromPipelineId;
private LocalDateTime verifyDeadline;

public static Pipeline initialize(
String namespace,
Expand Down Expand Up @@ -86,6 +88,19 @@ public void markDeploying() {
transitionTo(PipelineStatus.DEPLOYING);
}

/**
 * Enters post-deploy health verification. The artifact has been applied to the cluster but the rollout
 * may not yet be ready; {@code verifyDeadline} bounds how long the scan job will wait before failing.
 *
 * @param verifyDeadline moment after which verification counts as timed out; must not be {@code null}
 * @throws IllegalArgumentException if {@code verifyDeadline} is {@code null}
 */
public void markVerifying(LocalDateTime verifyDeadline) {
    if (verifyDeadline == null) {
        // isVerifyTimedOut reports "not timed out" for a null deadline, so accepting null here would
        // leave the pipeline in VERIFYING with no upper bound on the wait.
        throw new IllegalArgumentException("verifyDeadline must not be null");
    }
    this.verifyDeadline = verifyDeadline;
    transitionTo(PipelineStatus.VERIFYING);
}

/**
 * Reports whether the verification window has elapsed at the given moment.
 *
 * @param now the moment to compare against the stored deadline
 * @return {@code false} when no deadline is stored; otherwise {@code true} only if {@code now} is
 *         strictly after the deadline
 */
public boolean isVerifyTimedOut(LocalDateTime now) {
    if (verifyDeadline == null) {
        return false;
    }
    return now.isAfter(verifyDeadline);
}

/**
* Moves this pipeline to {@link PipelineStatus#SUCCEEDED} by delegating to {@code transitionTo}.
* NOTE(review): {@code transitionTo} is defined outside this view; presumably it validates the
* transition — confirm.
*/
public void markSucceeded() {
transitionTo(PipelineStatus.SUCCEEDED);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,15 @@ public class PipelineStateMachine {
PipelineStatus.STOPPED
));
ALLOWED_TRANSITIONS.put(PipelineStatus.DEPLOYING, EnumSet.of(
PipelineStatus.VERIFYING,
PipelineStatus.SUCCEEDED,
PipelineStatus.ERROR,
PipelineStatus.STOPPED
));
ALLOWED_TRANSITIONS.put(PipelineStatus.VERIFYING, EnumSet.of(
PipelineStatus.SUCCEEDED,
PipelineStatus.ERROR
));
ALLOWED_TRANSITIONS.put(PipelineStatus.STOPPED, EnumSet.noneOf(PipelineStatus.class));
ALLOWED_TRANSITIONS.put(PipelineStatus.SUCCEEDED, EnumSet.noneOf(PipelineStatus.class));
ALLOWED_TRANSITIONS.put(PipelineStatus.ERROR, EnumSet.noneOf(PipelineStatus.class));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* @date 2025/7/5
*/
public enum PipelineStatus {
    // NOTE(review): VERIFYING is declared before STOPPED, which shifts the ordinals of STOPPED, SUCCEEDED
    // and ERROR compared with the previous declaration. Confirm the status is persisted by name and not
    // by ordinal.
    INITIALIZED,
    RUNNING,
    BUILD_SUCCEEDED,
    DEPLOYING,
    // Post-deploy health verification: the artifact is applied, the rollout is not yet confirmed ready.
    VERIFYING,
    // The three statuses below have no outgoing transitions in PipelineStateMachine.
    STOPPED,
    SUCCEEDED,
    ERROR
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.github.wellch4n.oops.infrastructure.config;

import java.time.Duration;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

/**
* Post-deploy health verification settings. When {@code enabled}, a deploy transitions to VERIFYING after the
* StatefulSet is applied and is only marked SUCCEEDED once the rollout is ready; {@code timeout} bounds how long
* verification waits before failing. When disabled, a deploy is marked SUCCEEDED immediately after apply
* (legacy behavior).
*
* <p>Bound from configuration keys under the {@code oops.pipeline.health} prefix.
*/
@Data
@Configuration
@ConfigurationProperties(prefix = "oops.pipeline.health")
public class PipelineHealthProperties {

// Whether a deploy goes through VERIFYING before SUCCEEDED. Defaults to true when the key is absent.
private boolean enabled = true;

// Length of the verification window; PipelineService adds it to the current time to get the verify
// deadline. Defaults to 5 minutes when the key is absent.
// NOTE(review): a zero or negative value is not rejected here and would yield a deadline that has
// already passed — confirm whether validation is wanted.
private Duration timeout = Duration.ofMinutes(5);
}
Loading