From 3d73a3c38de2c1b6f1111768bd4cc142b1e5b961 Mon Sep 17 00:00:00 2001 From: devppratik Date: Tue, 2 Jun 2026 13:48:11 +0530 Subject: [PATCH 1/2] feat: add Claude Code infrastructure and documentation --- .claude/agents/README.md | 244 ++++++++++++ .claude/agents/ci-agent.md | 290 ++++++++++++++ .claude/agents/docs-agent.md | 198 ++++++++++ .claude/agents/lint-agent.md | 104 +++++ .claude/agents/security-agent.md | 235 ++++++++++++ .claude/agents/test-agent.md | 155 ++++++++ .claude/hooks/README.md | 351 +++++++++++++++++ .claude/hooks/cleanup.sh | 15 + .claude/hooks/pre-edit.sh | 148 +++++++ .claude/hooks/stop-prek-validation.sh | 92 +++++ .claude/settings.json | 56 +++ .claude/skills/README.md | 123 ++++++ .claude/skills/prow-ci/SKILL.md | 363 ++++++++++++++++++ .claude/skills/prow-ci/analyze_failure.py | 269 +++++++++++++ .../skills/prow-ci/fetch_prow_artifacts.py | 143 +++++++ .gitignore | 1 + .gitleaks.toml | 155 ++++++++ .prek-version | 1 + CONTRIBUTING.md | 154 ++++++++ DEVELOPMENT.md | 198 ++++++++++ TESTING.md | 282 ++++++++++++++ hack/ci.sh | 9 + hack/prek.ci.toml | 63 +++ prek.toml | 78 ++++ 24 files changed, 3727 insertions(+) create mode 100644 .claude/agents/README.md create mode 100644 .claude/agents/ci-agent.md create mode 100644 .claude/agents/docs-agent.md create mode 100644 .claude/agents/lint-agent.md create mode 100644 .claude/agents/security-agent.md create mode 100644 .claude/agents/test-agent.md create mode 100644 .claude/hooks/README.md create mode 100755 .claude/hooks/cleanup.sh create mode 100755 .claude/hooks/pre-edit.sh create mode 100755 .claude/hooks/stop-prek-validation.sh create mode 100644 .claude/settings.json create mode 100644 .claude/skills/README.md create mode 100644 .claude/skills/prow-ci/SKILL.md create mode 100755 .claude/skills/prow-ci/analyze_failure.py create mode 100755 .claude/skills/prow-ci/fetch_prow_artifacts.py create mode 100644 .gitleaks.toml create mode 100644 .prek-version create mode 100644 CONTRIBUTING.md 
create mode 100644 DEVELOPMENT.md create mode 100644 TESTING.md create mode 100755 hack/ci.sh create mode 100644 hack/prek.ci.toml create mode 100644 prek.toml diff --git a/.claude/agents/README.md b/.claude/agents/README.md new file mode 100644 index 000000000..b14631fa1 --- /dev/null +++ b/.claude/agents/README.md @@ -0,0 +1,244 @@ +# Claude Agents + +Specialized agents for this operator development workflows. + +## Available Agents + +### [lint-agent](./lint-agent.md) +**Purpose**: Automated linting and code quality enforcement + +**When to use**: +- Pre-commit validation +- After code generation +- Before creating PR +- Investigating CI lint failures + +**Key capabilities**: +- Run formatting checks +- Execute golangci-lint +- Auto-fix safe issues +- Report unfixable problems + +--- + +### [test-agent](./test-agent.md) +**Purpose**: Automated testing and test quality assurance + +**When to use**: +- After code changes +- Test failures in CI +- Before creating PR +- After regenerating mocks + +**Key capabilities**: +- Run targeted tests for changed code +- Detect flaky test failures +- Suggest minimal fixes +- Ensure test coverage + +--- + +### [security-agent](./security-agent.md) +**Purpose**: Security scanning and policy enforcement + +**When to use**: +- Before committing code +- RBAC manifests modified +- Secret handling changed +- CI/CD pipelines modified + +**Key capabilities**: +- Scan for hardcoded secrets +- Validate RBAC configurations +- Check insecure patterns +- Enforce security policies + +--- + +### [docs-agent](./docs-agent.md) +**Purpose**: Documentation maintenance and synchronization + +**When to use**: +- Code changes affect workflows +- New features added +- Build process modified +- Command examples need updating + +**Key capabilities**: +- Update docs after code changes +- Ensure command examples work +- Validate markdown formatting +- Keep docs synchronized + +--- + +### [ci-agent](./ci-agent.md) +**Purpose**: CI/CD validation and 
workflow integrity + +**When to use**: +- Tekton pipelines modified +- Pre-commit hooks changed +- CI failures need investigation +- New validation steps added + +**Key capabilities**: +- Validate pipeline integrity +- Ensure local/CI parity +- Detect missing checks +- Optimize execution order + +--- + +## Usage Patterns + +### Single Agent Invocation +Use a specific agent when the task is clear: +```text +"Run lint-agent to check formatting" +"Use security-agent to scan for secrets" +"Invoke test-agent on controllers" +``` + +### Multi-Agent Workflow +Agents can work together for comprehensive validation: +```text +1. lint-agent: Fix formatting and linting +2. test-agent: Run affected tests +3. security-agent: Scan for secrets and RBAC issues +4. docs-agent: Update documentation +5. ci-agent: Validate CI parity +``` + +### Pre-Commit Workflow +Recommended agent sequence before committing: +```text +1. security-agent (secrets, RBAC) +2. lint-agent (formatting, linting) +3. test-agent (targeted tests) +4. docs-agent (if docs need updates) +``` + +### Pre-PR Workflow +Full validation before creating pull request: +```text +1. lint-agent --all-files +2. test-agent --full-suite +3. security-agent --comprehensive +4. docs-agent --validate +5. 
ci-agent --parity-check +``` + +## Agent Design Principles + +All agents follow these principles: + +**Focused Responsibility** +- Each agent has clear, narrow responsibilities +- No overlap with other agents +- Single purpose, well-defined scope + +**Reuse Existing Tools** +- Leverage pre-commit hooks +- Use Makefile targets +- Don't reinvent validations + +**Fast Feedback** +- Quick execution (<30s for targeted checks) +- Fail fast on common issues +- Provide actionable output + +**CI Parity** +- Mirror CI checks locally +- Use same tool versions +- Deterministic results + +**Safe Automation** +- Auto-fix only safe changes +- Escalate risky modifications +- Never bypass security checks + +**Clear Escalation** +- Define when human intervention needed +- Explain what can't be auto-fixed +- Provide context for decisions + +## Integration with Pre-commit + +Agents complement (don't replace) pre-commit hooks: + +| Pre-commit Hook | Corresponding Agent | +|-----------------|---------------------| +| `gitleaks` | security-agent | +| `golangci-lint` | lint-agent | +| `go-build` | lint-agent | +| `go-mod-tidy` | lint-agent | +| `rbac-wildcard-check` | security-agent | + +**Relationship:** +- Pre-commit hooks: Automated git hooks (mandatory) +- Agents: Interactive assistance (on-demand) +- Both use same underlying tools + +## Output Format + +All agents should report findings consistently: +```text +[AGENT] [SEVERITY] Location: Issue +Example: [lint-agent] [ERROR] pkg/handler/deployment.go:42: unreachable code +Example: [security-agent] [CRITICAL] deploy/role.yaml:15: Wildcard permission +``` + +Severity levels: +- **CRITICAL**: Blocks commit/PR +- **ERROR**: Must fix before merge +- **WARNING**: Should fix +- **INFO**: Informational + +## Extension Guide + +To add a new agent: + +1. Create `new-agent.md` in this directory +2. Add YAML frontmatter at the top: + ```yaml + --- + name: new-agent + description: Brief description of when to use this agent. 
Be specific about use cases. + tools: Bash, Read, Edit, Grep + model: sonnet + --- + ``` +3. Follow the template structure in markdown body: + - **Responsibilities**: What it does + - **Usage**: When to invoke + - **Commands**: How it works + - **Escalation**: When to defer to human +4. Update this README with agent description +5. Test agent workflows locally +6. Document integration points + +**Required frontmatter fields**: +- `name`: Agent identifier (kebab-case, matches filename) +- `description`: When to use this agent (triggers invocation) +- `tools`: Comma-separated list of allowed tools +- `model`: Claude model to use (`sonnet`, `opus`, or `haiku`) + +**Agent file structure**: +```text +.claude/agents/ +├── README.md +├── ci-agent.md # Frontmatter + markdown body +├── docs-agent.md +├── lint-agent.md +├── security-agent.md +└── test-agent.md +``` + +## Agent Communication + +Agents can reference each other: +- `lint-agent` may suggest running `test-agent` +- `security-agent` may trigger `ci-agent` for pipeline validation +- `docs-agent` updates after `lint-agent` or `test-agent` changes + +Keep communication minimal and explicit. diff --git a/.claude/agents/ci-agent.md b/.claude/agents/ci-agent.md new file mode 100644 index 000000000..0e5148c4c --- /dev/null +++ b/.claude/agents/ci-agent.md @@ -0,0 +1,290 @@ +--- +name: ci-agent +description: CI/CD validation and workflow integrity. Use when validating Tekton pipelines, checking local/CI parity, debugging CI failures, or ensuring pre-commit hooks mirror CI checks. +tools: Bash, Read, Grep, WebFetch, WebSearch +model: sonnet +--- + +# CI Agent + +CI/CD validation and workflow integrity for this operator. 
+ +## Responsibilities + +### Primary Tasks +- Validate Tekton pipeline integrity +- Ensure local/CI parity +- Detect missing CI checks +- Optimize pipeline execution ordering +- Verify pre-commit mirrors CI + +### CI/CD Components + +**Tekton Pipelines** (`.tekton/`): +- `this repository-pull-request.yaml`: PR validation +- `this repository-push.yaml`: Main branch builds +- `this repository-e2e-pull-request.yaml`: E2E tests on PR +- `this repository-e2e-push.yaml`: E2E tests on merge +- `this repository-pko-push.yaml`: PKO deployment +- `this repository-pko-pull-request.yaml`: PKO validation + +**Pipeline Stages:** +1. Checkout code +2. Build container image +3. Run linting (golangci-lint) +4. Run unit tests +5. Security scanning (gitleaks, gosec) +6. E2E testing (separate pipeline) +7. PKO packaging (separate pipeline) + +## Local/CI Parity + +### Pre-commit ↔ CI Mapping + +| Pre-commit Hook | CI Equivalent | Purpose | +|----------------|---------------|---------| +| `go-build` | Tekton compile check | Ensure code compiles | +| `golangci-lint` | Tekton lint job | Static analysis | +| `gitleaks` | Tekton security scan | Secret detection | +| `go-mod-tidy` | CI dependency check | No uncommitted go.mod/sum | +| `rbac-wildcard-check` | CI security policy | No wildcard RBAC | + +**Parity validation:** +```bash +# Check pre-commit uses same golangci-lint version as CI +grep "rev:" .pre-commit-config.yaml | grep golangci-lint +# Should match version in boilerplate pipeline + +# Check gitleaks version +grep "rev:" .pre-commit-config.yaml | grep gitleaks +``` + +### Running Full CI Locally + +```bash +# Lint (same as CI) +make go-check + +# Tests (same environment as CI) +boilerplate/_lib/container-make go-test + +# Build (same as CI) +make docker-build + +# Full validation +pre-commit run --all-files +make go-test +make go-build +``` + +## Pipeline Validation + +### Tekton Pipeline Health Checks + +```bash +# Check for valid YAML +yamllint .tekton/*.yaml + +# Validate 
Tekton syntax (requires tkn CLI) +# tkn pipeline validate -f .tekton/this repository-pull-request.yaml + +# Check for missing required steps +grep "pipelineRef:" .tekton/*.yaml +grep "params:" .tekton/*.yaml +``` + +### Required CI Steps + +Every PR pipeline MUST include: +- ✅ Checkout code +- ✅ Build image +- ✅ Run golangci-lint +- ✅ Run gitleaks +- ✅ Run unit tests +- ✅ Build succeeds + +E2E pipeline additionally includes: +- ✅ Deploy to test cluster +- ✅ Run e2e tests +- ✅ Cleanup + +### Missing Check Detection + +```bash +# Checks that should be in CI but might be missing +REQUIRED_CHECKS=( + "golangci-lint" + "gitleaks" + "go test" + "go build" + "rbac-wildcard-check" +) + +for check in "${REQUIRED_CHECKS[@]}"; do + if ! grep -q "$check" .tekton/*.yaml; then + echo "WARNING: $check not found in CI" + fi +done +``` + +## Usage + +Invoke when: +- Tekton pipelines modified +- Pre-commit hooks changed +- New validation steps added +- CI failures need investigation +- Optimization needed + +## Commands + +```bash +# Validate Tekton YAML +yamllint .tekton/*.yaml + +# Check pipeline references +grep "pipelineRef:" .tekton/*.yaml + +# Compare pre-commit and CI tools +diff <(grep "rev:" .pre-commit-config.yaml) <(echo "# CI versions from boilerplate") + +# Test container build (same as CI) +make docker-build + +# Run in CI-equivalent environment +boilerplate/_lib/container-make +``` + +## Execution Ordering Optimization + +**Current order (fastest first per pre-commit golden rule 13):** +1. File hygiene (2s) - check-merge-conflict, trailing-whitespace, EOF +2. YAML syntax (2s) - validate deploy/ manifests +3. Secret scan (5s) - gitleaks +4. Go build (10s cached) - compile check +5. Go mod tidy (10s) - dependency drift +6. RBAC check (5s) - wildcard detection +7. 
Static analysis (15s cached) - golangci-lint + +**Why this order:** +- Quick checks first provide fast feedback +- Fail fast on common issues (formatting, secrets) +- Expensive checks (lint) run last +- Total target: <30s on typical changeset + +## Integration with Boilerplate + +this operator uses Red Hat boilerplate: +- **Pipeline source**: `https://github.com/openshift/boilerplate` +- **Pipeline path**: `pipelines/docker-build-oci-ta/pipeline.yaml` +- **Updates**: `make boilerplate-update` + +When boilerplate updates: +- Check for breaking changes +- Test locally before merging +- Update pre-commit hooks to match + +## CI Failure Investigation + +### Lint Failures +```bash +# Reproduce locally +make go-check + +# Or exact CI environment +boilerplate/_lib/container-make go-check +``` + +### Test Failures +```bash +# Reproduce locally +make go-test + +# CI environment +boilerplate/_lib/container-make go-test + +# Check for environment differences +env | grep -E "GO|CI|BUILD" +``` + +### Build Failures +```bash +# Reproduce locally +make docker-build + +# Check Dockerfile +cat build/Dockerfile + +# Verify base image +grep "FROM" build/Dockerfile +``` + +### Secret Scan Failures +```bash +# Reproduce locally +gitleaks detect --source . --verbose + +# Check specific file +gitleaks detect --source . 
--log-opts="" +``` + +## Escalation Conditions + +Escalate to human when: +- CI pipeline consistently fails but local passes +- Tekton pipeline syntax errors +- Boilerplate update breaks CI +- New required check needs adding +- Pipeline execution time >10 minutes +- Konflux/Tekton infrastructure issues + +## Output Format + +Report CI issues in this format: +```text +CI Status: FAILING +Pipeline: this repository-pull-request +Stage: golangci-lint +Error: Exit code 1 + +Local Reproduction: + make go-check + # Output shows 3 linter errors in pkg/handler/deployment.go + +Root Cause: +Fix: +``` + +## Performance Targets + +- **PR pipeline**: <5 minutes total +- **Lint**: <1 minute +- **Unit tests**: <2 minutes +- **Build**: <3 minutes +- **E2E pipeline**: <15 minutes + +If exceeded, investigate: +- Cache misses +- Network issues +- Test parallelization +- Resource constraints + +## CI Security Considerations + +**Pipeline security:** +- Don't disable required checks +- Don't allow bypassing on PRs +- Require approvals for `.tekton/` changes +- Validate pipeline changes carefully + +**Secret handling in CI:** +- Use Tekton Secrets for credentials +- Don't log secrets +- Don't expose secrets in params +- Rotate secrets regularly + +**Image security:** +- Base images from trusted registries +- Scan images for vulnerabilities +- Don't use `latest` tag +- Sign images (if applicable) diff --git a/.claude/agents/docs-agent.md b/.claude/agents/docs-agent.md new file mode 100644 index 000000000..bdc1c0121 --- /dev/null +++ b/.claude/agents/docs-agent.md @@ -0,0 +1,198 @@ +--- +name: docs-agent +description: Documentation maintenance and synchronization. Use when updating docs after code changes, validating command examples, keeping CLAUDE.md synchronized, or fixing documentation drift. +tools: Bash, Read, Edit, Grep +model: sonnet +--- + +# Docs Agent + +Documentation maintenance and synchronization for this operator. 
+ +## Responsibilities + +### Primary Tasks +- Update documentation after code changes +- Ensure command examples remain valid +- Keep CLAUDE.md synchronized with actual workflows +- Validate markdown formatting +- Check for broken links (if applicable) + +### Documentation Files +- `README.md`: Project overview, badges, links +- `CONTRIBUTING.md`: Contribution guidelines +- `DEVELOPMENT.md`: Developer commands +- `TESTING.md`: Testing guidelines +- `CLAUDE.md`: AI agent guidance +- `docs/*.md`: Design docs, testing guides + +## Update Triggers + +Update docs when: +- **Make targets added/removed**: Update `DEVELOPMENT.md` and `CLAUDE.md` +- **API types changed**: Update `docs/design.md` +- **Test framework changes**: Update `TESTING.md` +- **New dependencies**: Update `docs/development.md` +- **Pre-commit hooks changed**: Update `CONTRIBUTING.md` +- **Build process changed**: Update `DEVELOPMENT.md` and `CLAUDE.md` + +## Validation Checks + +### Command Examples +```bash +# Extract commands from markdown +grep '```bash' -A 10 *.md | grep '^make\|^go\|^ginkgo' + +# Test each command (in safe read-only way) +make -n go-build # Dry-run +make help # List targets +go help test # Verify go commands +``` + +### Markdown Linting +```bash +# Check for common issues +# - Broken relative links +# - Inconsistent formatting +# - Missing code block language tags + +grep -E '```$' *.md # Code blocks without language +grep -E '\[.*\]\(\./' *.md # Relative links to check +``` + +### Consistency Checks +- All `make` targets in docs exist in `Makefile` +- Pre-commit hooks listed match `.pre-commit-config.yaml` +- Dependencies in docs match `go.mod` +- Commands use correct flags + +## Usage + +Invoke when: +- Code changes affect documented workflows +- New features added +- Build process modified +- Contributing guidelines need updates + +## Auto-Update Patterns + +### Make Targets +When `Makefile` changes, sync: +- `DEVELOPMENT.md` command reference +- `CLAUDE.md` development 
commands section +- `README.md` if new primary targets added + +### Pre-commit Hooks +When `.pre-commit-config.yaml` changes, sync: +- `CONTRIBUTING.md` validation section +- `CLAUDE.md` validation strategy + +### Dependencies +When `go.mod` changes (major versions), sync: +- `docs/development.md` prerequisites +- `README.md` badges/requirements + +## Documentation Style + +### Consistency Rules +- Use `bash` for code blocks, not `sh` or `shell` +- Commands should be copy-pasteable +- Include expected output for non-obvious commands +- Use `# Comments` to explain complex commands +- Prefer real examples over placeholders + +### Code Block Format +```bash +# Good +make go-build # Build the operator binary +``` + +Bad (no language tag): +\`\`\` +make go-build +\`\`\` + +Bad (placeholder): +\`\`\` +make +\`\`\` + +### Link Format +- Use relative paths for internal docs: `[Testing](./TESTING.md)` +- Use full URLs for external links: `[Ginkgo](https://onsi.github.io/ginkgo/)` +- Check links exist before committing + +## Documentation Sections to Maintain + +### README.md +- Project description stays current +- Badges reflect actual status +- Links to docs are correct +- Quick start is up to date + +### CONTRIBUTING.md +- Pre-commit setup matches `.pre-commit-config.yaml` +- Required checks match CI pipeline +- Examples use current commands +- Security guidelines current + +### DEVELOPMENT.md +- All commands work as documented +- File paths are correct +- Prerequisites match actual requirements +- Troubleshooting addresses real issues + +### TESTING.md +- Test commands use current framework +- Ginkgo/Gomega patterns match code +- Mock generation steps are accurate +- Coverage instructions work + +### CLAUDE.md +- Agent rules reflect current workflows +- Commands are accurate and tested +- Security guardrails comprehensive +- Repo-specific constraints current + +## Escalation Conditions + +Escalate to human when: +- Major architectural docs need rewriting 
(`docs/design.md`) +- Conflicting information across multiple docs +- Command examples fail validation +- Documentation strategy needs rethinking +- Breaking changes require migration guide + +## Integration Points + +- Update docs in same PR as code changes +- Keep docs in sync with implementation +- No separate "docs update" PRs unless fixing errors + +## Validation Commands + +```bash +# Check all markdown files +find . -name "*.md" -not -path "./vendor/*" -not -path "./.git/*" + +# Verify make targets exist +grep '```bash' *.md | grep 'make ' | sed 's/.*make \([a-z-]*\).*/\1/' | sort -u + +# Check for dead links (manual review) +grep -r '\[.*\](' *.md docs/*.md +``` + +## Output Format + +When updating docs, report: +``` +Updated: DEVELOPMENT.md +- Added section on new make target: go-bench +- Fixed typo in test commands +- Updated Go version requirement: 1.22.7 -> 1.24.0 + +Validated: +- All make targets exist and work +- All command examples tested +- Links checked +``` diff --git a/.claude/agents/lint-agent.md b/.claude/agents/lint-agent.md new file mode 100644 index 000000000..c53aa10a0 --- /dev/null +++ b/.claude/agents/lint-agent.md @@ -0,0 +1,104 @@ +--- +name: lint-agent +description: Automated linting and code quality enforcement. Use when running formatting checks, executing golangci-lint, auto-fixing safe issues, or investigating CI lint failures. +tools: Bash, Read, Edit +model: sonnet +--- + +# Lint Agent + +Automated linting and code quality enforcement for this operator. + +## Responsibilities + +### Primary Tasks +- Run formatting checks (`go fmt`) +- Execute golangci-lint with repo configuration +- Auto-fix safe linting issues +- Preserve existing code style and patterns +- Report unfixable issues with context + +### Validation Flow +1. Check if Go files have changed +2. Run `gofmt -l .` to detect formatting issues +3. Auto-fix formatting: `go fmt ./...` +4. Run `make go-check` (golangci-lint) +5. 
Attempt auto-fixes: `golangci-lint run --fix` +6. Report remaining issues with file:line references + +### Auto-Fix Criteria +Safe to auto-fix: +- Formatting (gofmt) +- Unused imports +- Simplifiable code (gosimple) +- Ineffectual assignments +- Trailing whitespace + +DO NOT auto-fix: +- Potential bugs (govet errors) +- Security issues (gosec warnings) +- Cyclomatic complexity violations +- API breaking changes + +## Usage + +Invoke when: +- Pre-commit validation needed +- After code generation +- Before creating PR +- CI lint failures need investigation + +## Commands + +```bash +# Format check only +gofmt -l . | grep -v "^$" + +# Format and fix +go fmt ./... + +# Full lint (as in CI) +make go-check + +# Lint with auto-fix +golangci-lint run --fix --config=boilerplate/openshift/golang-osd-operator/golangci.yml + +# Lint specific files +golangci-lint run --config=boilerplate/openshift/golang-osd-operator/golangci.yml +``` + +## Configuration + +Lint config: `boilerplate/openshift/golang-osd-operator/golangci.yml` + +Key rules: +- `govet`: Go static analysis +- `gosec`: Security scanning +- `staticcheck`: Bug detection +- `gocyclo`: Complexity checks +- `gofmt`: Formatting +- `goimports`: Import management + +## Output Format + +Report issues in this format: +```text +[FILE:LINE] [LINTER] Issue description +Example: pkg/handler/deployment.go:42 [govet] unreachable code +``` + +## Escalation Conditions + +Escalate to human when: +- Security warnings from gosec +- Cyclomatic complexity >15 (requires refactoring) +- API compatibility issues +- Multiple unfixable errors (>5) +- Linter configuration issues + +## Integration Points + +- Runs as part of `pre-commit run golangci-lint` +- Mirrors Tekton CI lint job +- Should complete in <30s on typical changeset +- Uses same config as CI (no drift) diff --git a/.claude/agents/security-agent.md b/.claude/agents/security-agent.md new file mode 100644 index 000000000..2f75b3e5e --- /dev/null +++ 
b/.claude/agents/security-agent.md @@ -0,0 +1,235 @@ +--- +name: security-agent +description: Security scanning and policy enforcement. Use when scanning for secrets, validating RBAC (no wildcards), checking insecure patterns, or investigating security violations in CI. +tools: Bash, Read, Grep, Edit +model: sonnet +--- + +# Security Agent + +Security scanning and policy enforcement for this operator. + +## Responsibilities + +### Primary Tasks +- Scan for hardcoded secrets and credentials +- Validate RBAC configurations (no wildcards) +- Check for insecure patterns in code +- Detect dangerous operations +- Enforce security policies + +### Security Checks + +#### 1. Secret Scanning +```bash +# Gitleaks (runs in pre-commit) +pre-commit run gitleaks + +# Manual scan +gitleaks detect --source . --verbose +``` + +**Detect:** +- AWS keys (access key ID, secret access key) +- GitHub tokens +- API keys +- Private keys (PEM, SSH) +- Passwords in code or config +- Database connection strings with credentials +- High-entropy strings (potential secrets) + +#### 2. RBAC Wildcard Check +```bash +# Pre-commit hook enforces this +make rbac-wildcard-check +``` + +**Forbidden patterns in `deploy/*.yaml`:** +- `resources: ["*"]` +- `verbs: ["*"]` +- `apiGroups: ["*"]` (usually) +- Multi-line format: `- '*'` + +**Enforcement:** +- ALWAYS specify exact resource types +- ALWAYS specify exact verbs +- Wildcard permissions are NEVER acceptable + +#### 3. 
Code Security Patterns + +**Dangerous patterns to detect:** +```go +// Secrets in code +password := "hardcoded-secret" // FORBIDDEN +apiKey := os.Getenv("API_KEY") // OK if not logged + +// Logging secrets +logger.Info("token: " + token) // FORBIDDEN +logger.Info("request authenticated") // OK + +// Command injection +exec.Command("sh", "-c", userInput) // DANGEROUS +exec.Command("kubectl", "get", "pods", podName) // OK if podName validated + +// Unsafe YAML/JSON unmarshaling +yaml.Unmarshal(untrustedInput, &obj) // Validate schema first + +// File path traversal +filepath.Join(baseDir, userInput) // Validate userInput doesn't contain ".." +``` + +#### 4. Dependency Vulnerabilities +```bash +# Check for known vulnerabilities in dependencies +go list -json -m all | nancy sleuth + +# Scan go.mod for outdated dependencies with CVEs +# (This requires external tooling not in current repo) +``` + +## Usage + +Invoke when: +- Before committing code +- RBAC manifests modified +- Secret handling code changed +- CI/CD pipelines modified +- Dockerfile updated +- Network policy changed + +## Commands + +```bash +# Full security scan +pre-commit run gitleaks --all-files +make rbac-wildcard-check +make go-check # includes gosec + +# Individual checks +gitleaks detect --source . --verbose +golangci-lint run --enable gosec +grep -r "password\s*:=\s*\"" --include="*.go" . +``` + +## High-Risk File Detection + +Files requiring extra scrutiny: +- `deploy/*.yaml` (RBAC, NetworkPolicy) +- `*_rbac.go` (authorization logic) +- `controllers/*_secret.go` (secret handling) +- `.tekton/*.yaml` (CI/CD pipelines) +- `build/Dockerfile` (container security) + +## Security Policy Enforcement + +### Secrets +- ✅ Use Kubernetes Secrets with references +- ✅ Use environment variables (with care) +- ✅ Use external secret management (Vault, etc.) 
+- ❌ Never hardcode secrets +- ❌ Never log secrets +- ❌ Never commit `.env` files with secrets + +### RBAC +- ✅ Specify exact resources and verbs +- ✅ Use Role for namespace-scoped permissions +- ✅ Use ClusterRole sparingly +- ❌ Never use wildcard permissions +- ❌ Never grant `cluster-admin` + +### Network Policies +- ✅ Default deny all traffic +- ✅ Explicitly allow required connections +- ✅ Document each ingress/egress rule +- ❌ Don't create overly permissive policies + +### Container Security +- ✅ Use minimal base images +- ✅ Run as non-root user +- ✅ Set read-only root filesystem +- ✅ Drop unnecessary capabilities +- ❌ Don't use `latest` tag +- ❌ Don't run as root + +## Gitleaks Configuration + +Custom allowlist in `.gitleaks.toml`: +- Known false positives +- Test fixtures with fake credentials +- Public key material (certificates) +- Non-secret high-entropy strings + +## Output Format + +Report findings in this format: +```text +[SEVERITY] [CATEGORY] Location: Issue +Example: [HIGH] [SECRET] pkg/handler/auth.go:42: Hardcoded API key detected +Example: [CRITICAL] [RBAC] deploy/role.yaml:15: Wildcard permission not allowed +``` + +Severity levels: +- **CRITICAL**: Immediate fix required (secrets committed, wildcard RBAC) +- **HIGH**: Security vulnerability (code injection, auth bypass) +- **MEDIUM**: Risky pattern (weak crypto, missing validation) +- **LOW**: Security hygiene (outdated dependency, missing security header) + +## Auto-Remediation + +Safe to auto-fix: +- Removing trailing whitespace from manifests +- Fixing YAML indentation + +NOT safe to auto-fix: +- Adding or modifying security context in manifests (requires manual review) +- Removing wildcards from RBAC (requires understanding requirements) +- Removing secrets from code (requires alternative solution) +- Changing authentication logic +- Modifying NetworkPolicies + +## Escalation Conditions + +Escalate immediately when: +- Secrets detected in commit +- Wildcard RBAC permissions found +- 
Authentication/authorization logic changed +- Network policy allows all traffic +- Dockerfile runs as root +- CI pipeline modified to skip security checks + +Escalate for review when: +- gosec warnings in security-critical code +- New dependency with known CVEs +- Crypto algorithm changes +- External network call added + +## Integration Points + +- **Pre-commit**: gitleaks runs automatically +- **CI**: Tekton runs gitleaks and gosec +- **RBAC check**: Custom make target +- **Manual**: Run before modifying security-critical code + +## FIPS Compliance + +this operator requires FIPS 140-2 compliance: +- All crypto operations must use validated libraries +- No weak algorithms (MD5, SHA1, DES) +- TLS 1.2+ only +- FIPS-approved key lengths + +Check crypto usage: +```bash +grep -r "crypto/" --include="*.go" . | grep -v "crypto/tls" +grep -r "md5\|sha1\|des" --include="*.go" . +``` + +## False Positive Handling + +If gitleaks flags non-secret: +1. Verify it's truly not a secret +2. Add to `.gitleaks.toml` allowlist with justification +3. Document why it's safe +4. Review periodically + +Never disable gitleaks entirely or use `SKIP=gitleaks`. diff --git a/.claude/agents/test-agent.md b/.claude/agents/test-agent.md new file mode 100644 index 000000000..e7651ad21 --- /dev/null +++ b/.claude/agents/test-agent.md @@ -0,0 +1,155 @@ +--- +name: test-agent +description: Automated testing and test quality assurance. Use when running targeted tests for changed code, analyzing test failures, debugging flaky tests, or ensuring test coverage. +tools: Bash, Read, Edit +model: sonnet +--- + +# Test Agent + +Automated testing and test quality assurance for this operator. + +## Responsibilities + +### Primary Tasks +- Run targeted unit tests for changed code +- Detect and report flaky test failures +- Suggest minimal fixes for test failures +- Ensure test coverage for new code +- Avoid unnecessary test reruns + +### Test Execution Strategy +1. 
**Incremental testing**: Run only affected packages +2. **Failure analysis**: Distinguish real bugs from flaky tests +3. **Minimal fixes**: Fix the test or the bug, not surrounding code +4. **Coverage validation**: Ensure new code has tests + +### Test Selection Logic + +```bash +# Changed Go files +CHANGED_FILES=$(git diff --name-only HEAD | grep "\.go$") + +# Extract packages +PACKAGES=$(echo "$CHANGED_FILES" | xargs -n1 dirname | sort -u | tr '\n' ' ') + +# Run targeted tests +for pkg in $PACKAGES; do + go test -v ./$pkg/... +done +``` + +## Usage + +Invoke when: +- Code changes committed +- Test failures in CI +- Before creating PR +- After code generation (mocks changed) + +## Commands + +```bash +# All tests +make go-test + +# Specific package +go test -v ./controllers/ + +# Focused test +ginkgo -focus="NetworkPolicy" ./controllers/ + +# Verbose output +ginkgo -v ./... + +# Coverage +go test -coverprofile=coverage.out ./... +go tool cover -html=coverage.out + +# Container-based (CI parity) +boilerplate/_lib/container-make go-test +``` + +## Failure Analysis + +### Real Failure Indicators +- Consistent failure across multiple runs +- Failed assertion with unexpected value +- Panic or runtime error +- Compilation error in test + +### Flaky Test Indicators +- Passes on retry without code changes +- Timeout issues +- Race condition symptoms +- Environment-dependent failures + +### Test Debugging + +```bash +# Run test multiple times to detect flakiness +for i in {1..5}; do go test ./pkg/mypackage || break; done + +# Verbose Ginkgo output +ginkgo -v -trace ./pkg/mypackage + +# Race detector +go test -race ./pkg/mypackage +``` + +## Fix Strategy + +**Test fails due to code bug:** +1. Identify failing assertion +2. Locate corresponding production code +3. Fix the bug +4. Verify fix with targeted test run +5. Run full suite to check for regressions + +**Test fails due to outdated mocks:** +1. Check if interface changed +2. 
Regenerate mocks: `boilerplate/_lib/container-make generate` +3. Update test expectations if needed +4. Rerun tests + +**Test fails due to test bug:** +1. Review test logic +2. Fix test setup or assertions +3. Ensure test is deterministic +4. Avoid hardcoded timeouts or sleeps + +## Test Coverage Requirements + +New code MUST have: +- Unit tests for public functions +- Error path testing +- Edge case coverage +- Mock-based isolation from Kubernetes + +Don't test: +- Generated code (`zz_generated.*.go`) +- Trivial getters/setters +- Third-party library wrappers (test your logic, not theirs) + +## Escalation Conditions + +Escalate to human when: +- Consistent test failures across multiple packages +- Flaky tests that can't be made deterministic +- Coverage drops significantly +- Tests require architectural changes +- Mock generation fails + +## Performance Targets + +- Unit tests: <5s per package +- Controller tests: <15s per controller +- Full suite: <2 minutes +- Flake rate: <1% + +## Integration Points + +- Runs in Tekton CI for every commit +- Local execution via `make go-test` +- Pre-commit hook available (not enabled by default, too slow) +- Container-based for CI parity: `boilerplate/_lib/container-make go-test` diff --git a/.claude/hooks/README.md b/.claude/hooks/README.md new file mode 100644 index 000000000..a798afc17 --- /dev/null +++ b/.claude/hooks/README.md @@ -0,0 +1,351 @@ +# Claude Code Hooks + +Security and validation hooks for this operator development. + +## Overview + +This repository uses **prek** (git hook manager) for quality checks and validation. Claude Code hooks integrate with prek to provide immediate feedback during development. 
+ +## Architecture + +```text +┌─────────────────────────────────────┐ +│ Developer / Claude Code Agent │ +└──────────────┬──────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ Stop Hook (every turn) │ +│ - Runs prek validation │ +│ - Blocks if issues found │ +│ - Claude fixes automatically │ +└──────────────┬──────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ Prek Hooks (CI config) │ +│ - golangci-lint (static analysis) │ +│ - RBAC wildcard check │ +│ - go build validation │ +│ - go mod tidy check │ +│ - file hygiene (trailing space) │ +└──────────────┬──────────────────────┘ + │ +┌──────────────▼──────────────────────┐ +│ Prek Hooks (full config) │ +│ + rh-pre-commit (InfoSec) │ +│ + gitleaks (secret scanning) │ +└──────────────┬──────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ Git Commit │ +└──────────────┬──────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ CI/CD (Tekton Pipelines) │ +└─────────────────────────────────────┘ +``` + +## Available Hooks + +### [stop-prek-validation.sh](./stop-prek-validation.sh) +**Purpose**: Run prek validation when Claude makes changes (or always, if configured) + +**Triggers**: On Claude Code session stop (Stop hook) + +**Behavior**: + +**Default mode** (recommended): +- Only runs if there are uncommitted changes (staged, unstaged, or untracked files) +- Skips validation for read-only queries (fast iteration) +- Validates when Claude modifies code (before commit) + +**Strict mode** (opt-in): +- Set environment variable: `export CLAUDE_LINT_ON_STOP=true` +- Always runs validation on every stop, regardless of changes +- Use when you want maximum quality enforcement +- Slower but catches issues immediately + +**Common behavior**: +- Runs `prek run --config hack/prek.ci.toml` on changed files +- Uses CI-compatible config (skips network-dependent hooks like rh-pre-commit, gitleaks) +- Blocks Claude from stopping if issues found +- Feeds 
errors back to Claude for automatic fixes +- Includes infinite loop guard (allows stop on retry) + +**Benefits**: +- **Default**: No performance impact on read-only queries (0s when no changes) +- **Default**: Catches issues when Claude modifies code (before commit) +- **Strict**: Maximum quality enforcement on every interaction +- Fast validation (5-10s typical) - only checks changed files + +**Performance**: +- Default mode, clean working directory: 0s (skipped) +- Default mode, with changes: 5-10s typical (changed files only) +- Strict mode (CLAUDE_LINT_ON_STOP=true): 5-10s every stop + +**Installation**: Configured in `.claude/settings.json` + +**Enable strict mode**: +```bash +# In your shell profile (~/.zshrc, ~/.bashrc) +export CLAUDE_LINT_ON_STOP=true + +# Or for single session +CLAUDE_LINT_ON_STOP=true claude +``` + +--- + +### [pre-edit.sh](./pre-edit.sh) +**Purpose**: Prevent editing generated files and warn about high-risk changes + +**Status**: Available for standalone use (not configured as Claude Code hook) + +**Checks**: +- Generated files (`zz_generated.*.go`) +- Generated mocks (`**/generated/mock_*.go`) +- Vendored code (`vendor/`) +- Boilerplate files (managed upstream) +- High-risk security files (RBAC, auth, NetworkPolicy) +- CI/CD pipelines (`.tekton/*.yaml`) +- Dockerfiles + +**Manual Usage**: +```bash +.claude/hooks/pre-edit.sh path/to/file.go +``` + +--- + +## Prek Configuration + +This repository maintains **two prek configurations**: + +### 1. **prek.toml** (Full validation) +Used for local development with internal network access. 
+ +**Hooks**: +- File hygiene (trailing whitespace, EOF, syntax checks) +- **rh-pre-commit**: Red Hat InfoSec security checks (requires `gitlab.cee.redhat.com` access) +- **gitleaks**: Secret detection (configured via `.gitleaks.toml`) +- **golangci-lint**: Static analysis +- **go-build**: Compile check +- **go-mod-tidy**: Dependency drift detection +- **rbac-wildcard-check**: RBAC validation + +**Usage**: +```bash +prek run --all-files +``` + +### 2. **hack/prek.ci.toml** (CI-compatible) +Used by Claude Code stop hook and CI environments without internal network access. + +**Excludes**: +- `rh-pre-commit` (requires Red Hat internal network) +- `gitleaks` (may not be available in all CI environments) + +**Usage**: +```bash +hack/ci.sh +# or +prek run --config hack/prek.ci.toml --all-files +``` + +**Why two configs?** +The CI-compatible config allows Claude Code and external CI systems to run quality checks without requiring access to Red Hat's internal GitLab instance. + +## Setup + +### Prerequisites +```bash +# Install prek (choose one) +uv tool install prek # recommended +pipx install prek # alternative +pip install --user prek # fallback +``` + +### Install Git Hooks +```bash +prek install +``` + +This sets up pre-commit hooks that run validation automatically. 
+ +## Usage + +### Automatic Validation +Prek runs automatically: +- **On every turn**: Stop hook runs `prek run --config hack/prek.ci.toml` (skipped when there are no uncommitted changes, unless `CLAUDE_LINT_ON_STOP=true`) +- **On commit**: Pre-commit hook runs relevant checks + +### Manual Validation +```bash +# Run all checks +prek run --all-files + +# Run specific check +prek run gitleaks +prek run golangci-lint +prek run rbac-wildcard-check +``` + +## Hook Categories + +### Stop Hooks +**Purpose**: Validate before Claude Code stops + +**Current**: +- `stop-prek-validation.sh`: Run prek checks + +**Benefits**: +- Immediate feedback (not delayed until commit) +- Automatic fixes by Claude +- Prevents accumulation of violations + +### Pre-commit Hooks +**Purpose**: Validate before git commit + +**Managed by**: prek (configured in `prek.toml`) + +**Checks**: +- File hygiene and syntax +- Security scanning (rh-pre-commit, gitleaks) +- Static analysis (golangci-lint) +- Build validation (go build, go mod tidy) +- Custom checks (RBAC wildcards) + +## Security Guardrails + +### Secret Prevention +**Implementation**: gitleaks via prek + +**Configuration**: `.gitleaks.toml` + +**Detects**: +- AWS credentials +- GitHub tokens +- API keys +- Private keys +- Database connection strings +- OCM-specific tokens +- High-entropy secrets + +**Action**: BLOCK commit + +### InfoSec Scanning +**Implementation**: rh-pre-commit via prek + +**Source**: Red Hat InfoSec Developer Workbench + +**Checks**: Internal security policies and compliance + +**Action**: BLOCK commit on violations + +### RBAC Validation +**Implementation**: rbac-wildcard-check via prek + +**Detects**: +- Wildcard resources: `["*"]` +- Wildcard verbs: `["*"]` + +**Action**: BLOCK commit + +### Generated File Protection +**Implementation**: pre-edit.sh (standalone) + +**Detects**: +- `zz_generated.*.go` +- Generated mocks +- CRD manifests (warning with confirmation prompt rather than a hard block) + +**Action**: BLOCK edit (suggest regeneration) + +## Hook Performance + +**Targets:** +- Stop hook: <30s for full validation +- Pre-commit hook: <30s on typical changeset +- 
Individual checks: <10s each + +**Optimization:** +- Prek runs hooks in parallel where possible +- Hooks only check changed files (where applicable) +- Build artifacts cached between runs + +## Troubleshooting + +### Hook Not Running +```bash +# Verify prek is installed +prek --version + +# Reinstall git hooks +prek install + +# Check hook configuration +cat prek.toml +``` + +### Hook Fails Incorrectly +```bash +# Run hook manually for debugging +prek run --verbose + +# Check hook configuration +cat prek.toml + +# Update prek +uv tool upgrade prek # or pipx upgrade prek +``` + +### Hook Failures (DO NOT Bypass) + +**NEVER bypass hooks:** +```bash +# FORBIDDEN - bypasses all validation +git commit --no-verify + +# FORBIDDEN - bypasses specific hooks +SKIP=hook-id git commit +``` + +**If hooks are blocking your commit:** +1. **Investigate and fix the root issue** - hooks catch real problems +2. **If the hook or config is broken:** + - Fix the hook/config first + - Open an issue documenting the problem + - Request reviewer approval before merge +3. **Re-run full validation:** + - `prek run --all-files` locally + - Ensure all required CI checks pass + - Get explicit code review approval + +**Security hooks (gitleaks, rh-pre-commit) must NEVER be bypassed under any circumstances.** + +## Version Management + +### Prek Version +Pinned in `.prek-version` for CI consistency: +```bash +cat .prek-version # v0.3.9 +``` + +Update when new prek releases are available. 
+ +### Hook Dependencies +Defined in `prek.toml` with immutable refs: +- `rh-pre-commit-2.3.0` +- `v8.18.0` (gitleaks) +- `v2.0.2` (golangci-lint) + +## References + +- [Prek Documentation](https://prek.j178.dev/) +- [Gitleaks](https://github.com/gitleaks/gitleaks) +- [RH InfoSec Tools](https://gitlab.cee.redhat.com/infosec-public/developer-workbench/tools) +- [golangci-lint](https://golangci-lint.run/) +- [CLAUDE.md](../../CLAUDE.md) - Development guidelines diff --git a/.claude/hooks/cleanup.sh b/.claude/hooks/cleanup.sh new file mode 100755 index 000000000..7cc826bf7 --- /dev/null +++ b/.claude/hooks/cleanup.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# +# Cleanup Hook for this operator +# Runs when Claude Code session stops +# + +set -euo pipefail + +# Nothing to clean up currently +# This hook is here for future use: +# - Clear temporary files +# - Clean up test resources +# - Report summary of changes + +exit 0 diff --git a/.claude/hooks/pre-edit.sh b/.claude/hooks/pre-edit.sh new file mode 100755 index 000000000..45c96aa8f --- /dev/null +++ b/.claude/hooks/pre-edit.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash +# +# Pre-Edit Hook for this operator +# Prevents editing generated files, vendored code, and high-risk files without warning +# +# Usage: Called automatically by Claude Code before file edits +# + +set -euo pipefail + +FILE="${1:-}" + +if [[ -z "$FILE" ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Normalize file path to be repo-relative for consistent pattern matching +# This ensures patterns like vendor/* work regardless of whether the input is absolute or relative +REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || echo ".") +if [[ "$FILE" = /* ]]; then + # Convert absolute path to repo-relative + FILE="${FILE#"$REPO_ROOT"/}" +fi +# Strip leading ./ +FILE="${FILE#./}" + +# Helper function for interactive confirmation +confirm_or_exit() { + local prompt="$1" + echo "$prompt" + + # Check if stdin is a TTY (interactive terminal) + if [[ ! 
-t 0 ]]; then + echo "❌ ERROR: Non-interactive environment detected" + echo " This operation requires manual confirmation" + echo " Run this script in an interactive terminal" + exit 1 + fi + + read -r response + if [[ ! "$response" =~ ^[Yy]$ ]]; then + exit 1 + fi +} + +# ============================================================================= +# GENERATED FILES - BLOCK EDITS +# ============================================================================= + +# Check for generated Go files +if [[ "$FILE" == *"zz_generated."* ]]; then + echo "❌ BLOCKED: Cannot edit generated file: $FILE" + echo " This file is auto-generated by controller-gen." + echo " To regenerate: boilerplate/_lib/container-make generate" + exit 1 +fi + +if [[ "$FILE" == *"/generated/"* ]] && [[ "$FILE" == *"mock_"* ]]; then + echo "❌ BLOCKED: Cannot edit generated mock: $FILE" + echo " This file is auto-generated by mockgen." + echo " To regenerate: boilerplate/_lib/container-make generate" + exit 1 +fi + +# Check for generated CRD manifests +if [[ "$FILE" == deploy/crds/* ]] && [[ "$FILE" == *.yaml ]]; then + echo "⚠️ WARNING: Editing generated CRD manifest: $FILE" + echo " CRDs are generated from API types." + echo " Consider editing api/v1alpha1/*.go instead." + echo " To regenerate CRDs: make manifests" + echo "" + confirm_or_exit " Continue? (y/N)" +fi + +# ============================================================================= +# LOCKFILES - WARNING +# ============================================================================= + +if [[ "$FILE" == "go.sum" ]]; then + echo "⚠️ WARNING: Editing go.sum directly" + echo " This file is managed by 'go mod tidy'." + confirm_or_exit " Are you sure you want to edit it manually? 
(y/N)" +fi + +# ============================================================================= +# VENDORED CODE - BLOCK EDITS +# ============================================================================= + +if [[ "$FILE" == vendor/* ]]; then + echo "❌ BLOCKED: Cannot edit vendored code: $FILE" + echo " Vendor directory is managed by go modules." + echo " Update dependencies in go.mod instead." + exit 1 +fi + +if [[ "$FILE" == boilerplate/* ]] && [[ "$FILE" != boilerplate/update* ]]; then + echo "⚠️ WARNING: Editing boilerplate file: $FILE" + echo " Boilerplate is managed upstream." + echo " Local changes may be overwritten by 'make boilerplate-update'." + confirm_or_exit " Continue? (y/N)" +fi + +# ============================================================================= +# HIGH-RISK FILES - WARNING +# ============================================================================= + +HIGH_RISK_PATTERNS=( + "*/rbac.go" + "*/auth*.go" + "*_rbac.yaml" + "*/networkpolicy*.go" + "*ClusterRole*.yaml" + ".tekton/*.yaml" + "build/Dockerfile" +) + +for pattern in "${HIGH_RISK_PATTERNS[@]}"; do + # shellcheck disable=SC2053 + if [[ "$FILE" == $pattern ]]; then + echo "⚠️ HIGH-RISK FILE: $FILE" + echo " This file affects security or CI/CD." + echo " Changes require:" + echo " - Careful review" + echo " - Test coverage" + echo " - Security validation" + echo "" + confirm_or_exit " Continue? (y/N)" + break + fi +done + +# ============================================================================= +# LARGE DIFFS - WARNING +# ============================================================================= + +# If file exists and is large, warn about broad changes +if [[ -f "$FILE" ]]; then + LINES=$(wc -l < "$FILE") + if (( LINES > 500 )); then + echo "⚠️ LARGE FILE: $FILE ($LINES lines)" + echo " Prefer targeted edits over broad refactors." + confirm_or_exit " Continue? 
(y/N)" + fi +fi + +# All checks passed +exit 0 diff --git a/.claude/hooks/stop-prek-validation.sh b/.claude/hooks/stop-prek-validation.sh new file mode 100755 index 000000000..52c6cc142 --- /dev/null +++ b/.claude/hooks/stop-prek-validation.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# +# Stop Hook: Prek Validation +# +# Runs prek validation when Claude Code stops with smart triggering: +# +# Default mode (CLAUDE_LINT_ON_STOP not set): +# - Only runs when there are uncommitted changes +# - Skips validation for read-only queries (fast iteration) +# - Validates when Claude modifies code (catch issues before commit) +# +# Strict mode (export CLAUDE_LINT_ON_STOP=true): +# - Always runs validation on every stop +# - Use when you want maximum quality enforcement +# - Slower but catches issues immediately +# +# Performance: +# - Validates changed files only (5-10s typical) +# - Uses hack/prek.ci.toml (skips network-dependent hooks) +# +set -uo pipefail + +# Ensure we're running from the git repository root +# This handles cases where Claude Code's CWD is in a subdirectory (e.g., .claude/skills/) +REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +if [[ -z "$REPO_ROOT" ]]; then + jq -n '{"decision": "block", "reason": "Not in a git repository. Cannot run prek validation."}' + exit 0 +fi +cd "$REPO_ROOT" + +# Check for jq dependency +if ! command -v jq &> /dev/null; then + cat <<'EOF' +{"decision": "block", "reason": "jq is not installed — required for hook processing.\n\nInstall it:\n brew install jq # macOS\n apt-get install jq # Debian/Ubuntu\n yum install jq # RHEL/CentOS\n\nRetry the action once installed."} +EOF + exit 0 +fi + +HOOK_INPUT=$(cat) + +# Allow stop on retry to prevent infinite loops +STOP_HOOK_ACTIVE=$(echo "$HOOK_INPUT" | jq -r '.stop_hook_active // false') +if [[ "$STOP_HOOK_ACTIVE" == "true" ]]; then + exit 0 +fi + +# Determine if validation should run: +# 1. If CLAUDE_LINT_ON_STOP=true → always run (user opt-in for strict mode) +# 2. 
Otherwise, only run if there are uncommitted changes (about to commit) +FORCE_LINT="${CLAUDE_LINT_ON_STOP:-false}" + +if [[ "$FORCE_LINT" != "true" ]]; then + # Check for uncommitted changes (staged, unstaged, or untracked) + if git diff-index --quiet HEAD -- 2>/dev/null && [[ -z "$(git ls-files --others --exclude-standard)" ]]; then + # No changes and not forced - skip validation + exit 0 + fi +fi + +# Check if prek is installed — block and nudge instead of silently passing +if ! command -v prek &> /dev/null; then + jq -n \ + --arg reason "prek is not installed — required for quality checks before stopping. + +Install it: + uv tool install prek # recommended + pipx install prek # alternative + pip install --user prek # fallback + +Then wire up the git hook: prek install + +Retry the action once installed so validation can run." \ + '{"decision": "block", "reason": $reason}' + exit 0 +fi + +# Run prek validation (using CI config to skip network-dependent hooks) +# Only validate changed files for speed +PREK_OUTPUT=$(prek run --config hack/prek.ci.toml 2>&1) +PREK_EXIT=$? + +if [[ $PREK_EXIT -eq 0 ]]; then + exit 0 +fi + +# Block stop and tell Claude what to fix +jq -n \ + --arg reason "prek validation failed. 
Fix the issues below, then try again: + +$PREK_OUTPUT" \ + '{"decision": "block", "reason": $reason}' diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..604063977 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,56 @@ +{ + "permissions": { + "allow": [ + "Bash(make go-build)", + "Bash(make go-test)", + "Bash(make go-check)", + "Bash(make tools)", + "Bash(make lint)", + "Bash(go build ./...)", + "Bash(go test ./...)", + "Bash(go fmt ./...)", + "Bash(go mod tidy)", + "Bash(ginkgo -r ./...)", + "Bash(pre-commit run)", + "Bash(pre-commit run --all-files)", + "Bash(boilerplate/_lib/container-make)", + "Bash(boilerplate/_lib/container-make generate)", + "Bash(git status)", + "Bash(git diff)", + "Bash(git log)", + "Bash(git branch)", + "Bash(grep *)", + "Bash(find *)", + "Bash(ls *)", + "Bash(cat *)" + ], + "ask": [ + "Bash(git commit *)", + "Bash(git push *)", + "Bash(git reset *)", + "Bash(git rebase *)", + "Bash(make docker-build)", + "Bash(kubectl *)", + "Bash(oc *)" + ], + "deny": [ + "Bash(git commit --no-verify)", + "Bash(git push --force origin master)", + "Bash(git push --force origin main)", + "Bash(rm -rf /)", + "Bash(chmod 777 *)" + ] + }, + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "bash \"$(git rev-parse --show-toplevel)/.claude/hooks/stop-prek-validation.sh\"" + } + ] + } + ] + } +} diff --git a/.claude/skills/README.md b/.claude/skills/README.md new file mode 100644 index 000000000..e7063f86f --- /dev/null +++ b/.claude/skills/README.md @@ -0,0 +1,123 @@ +# Claude Skills + +Reusable workflow skills for this operator development. 
+ +## Available Skills + +### [prow-ci](./prow-ci/SKILL.md) +**Purpose**: Access and analyze OpenShift Prow CI results + +**When to use**: +- Investigating CI failures +- Checking test results +- Analyzing build logs +- Debugging failed PR checks + +**Key capabilities**: +- Access Prow dashboard and job results +- Retrieve build logs and artifacts +- Debug test failures +- Compare local vs CI results + +**Resources**: +- [Prow Dashboard](https://prow.ci.openshift.org/) +- [CI Search](https://github.com/openshift/ci-search) + +## Usage + +Skills are reusable workflows that combine multiple tools and knowledge to accomplish specific tasks. + +### Invoking Skills + +Skills can be referenced in Claude conversations: +```text +"Use the prow-ci skill to investigate the failed test in PR #123" +"Check Prow CI results for the latest build" +``` + +### Skill Components + +Each skill typically includes: +- **Purpose**: What the skill does +- **Usage**: When to invoke it +- **Commands**: Specific commands to run +- **Troubleshooting**: Common issues and solutions +- **Integration**: How it works with other tools + +## Creating New Skills + +To add a new skill: + +1. Create subdirectory: `skillname/` in this directory +2. Create `SKILL.md` inside the subdirectory +3. Use frontmatter with metadata: + ```yaml + --- + name: skillname + description: Brief description of what this skill does + trigger: skill triggers, slash command synonyms + --- + ``` +4. Document commands and workflows in the markdown body +5. Update this README +6. 
Test the skill workflow + +**Directory structure**: +``` +.claude/skills/ +├── README.md +└── skillname/ + ├── SKILL.md # Required: skill definition + └── reference/ # Optional: supporting docs +``` + +## Integration with Other Components + +**Skills vs Agents**: +- **Agents**: Autonomous actors with specific responsibilities +- **Skills**: Reusable workflows that agents or humans execute + +**Skills vs Hooks**: +- **Hooks**: Automated enforcement (runs automatically) +- **Skills**: On-demand workflows (runs when invoked) + +**Skills vs Commands**: +- **Commands**: Simple, single-purpose actions +- **Skills**: Complex, multi-step workflows + +## Future Skills + +Planned skills for this repository: + +### dependency-update +- Check for outdated dependencies +- Update go.mod +- Run tests +- Validate compatibility +- Create update PR + +### release-prep +- Update version numbers +- Generate changelog +- Run full validation +- Create release PR +- Tag release + +### api-compat-check +- Compare API changes +- Detect breaking changes +- Suggest migration path +- Update documentation + +### security-audit +- Run gitleaks on full history +- Check dependencies for CVEs +- Review RBAC configurations +- Scan container images +- Generate security report + +## References + +- [CLAUDE.md](../../CLAUDE.md) - Agent behavioral rules +- [.claude/agents/](../agents/) - Specialized agents +- [.claude/hooks/](../hooks/) - Security and validation hooks diff --git a/.claude/skills/prow-ci/SKILL.md b/.claude/skills/prow-ci/SKILL.md new file mode 100644 index 000000000..843cdb14b --- /dev/null +++ b/.claude/skills/prow-ci/SKILL.md @@ -0,0 +1,363 @@ +--- +name: prow-ci +description: Fetch and analyze OpenShift Prow CI job failures with automated artifact download and failure pattern detection +trigger: prow, prow-ci, /prow-ci, ci results, check ci, analyze ci failure +--- + +# Prow CI Analysis for Rbac Permissions Operator + +This skill fetches Prow CI job artifacts from Google Cloud 
Storage and provides automated failure analysis. + +## Prerequisites + +Before using this skill, verify gcloud CLI is installed: +```bash +which gcloud +``` + +If not installed, provide instructions from: https://cloud.google.com/sdk/docs/install + +**Note**: The `test-platform-results` GCS bucket is publicly accessible - no authentication required. + +## Quick Start + +```bash +# Check PR status and get Prow job URLs +gh pr checks + +# Analyze a failed job +/prow-ci + +# Or ask naturally: +"Analyze the lint failure in PR 328" +"Check why the validate job failed" +"Show me what broke in the coverage job" +``` + +## Implementation + +When invoked, this skill: + +1. **Fetches artifacts** using `fetch_prow_artifacts.py`: + - Downloads **prowjob.json** (job metadata) + - Downloads **build-log.txt** (complete build output with all errors) + - Saves to `.work/prow-artifacts/<build-id>/` + - **Note**: Script is optimized to only download essential files. Optional artifacts (JUnit XML, per-target logs) are skipped as build-log.txt contains all needed information. + +2. **Analyzes failures** using `analyze_failure.py`: + - Parses build-log.txt for error patterns + - Detects common failure patterns (lint, build, timeout, OOM) + - Extracts error messages and stack traces + - Identifies compilation errors and test failures + +3. 
**Generates report**: + - Markdown format with failure summary + - Test failures with details + - Pattern detection (compilation errors, lint failures, timeouts) + - Actionable error messages + +## Usage Instructions + +### Step 1: Get Prow Job URL + +```bash +# View PR checks to find failed jobs +gh pr checks + +# Or get detailed status +gh pr view --json statusCheckRollup --jq '.statusCheckRollup[] | select(.state == "FAILURE")' +``` + +Example Prow job URL: +``` +https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/328/pull-ci-openshift-rbac-permissions-operator-master-lint/2059308810190721024 +``` + +### Step 2: Fetch and Analyze + +Run the fetch script first: +```bash +cd "$(git rev-parse --show-toplevel)/.claude/skills/prow-ci" + +python3 fetch_prow_artifacts.py "<prow-job-url>" -o .work/prow-artifacts +``` + +This downloads only the essential files: +- `prowjob.json` - Job metadata (job name, state, type, URL) +- `build-log.txt` - Complete build output (contains all errors, test failures, and output) + +### Step 3: Analyze Failures + +```bash +python3 analyze_failure.py .work/prow-artifacts/<build-id> -f markdown +``` + +Output includes: +- Job information (name, state, URL) +- JUnit test failures with messages and stack traces +- Detected failure patterns (lint errors, build failures, timeouts) +- Top error messages from build log + +### Step 4: Present Findings + +Create a clear summary for the user with: +- Root cause identification +- Failed tests with error messages +- Detected patterns (lint, build, timeout, etc.) +- Actionable next steps to fix the issue + +### Example Workflow + +```bash +# User provides: "Analyze the lint failure in PR 328" + +# 1. Get Prow job URL +gh pr checks 328 | grep lint + +# 2. 
Fetch artifacts +python3 .claude/skills/prow-ci/fetch_prow_artifacts.py \ + "https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/328/pull-ci-openshift-rbac-permissions-operator-master-lint/2059308810190721024" + +# 3. Analyze +python3 .claude/skills/prow-ci/analyze_failure.py \ + .work/prow-artifacts/2059308810190721024 \ + -f markdown + +# 4. Review the output and provide actionable summary +``` + +## Prow Resources + +**Main Dashboard**: https://prow.ci.openshift.org/ +**CI Search**: https://github.com/openshift/ci-search +**Job History**: https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator + +## Common Use Cases + +### 1. Check Recent CI Results + +```bash +# View recent PR jobs +curl -s "https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator&type=presubmit" | grep -E "pull-ci-openshift-rbac-permissions-operator" + +# Check latest job status for specific PR +# Replace PR_NUMBER with actual PR number +gh pr view PR_NUMBER --json statusCheckRollup --jq '.statusCheckRollup[] | select(.context | contains("prow"))' +``` + +### 2. Access Build Logs + +Prow logs are stored at: +- **Pull request jobs**: `gs://test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/[PR_NUMBER]/[JOB_NAME]/[JOB_ID]` +- **Periodic jobs**: `gs://test-platform-results/logs/[JOB_NAME]/[JOB_ID]` + +**Viewing logs via web**: +```text +https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/[PR_NUMBER]/[JOB_NAME]/[JOB_ID] +``` + +### 3. Analyze Test Failures + +```bash +# Get PR checks +gh pr view PR_NUMBER --json statusCheckRollup + +# Find failed jobs +gh pr checks PR_NUMBER | grep -i "fail" + +# Access specific job artifacts +# Navigate to Prow UI and click on: +# - Build Log (for compilation/test output) +# - JUnit (for structured test results) +# - Artifacts (for generated files, coverage, etc.) +``` + +### 4. 
Common Job Names + +**Prow CI Jobs** (configured in openshift/release): +- `pull-ci-openshift-rbac-permissions-operator-master-e2e-binary-build-success` - E2E binary build verification +- `pull-ci-openshift-rbac-permissions-operator-master-coverage` - Code coverage analysis (with Codecov) +- `pull-ci-openshift-rbac-permissions-operator-master-lint` - Linting checks +- `pull-ci-openshift-rbac-permissions-operator-master-test` - Unit tests +- `pull-ci-openshift-rbac-permissions-operator-master-validate` - Validation checks + +**Tekton Pipelines** (configured in `.tekton/`): +- `rbac-permissions-operator-pull-request` - Main PR pipeline (docker build with OCI-TA) +- `rbac-permissions-operator-e2e-pull-request` - E2E testing pipeline +- `rbac-permissions-operator-pko-pull-request` - PKO (Package Operator) pipeline +- Corresponding `-push` pipelines for merged commits + +## Debugging CI Failures + +### Step 1: Identify Failed Job +```bash +gh pr checks PR_NUMBER +``` + +### Step 2: Access Prow UI +Open the Prow link from PR checks or construct manually: +```text +https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator&type=presubmit +``` + +### Step 3: Review Logs +Click on failed job → "Build Log" tab + +### Step 4: Check Artifacts +Look for: +- Test failure logs +- Coverage reports +- Generated artifacts + +### Step 5: Reproduce Locally +Many Prow jobs can be reproduced with: +```bash +# For unit tests (matches: pull-ci-...-test) +make go-test + +# For linting (matches: pull-ci-...-lint) +make go-check +# OR use pre-commit for comprehensive linting +pre-commit run --all-files + +# For validation (matches: pull-ci-...-validate) +make validate + +# For coverage (matches: pull-ci-...-coverage) +make coverage + +# For E2E binary build (matches: pull-ci-...-e2e-binary-build-success) +make e2e-binary-build + +# For container builds (Tekton pipelines) +make docker-build +``` + +## CI/Prow Integration in This Repo + +This repo uses **both Prow and Tekton** 
for comprehensive CI: + +**Prow CI** (openshift/release): +- Configuration: `ci-operator/config/openshift/rbac-permissions-operator/openshift-rbac-permissions-operator-master.yaml` +- Runs: lint, test, validate, coverage, e2e-binary-build +- Uses Codecov for coverage reporting (secret: `rbac-permissions-operator-codecov-token`) +- Skip rules: Changes to `.tekton/`, `.github/`, `.md` files, `OWNERS`, `LICENSE` don't trigger most jobs + +**Tekton Pipelines** (`.tekton/`): +- Primary build pipeline using Pipelines as Code +- Three pipeline types: main, e2e, pko +- Builds container images to Quay (rbac-permissions-operator-tenant) +- Pull request images expire after 5 days +- Uses boilerplate framework from `openshift/boilerplate` (docker-build-oci-ta pipeline) + +## Quick Reference Commands + +```bash +# Check all PR checks status +gh pr checks + +# View detailed status for a specific PR +gh pr view <PR_NUMBER> --json statusCheckRollup + +# Filter only Prow jobs +gh pr checks | grep "pull-ci-openshift-rbac-permissions-operator" + +# Check Tekton pipeline status +gh pr view --json statusCheckRollup --jq '.statusCheckRollup[] | select(.context | contains("Tekton"))' + +# Open Prow dashboard in browser (cross-platform) +# Copy and paste this URL into your browser: +# https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator + +# Or use platform-specific command: +# macOS: open "https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator" +# Linux: xdg-open "https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator" +# Windows: start "https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator" + +# View specific PR on Prow (replace <PR_NUMBER>) +# https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator&type=presubmit&pull=<PR_NUMBER> +``` + +## Troubleshooting + +### Can't find job results? 
+- Check both Prow AND Tekton - this repo uses both systems +- Prow jobs: `pull-ci-openshift-rbac-permissions-operator-master-*` +- Tekton jobs: Usually show as "Tekton" or pipeline names in PR checks +- Verify repo name format in Prow: `openshift_rbac-permissions-operator` (underscore, not dash) +- Ensure PR has been opened and CI has run + +### Logs show permission denied? +- Prow logs are public for openshift org +- Use web UI (prow.ci.openshift.org) instead of gsutil +- Check if job ID is correct + +### Job still running? +- Check Prow dashboard for in-progress jobs +- Look for "Pending" or "Running" status +- Wait for completion before accessing artifacts + +### Tekton pipeline failures? +- Check the pipeline link in PR checks (usually links to Konflux/AppStudio UI) +- Tekton logs are in the AppStudio dashboard, not Prow +- Common issues: + - Image build failures → Check Dockerfile syntax and build context + - Pipeline timeout → Check for slow steps or network issues + - Auth failures → Secret configuration in `rbac-permissions-operator-tenant` namespace +- Local validation: + ```bash + # Validate Tekton YAML syntax + kubectl apply --dry-run=client -f .tekton/ + + # Test container build locally + podman build -f build/Dockerfile -t test:local . 
+ ``` + +## Advanced: CI Search + +For historical job searches: +```bash +# Clone ci-search tool +git clone https://github.com/openshift/ci-search.git + +# Use web interface at search.ci.openshift.org (if available) +# Search for patterns in build logs across all jobs +``` + +## References + +- [Prow Dashboard](https://prow.ci.openshift.org/) +- [CI Search Tool](https://github.com/openshift/ci-search) +- [OpenShift CI Documentation](https://docs.ci.openshift.org/) + +## CI Configuration Files + +**Prow Configuration** (in openshift/release repo): +- Location: `ci-operator/config/openshift/rbac-permissions-operator/openshift-rbac-permissions-operator-master.yaml` +- Update process: Submit PR to openshift/release repository +- Auto-generated jobs in: `ci-operator/jobs/openshift/rbac-permissions-operator/` + +**Tekton Pipelines** (in this repo): +- Location: `.tekton/` directory +- Files: + - `rbac-permissions-operator-pull-request.yaml` - Main PR pipeline + - `rbac-permissions-operator-push.yaml` - Post-merge pipeline + - `rbac-permissions-operator-e2e-pull-request.yaml` - E2E testing + - `rbac-permissions-operator-pko-pull-request.yaml` - PKO validation +- Triggered by: Pipelines as Code (via Tekton) +- Uses: Boilerplate docker-build-oci-ta pipeline from openshift/boilerplate + +## Coverage Reporting + +This repository uses Codecov for coverage tracking: +- Secret: `rbac-permissions-operator-codecov-token` (stored in Prow) +- Generate coverage locally: `make coverage` +- Coverage runs on PRs and post-merge (`publish-coverage`) +- Dashboard: Check Codecov for rbac-permissions-operator + +## Integration with Other Skills + +- Use with **test-agent** to compare local test results with CI +- Use with **ci-agent** to validate CI configuration +- Use with **lint-agent** when investigating lint failures in CI +- Use with **security-agent** when investigating pre-commit hook failures diff --git a/.claude/skills/prow-ci/analyze_failure.py 
# Patch metadata that shares this span with the module below, preserved verbatim:
# b/.claude/skills/prow-ci/analyze_failure.py new file mode 100755 index 000000000..c46a1bce2 --- /dev/null +++ b/.claude/skills/prow-ci/analyze_failure.py @@ -0,0 +1,269 @@
#!/usr/bin/env python3
"""
Analyze Prow CI job failures from downloaded artifacts.
"""

import argparse
import json
import os
import re
import sys
import xml.etree.ElementTree as ET
from pathlib import Path


# Regexes counted (case-insensitively) over the whole build log; the key is
# the label shown in the "Detected Patterns" section of the report.
_FAILURE_PATTERNS = {
    'compilation_error': r'(?:compilation failed|build failed|cannot find package)',
    'test_failure': r'(?:FAIL:|Test failed:|tests failed)',
    'lint_error': r'(?:golangci-lint|gofmt|go vet) .* failed',
    'timeout': r'(?:timeout|timed out|deadline exceeded)',
    'oom': r'(?:out of memory|OOMKilled|killed by signal)',
    'image_pull': r'(?:Failed to pull image|ErrImagePull|ImagePullBackOff)',
    'permission_denied': r'(?:permission denied|forbidden|unauthorized)',
}


def parse_junit_xml(xml_file):
    """Parse a JUnit XML file and return its failed or errored test cases.

    Args:
        xml_file: Path (str or path-like) of the JUnit XML file.

    Returns:
        A list of dicts with the keys ``type`` (``'failure'`` or ``'error'``),
        ``suite``, ``test``, ``class``, ``message`` and ``details``. An
        unreadable or malformed file yields an empty list and a warning on
        stderr instead of an exception.
    """
    try:
        root = ET.parse(xml_file).getroot()
    except (ET.ParseError, OSError) as e:
        print(f"Warning: Could not parse {xml_file}: {e}", file=sys.stderr)
        return []

    failures = []
    # iter() also yields the root element itself, so files whose document
    # element is a single <testsuite> (no <testsuites> wrapper) are handled;
    # findall('.//testsuite') only looks at descendants and skipped them.
    for testsuite in root.iter('testsuite'):
        suite_name = testsuite.get('name', 'unknown')
        # Direct children only: cases of a nested <testsuite> are reported
        # when iter() reaches that nested suite, not a second time here.
        for testcase in testsuite.findall('testcase'):
            # A <failure> child takes precedence over an <error> child.
            for kind in ('failure', 'error'):
                node = testcase.find(kind)
                if node is None:
                    continue
                failures.append({
                    'type': kind,
                    'suite': suite_name,
                    'test': testcase.get('name', 'unknown'),
                    'class': testcase.get('classname', ''),
                    'message': node.get('message', ''),
                    'details': node.text or ''
                })
                break

    return failures


def analyze_build_log(log_file):
    """Analyze build-log.txt for common failure patterns.

    Args:
        log_file: Path of the build log.

    Returns:
        ``None`` when the file does not exist, otherwise a dict with:
        ``patterns`` (pattern name -> number of matches in the whole log),
        ``errors`` / ``failures`` (first 10 matching lines each) and
        ``warnings`` (first 5 matching lines).
    """
    if not os.path.isfile(log_file):
        return None

    # Logs may contain arbitrary bytes; undecodable ones are replaced rather
    # than aborting the analysis.
    with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    analysis = {
        'errors': [],
        'failures': [],
        'warnings': [],
        'patterns': {}
    }

    for pattern_name, regex in _FAILURE_PATTERNS.items():
        matches = re.findall(regex, content, re.IGNORECASE)
        if matches:
            analysis['patterns'][pattern_name] = len(matches)

    # Each line lands in at most one bucket: ERROR wins over FAIL/FAILED,
    # which wins over WARNING. FAIL is matched case-sensitively.
    for line in content.splitlines():
        if re.search(r'\bERROR\b', line, re.IGNORECASE):
            analysis['errors'].append(line.strip())
        elif re.search(r'\bFAIL(ED)?\b', line):
            analysis['failures'].append(line.strip())
        elif re.search(r'\bWARNING\b', line, re.IGNORECASE):
            analysis['warnings'].append(line.strip())

    # Limit to most relevant
    analysis['errors'] = analysis['errors'][:10]
    analysis['failures'] = analysis['failures'][:10]
    analysis['warnings'] = analysis['warnings'][:5]

    return analysis


def analyze_prowjob(prowjob_file):
    """Extract key information from prowjob.json.

    Args:
        prowjob_file: Path of the downloaded prowjob.json.

    Returns:
        ``None`` when the file is missing, unreadable or not a JSON object;
        otherwise a dict with ``state``, ``start_time``, ``completion_time``,
        ``url``, ``job_name``, ``type`` and ``refs``.
    """
    if not os.path.isfile(prowjob_file):
        return None

    try:
        with open(prowjob_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        print(f"Warning: Could not read {prowjob_file}: {e}", file=sys.stderr)
        return None

    if not isinstance(data, dict):
        print(f"Warning: Unexpected content in {prowjob_file}", file=sys.stderr)
        return None

    # "or {}" also covers keys that are present but null in the JSON.
    status = data.get('status') or {}
    spec = data.get('spec') or {}

    return {
        'state': status.get('state', 'unknown'),
        'start_time': status.get('startTime'),
        'completion_time': status.get('completionTime'),
        'url': status.get('url', ''),
        'job_name': spec.get('job', 'unknown'),
        'type': spec.get('type', 'unknown'),
        'refs': spec.get('refs') or {},
    }


def generate_analysis_report(artifacts_dir):
    """Generate comprehensive failure analysis report.

    Reads ``prowjob.json`` and ``build-log.txt`` from ``artifacts_dir`` and
    every ``junit*.xml`` found recursively below ``artifacts_dir/artifacts``.

    Returns:
        A dict with the keys ``prowjob``, ``build_log``, ``junit_failures``
        and ``summary`` (a plain-text digest of the other three).
    """
    report = {
        'prowjob': None,
        'build_log': None,
        'junit_failures': [],
        'summary': ''
    }

    # Analyze prowjob.json
    prowjob_file = os.path.join(artifacts_dir, 'prowjob.json')
    report['prowjob'] = analyze_prowjob(prowjob_file)

    # Analyze build-log.txt
    build_log_file = os.path.join(artifacts_dir, 'build-log.txt')
    report['build_log'] = analyze_build_log(build_log_file)

    # Analyze JUnit XML files
    artifacts_path = os.path.join(artifacts_dir, 'artifacts')
    if os.path.exists(artifacts_path):
        # Sorted so the report is identical across runs and filesystems.
        for xml_file in sorted(Path(artifacts_path).rglob('junit*.xml')):
            report['junit_failures'].extend(parse_junit_xml(xml_file))

    # Generate summary
    summary_parts = []

    if report['prowjob']:
        pj = report['prowjob']
        summary_parts.append(f"Job: {pj['job_name']}")
        summary_parts.append(f"State: {pj['state']}")

    if report['junit_failures']:
        summary_parts.append(f"\nJUnit Failures: {len(report['junit_failures'])}")
        for f in report['junit_failures'][:5]:
            summary_parts.append(f"  - {f['test']}: {f['message'][:100]}")

    if report['build_log'] and report['build_log']['patterns']:
        summary_parts.append("\nDetected Patterns:")
        for pattern, count in report['build_log']['patterns'].items():
            summary_parts.append(f"  - {pattern}: {count} occurrences")

    if report['build_log'] and report['build_log']['errors']:
        summary_parts.append(f"\nTop Errors ({len(report['build_log']['errors'])}):")
        for err in report['build_log']['errors'][:3]:
            summary_parts.append(f"  - {err[:150]}")

    report['summary'] = '\n'.join(summary_parts)

    return report


def format_markdown_report(report):
    """Format analysis as Markdown.

    Args:
        report: Dict produced by ``generate_analysis_report``.

    Returns:
        The Markdown document as a single string.
    """
    lines = ["# Prow CI Failure Analysis\n"]

    if report['prowjob']:
        pj = report['prowjob']
        lines.append("## Job Information")
        lines.append(f"- **Job**: {pj['job_name']}")
        lines.append(f"- **State**: {pj['state']}")
        lines.append(f"- **Type**: {pj['type']}")
        if pj.get('url'):
            lines.append(f"- **URL**: {pj['url']}")
        lines.append("")

    if report['junit_failures']:
        lines.append("## Test Failures")
        lines.append(f"\nTotal failures: {len(report['junit_failures'])}\n")
        for f in report['junit_failures']:
            lines.append(f"### {f['test']}")
            lines.append(f"**Suite**: {f['suite']}")
            lines.append(f"**Type**: {f['type']}")
            if f['message']:
                lines.append(f"**Message**: {f['message']}")
            if f['details']:
                # Failure details are capped at 500 characters per test.
                lines.append("```")
                lines.append(f['details'][:500])
                if len(f['details']) > 500:
                    lines.append("... (truncated)")
                lines.append("```")
            lines.append("")

    if report['build_log']:
        bl = report['build_log']

        if bl['patterns']:
            lines.append("## Detected Patterns")
            # Most frequent pattern first.
            for pattern, count in sorted(bl['patterns'].items(), key=lambda x: x[1], reverse=True):
                lines.append(f"- **{pattern}**: {count} occurrences")
            lines.append("")

        if bl['errors']:
            lines.append("## Errors")
            for err in bl['errors']:
                lines.append(f"- {err}")
            lines.append("")

        if bl['failures']:
            lines.append("## Failures")
            for fail in bl['failures'][:5]:
                lines.append(f"- {fail}")
            lines.append("")

    return '\n'.join(lines)


def main():
    """Command-line entry point; returns the process exit code (0 or 1)."""
    parser = argparse.ArgumentParser(description='Analyze Prow CI job failures')
    parser.add_argument('artifacts_dir', help='Directory containing downloaded artifacts')
    parser.add_argument('-f', '--format', choices=['text', 'json', 'markdown'],
                        default='markdown', help='Output format')
    parser.add_argument('-o', '--output', help='Output file (default: stdout)')

    args = parser.parse_args()

    if not os.path.exists(args.artifacts_dir):
        print(f"Error: Artifacts directory not found: {args.artifacts_dir}", file=sys.stderr)
        return 1

    # Generate analysis
    report = generate_analysis_report(args.artifacts_dir)

    # Format output
    if args.format == 'json':
        output = json.dumps(report, indent=2)
    elif args.format == 'markdown':
        output = format_markdown_report(report)
    else:  # text
        output = report['summary']

    # Write output
    if args.output:
        # Explicit UTF-8: log excerpts may contain non-ASCII characters that
        # the platform default encoding cannot represent.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"Analysis saved to: {args.output}")
    else:
        print(output)

    return 0


if __name__ == '__main__':
    sys.exit(main())

# Patch metadata that shares this span with the module above, preserved verbatim:
# diff --git a/.claude/skills/prow-ci/fetch_prow_artifacts.py b/.claude/skills/prow-ci/fetch_prow_artifacts.py new file mode 100755
# Patch metadata that shares this span with the module below, preserved verbatim:
# index 000000000..53d2272ce --- /dev/null +++ b/.claude/skills/prow-ci/fetch_prow_artifacts.py @@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
Fetch Prow CI job artifacts from Google Cloud Storage.
"""

import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
from urllib.parse import urlparse


def parse_prow_url(url):
    """
    Parse Prow job URL to extract GCS path components.

    The URL may point at the job run itself or at anything below it (for
    example a file under ``artifacts/``); query strings and fragments are
    ignored and the result always refers to the run's root directory.

    Returns dict with:
    - gcs_base_path: Full GCS path of the job run (gs://...)
    - bucket_path: Path of the job run within the bucket
    - build_id: Numeric build ID
    - job_name: Name of the Prow job

    Raises:
        ValueError: if the URL does not contain ``test-platform-results`` or
            no job name / build ID can be found in it.
    """
    # Handle both gcsweb URLs and direct GCS URLs
    if 'test-platform-results' not in url:
        raise ValueError("URL must contain 'test-platform-results'")

    # Extract path after test-platform-results, stopping at a query string
    # or fragment so neither leaks into the GCS path.
    match = re.search(r'test-platform-results/(.+?)(?:[?#]|$)', url)
    if not match:
        raise ValueError("Could not parse test-platform-results path")

    full_path = match.group(1).rstrip('/')

    # Build ID: the first path segment made up solely of 10+ digits. The
    # lookahead stops a segment such as "1234567890abc" from matching.
    build_match = re.search(r'/(\d{10,})(?=/|$)', full_path)
    if not build_match:
        raise ValueError("Could not find build ID (10+ digits) in URL")

    build_id = build_match.group(1)

    # Job name: the segment directly before the build ID. As before, it must
    # itself be preceded by at least one other path segment.
    prefix = full_path[:build_match.start()]
    if '/' not in prefix:
        raise ValueError("Could not extract job name from URL")
    job_name = prefix.rsplit('/', 1)[1]
    if not job_name:
        raise ValueError("Could not extract job name from URL")

    # prowjob.json and build-log.txt are fetched relative to this path, so
    # anything after the build ID (e.g. ".../artifacts/junit.xml") is cut off.
    bucket_path = full_path[:build_match.end()]
    gcs_base_path = f"gs://test-platform-results/{bucket_path}"

    return {
        'gcs_base_path': gcs_base_path,
        'bucket_path': bucket_path,
        'build_id': build_id,
        'job_name': job_name
    }


def download_from_gcs(gcs_path, local_path):
    """Download a file from GCS using gcloud storage cp.

    Args:
        gcs_path: Source object (``gs://...``).
        local_path: Destination file; missing parent directories are created.

    Returns:
        True on success. False, with a warning on stderr, when the copy
        fails, the destination directory cannot be created, or the
        ``gcloud`` executable cannot be started.
    """
    cmd = [
        'gcloud', 'storage', 'cp',
        gcs_path,
        local_path,
        '--no-user-output-enabled'
    ]
    try:
        parent = os.path.dirname(local_path)
        # dirname is '' for a bare filename, and os.makedirs('') raises.
        if parent:
            os.makedirs(parent, exist_ok=True)
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        detail = (e.stderr or b'').decode(errors='replace')
        print(f"Warning: Could not download {gcs_path}: {detail}", file=sys.stderr)
        return False
    except OSError as e:
        # Covers a missing gcloud binary (FileNotFoundError) as well as
        # failures to create the destination directory.
        print(f"Warning: Could not download {gcs_path}: {e}", file=sys.stderr)
        return False
    return True


def fetch_prowjob_json(gcs_base_path, output_dir):
    """Fetch prowjob.json and return parsed JSON.

    Returns:
        The decoded JSON document, or None when the download fails or the
        downloaded file is not valid JSON.
    """
    gcs_path = f"{gcs_base_path}/prowjob.json"
    local_path = os.path.join(output_dir, 'prowjob.json')

    if not download_from_gcs(gcs_path, local_path):
        return None

    try:
        with open(local_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        print(f"Warning: Could not read {local_path}: {e}", file=sys.stderr)
        return None


def fetch_build_log(gcs_base_path, output_dir):
    """Fetch build-log.txt; return True when the download succeeded."""
    gcs_path = f"{gcs_base_path}/build-log.txt"
    local_path = os.path.join(output_dir, 'build-log.txt')
    return download_from_gcs(gcs_path, local_path)


def main():
    """Command-line entry point.

    Returns:
        0 when at least one artifact was downloaded; 1 when the URL cannot
        be parsed or no artifact could be fetched.
    """
    parser = argparse.ArgumentParser(description='Fetch Prow CI job artifacts from GCS')
    parser.add_argument('url', help='Prow job URL (gcsweb or direct GCS)')
    parser.add_argument('-o', '--output', default='.work/prow-artifacts',
                        help='Output directory (default: .work/prow-artifacts)')

    args = parser.parse_args()

    # Parse URL
    try:
        parsed = parse_prow_url(args.url)
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    print(f"Prow Job: {parsed['job_name']}")
    print(f"Build ID: {parsed['build_id']}")
    print(f"GCS Path: {parsed['gcs_base_path']}")
    print()

    # Create output directory
    output_dir = os.path.join(args.output, parsed['build_id'])
    os.makedirs(output_dir, exist_ok=True)

    # Fetch prowjob.json
    print("Fetching prowjob.json...")
    prowjob = fetch_prowjob_json(parsed['gcs_base_path'], output_dir)
    # "is not None" so an empty-but-valid JSON document still counts.
    got_prowjob = prowjob is not None
    if got_prowjob:
        print("✓ prowjob.json downloaded")
    else:
        print("✗ Could not fetch prowjob.json")

    # Fetch build-log.txt
    print("Fetching build-log.txt...")
    got_build_log = fetch_build_log(parsed['gcs_base_path'], output_dir)
    if got_build_log:
        print("✓ build-log.txt downloaded")
    else:
        print("✗ Could not fetch build-log.txt")

    if not (got_prowjob or got_build_log):
        # Do not report success when the output directory is empty.
        print("Error: no artifacts could be downloaded", file=sys.stderr)
        return 1

    print(f"\nArtifacts saved to: {output_dir}")

    return 0


if __name__ == '__main__':
    sys.exit(main())

# Patch content that shares the final source line with the module above, preserved verbatim:
# diff --git a/.gitignore b/.gitignore index 96f122fee..cf0dfe409 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ saasfile.yaml *.swo *~ .vscode +.work/ diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 000000000..97af5fff9 --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,155 @@ +# Gitleaks Configuration for this operator +# https://github.com/gitleaks/gitleaks +# +# Purpose: Detect hardcoded secrets, credentials, and sensitive data +# Integration: Runs in pre-commit hook and Tekton CI +# +# Usage: +# gitleaks detect --source . --verbose +# pre-commit run gitleaks +# + +title = "gitleaks config for this operator" + +# ============================================================================= +# GLOBAL ALLOWLIST +# ============================================================================= + +[allowlist] +description = "Global allowlist for this operator" + +# Test fixtures with fake credentials (not real secrets) +# Boilerplate files (managed upstream, reviewed separately) +# Vendor directory (third-party code) +# Generated code +paths = [ + '''test/fixtures/.*''', + '''test/deploy/.*''', + '''.*_test\.go''', + '''boilerplate/.*''', + '''vendor/.*''', + '''zz_generated\..*\.go''', +] + +# Allow specific test values that look like secrets but aren't +regexes = [ + '''(?i)fake[_-]?token''', + '''(?i)test[_-]?secret''', + '''(?i)example[_-]?key''', + '''(?i)dummy[_-]?password''', + '''(?i)placeholder''', + '''AKIAIOSFODNN7EXAMPLE''', # AWS example from docs +] + +# Specific commit hashes that were already reviewed and accepted +# Format: "commit:" +# Example: "commit:abc123def456..."
+commits = [] + +# Stopwords that appear in code but aren't secrets +stopwords = [ + "example", + "test", + "fake", + "dummy", + "placeholder", + "sample", + "mock", +] + +# ============================================================================= +# CUSTOM RULES (this operator specific) +# ============================================================================= + +[[rules]] +id = "operator-service-token" +description = "Operator service token" +regex = '''(?i)ocm[_-]?agent[_-]?token\s*[:=]\s*['"]?[a-zA-Z0-9]{32,}''' +tags = ["token", "ocm", "critical"] + +[[rules]] +id = "openshift-pull-secret" +description = "OpenShift pull secret" +regex = '''(?i)pull[_-]?secret.*auth.*[a-zA-Z0-9+/]{30,}={0,2}''' +tags = ["secret", "openshift", "high"] + +[[rules]] +id = "kubeconfig-embedded" +description = "Embedded kubeconfig with credentials" +regex = '''client-certificate-data:\s*[a-zA-Z0-9+/]{30,}={0,2}''' +tags = ["kubeconfig", "certificate", "critical"] + +[[rules]] +id = "private-key-pem" +description = "PEM-encoded private key" +regex = '''-----BEGIN\s+(RSA\s+)?PRIVATE KEY-----''' +tags = ["private-key", "pem", "critical"] + +# ============================================================================= +# ENTROPY DETECTION +# ============================================================================= +# Gitleaks has built-in entropy detection for high-entropy strings +# Variable names with high entropy are filtered via stopwords above + +# ============================================================================= +# PER-RULE ALLOWLISTS +# ============================================================================= + +# AWS Keys - Use default gitleaks rules +# GitHub Tokens - Use default gitleaks rules +# Generic API Keys - Use default gitleaks rules + +# ============================================================================= +# FILE-SPECIFIC EXCEPTIONS +# ============================================================================= +# 
(Consolidated into main allowlist.paths array above) + +# ============================================================================= +# RULE CUSTOMIZATION +# ============================================================================= + +# Extend default gitleaks rules with custom allowlists + +# Example: Allow specific AWS key format used in test fixtures +# [[rules]] +# id = "aws-access-key-id" +# description = "AWS Access Key ID" +# regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}''' +# tags = ["aws", "access-key", "critical"] +# [rules.allowlist] +# regexes = ['''AKIAIOSFODNN7EXAMPLE'''] # AWS documentation example + +# ============================================================================= +# SEVERITY THRESHOLDS +# ============================================================================= + +# Gitleaks doesn't have severity levels built-in, but we tag rules +# Tags help categorize findings for triage + +# Tag meanings: +# - critical: Must fix immediately (committed secrets, private keys) +# - high: Should fix before merge (API keys, tokens) +# - medium: Review and assess (potential secrets) +# - low: Informational (weak patterns) + +# ============================================================================= +# NOTES +# ============================================================================= + +# 1. This config extends gitleaks default rules +# 2. False positives should be added to allowlist with justification +# 3. Never disable gitleaks entirely (security critical) +# 4. Review allowlist periodically for stale entries +# 5. All allowlist additions should be documented in PR + +# To test this config: +# gitleaks detect --source . --config .gitleaks.toml --verbose +# +# To scan specific commit: +# gitleaks detect --source . --log-opts +# +# To update allowlist: +# 1. Identify false positive +# 2. Add to appropriate allowlist section +# 3. Document reason in comments +# 4. Test with: gitleaks detect --source . 
--config .gitleaks.toml diff --git a/.prek-version b/.prek-version new file mode 100644 index 000000000..5aff472dd --- /dev/null +++ b/.prek-version @@ -0,0 +1 @@ +v0.4.1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..73a0d8020 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,154 @@ +# Contributing to Managed Node Metadata + +Thank you for your interest in contributing to the Managed Node Metadata project. + +## Quick Start + +1. **Setup**: Install Go 1.22.7+, operator-sdk v1.21.0 +2. **Install tools**: `make tools` +3. **Run pre-commit**: `pip install pre-commit && pre-commit install` +4. **Build**: `make go-build` +5. **Test**: `make go-test` +6. **Lint**: `make go-check` + +See [DEVELOPMENT.md](./DEVELOPMENT.md) for detailed setup instructions. + +## Before Submitting a PR + +All contributions must pass: + +1. **Formatting & linting**: `pre-commit run --all-files` +2. **Unit tests**: `make go-test` +3. **Build verification**: `make go-build` +4. **Security scan**: Automatic via pre-commit (gitleaks) + +## Development Workflow + +### Human Contributors + +```bash +# Create a feature branch +git checkout -b feature/my-change + +# Make changes, following existing code patterns +# Add/update tests for your changes + +# Run validation locally +pre-commit run --all-files +make go-test + +# Commit with descriptive message +git commit -m "feat: add support for X" + +# Push and create PR +git push origin feature/my-change +``` + +### AI-Assisted Development + +When using AI coding agents (Claude Code, GitHub Copilot, Cursor, etc.): + +**Agents MUST:** +- Run `pre-commit run` on changed files before committing +- Execute relevant tests after code changes: `make go-test` +- Preserve existing code style and patterns +- Avoid editing generated files (`**/zz_generated.*.go`, `go.sum` without `go.mod`) +- Never bypass hooks with `--no-verify` +- Never commit secrets, tokens, or credentials +- Reuse existing utilities and abstractions +- Make 
incremental, focused changes + +**Validation expectations:** +1. Format check: `go fmt ./...` +2. Lint: `make go-check` (or `pre-commit run golangci-lint`) +3. Type safety: Verified by `go build ./...` in pre-commit +4. Tests: `make go-test` for affected packages +5. Secret scan: Automatic via pre-commit gitleaks hook + +**Required checks before PR:** +- [ ] All pre-commit hooks pass +- [ ] Unit tests pass for modified packages +- [ ] No new linter warnings introduced +- [ ] No secrets or credentials in diff +- [ ] Mocks regenerated if interfaces changed: `boilerplate/_lib/container-make generate` + +## Code Style + +Follow existing patterns: +- Standard Go formatting (`gofmt`) +- golangci-lint rules in `boilerplate/openshift/golang-osd-operator/golangci.yml` +- Ginkgo/Gomega for tests +- GoMock for interface mocking + +## Testing Requirements + +- **Unit tests required** for all new functionality +- Use Ginkgo BDD style: `Describe`, `Context`, `It` +- Mock external dependencies with GoMock +- Aim for meaningful test coverage, not just metrics + +See [TESTING.md](./TESTING.md) for testing guidelines. + +## Regenerating Code + +After modifying API types or interfaces: + +```bash +# Regenerate deepcopy, OpenAPI, mocks (in container for consistency) +boilerplate/_lib/container-make generate +``` + +## Security + +**Never commit:** +- API keys, tokens, passwords +- AWS credentials, kubeconfig files +- Private keys, certificates +- `.env` files with secrets +- Debug statements printing sensitive data + +The pre-commit gitleaks hook will block commits containing secrets. 
+ +**High-risk changes** (requiring extra review): +- Authentication/authorization logic +- RBAC manifests with wildcard permissions +- Network policies +- CI/CD pipeline modifications +- Dockerfile changes + +## Commit Message Format + +Use conventional commits style: + +```text +: + + + + +``` + +Types: `feat`, `fix`, `docs`, `test`, `refactor`, `chore`, `ci` + +Examples: +- `feat: add support for fleet notification filtering` +- `fix: correct RBAC permissions for service monitor` +- `test: add unit tests for network policy handler` + +## Pull Request Process + +1. **Title**: Clear, descriptive summary +2. **Description**: Explain what changed and why +3. **Testing**: Describe how you tested the changes +4. **CI**: All Tekton pipeline checks must pass +5. **Review**: Address review feedback promptly + +## Questions? + +- Check existing documentation in [docs/](./docs/) +- Review similar PRs for patterns +- Ask in PR comments for clarification + +## License + +All contributions are licensed under Apache 2.0. See [LICENSE](./LICENSE). diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md new file mode 100644 index 000000000..0a997dd2f --- /dev/null +++ b/DEVELOPMENT.md @@ -0,0 +1,198 @@ +# Development Guide + +Quick reference for developing the Managed Node Metadata. + +## Prerequisites + +- **Go**: 1.22.7 or later +- **operator-sdk**: v1.21.0 +- **kubectl**: For cluster interaction +- **pre-commit**: `pip install pre-commit` + +## Initial Setup + +```bash +# Clone repository +git clone https://github.com/openshift/managed-node-metadata-operator.git +cd managed-node-metadata-operator + +# Install development tools +make tools + +# Install pre-commit hooks +pre-commit install +``` + +## Common Commands + +### Build +```bash +make go-build # Build operator binary +make docker-build # Build container image +``` + +### Test +```bash +make go-test # Run all unit tests +go test ./controllers/... 
# Test specific package +ginkgo -r ./controllers/ # Run controller tests with Ginkgo +``` + +### Lint +```bash +make go-check # Full linting (golangci-lint) +pre-commit run --all-files # Run all pre-commit hooks +pre-commit run golangci-lint # Lint only +``` + +### Code Generation +```bash +# After modifying API types (api/v1alpha1/*.go) +# or interfaces requiring mocks +boilerplate/_lib/container-make generate + +# What this generates: +# - Deepcopy methods (zz_generated.deepcopy.go) +# - OpenAPI schemas +# - Mock interfaces for testing +``` + +### Run Locally +```bash +# Run against cluster in ~/.kube/config +make run + +# Run with verbose logging +make run-verbose +``` + +### Container-based Build +```bash +# Run make targets inside boilerplate container +# (ensures consistent environment with CI) +boilerplate/_lib/container-make +boilerplate/_lib/container-make go-test +boilerplate/_lib/container-make generate +``` + +## Fast Local Iteration + +**Minimal validation loop:** +```bash +# After code changes +go build ./... # Fast compile check (~5s) +go test ./pkg/mypackage # Run affected tests +pre-commit run # Lint staged files +``` + +**Full validation (pre-PR):** +```bash +pre-commit run --all-files # All hooks (~15-30s) +make go-test # Full test suite +``` + +## Targeted Testing + +```bash +# Run specific test +ginkgo -focus="NetworkPolicy" ./controllers/ + +# Run tests for one package +go test -v ./controllers/ + +# Skip slow tests during development +ginkgo -skip="E2E" -r ./... +``` + +## Debugging + +```bash +# Verbose operator logs +make run-verbose + +# Print specific package logs +go test -v ./pkg/... 2>&1 | grep "MyFunction" + +# Ginkgo verbose output +ginkgo -v ./... 
+``` + +## Dependency Management + +```bash +# Add new dependency +go get github.com/some/package@v1.2.3 + +# Update dependency +go get -u github.com/some/package + +# Tidy (removes unused, adds missing) +go mod tidy + +# Verify checksums +go mod verify +``` + +**Note**: `go.sum` changes automatically trigger validation in pre-commit. + +## Architecture Pointers + +- **API Types**: `api/v1alpha1/` - CRD definitions +- **Controllers**: `controllers/` - Reconciliation logic +- **Business Logic**: `controllers/` - Resource management +- **Tests**: `*_test.go` alongside source, `*_suite_test.go` for Ginkgo +- **Mocks**: `pkg/util/test/generated/` - Generated mocks +- **E2E**: `test/e2e/` - End-to-end tests + +## CI Parity + +Local pre-commit hooks mirror Tekton CI checks: +- **go-check** ↔ Tekton lint job +- **go-build** ↔ Compilation in CI +- **go-test** ↔ Unit test job +- **gitleaks** ↔ Security scanning + +Run `pre-commit run --all-files` before pushing to catch CI failures early. + +## Boilerplate Integration + +This repo uses Red Hat's standardized boilerplate: +- Centralized Makefiles: `boilerplate/openshift/golang-osd-operator/` +- Standard targets: `go-build`, `go-check`, `go-test` +- Container builds: `boilerplate/_lib/container-make` +- Update boilerplate: `make boilerplate-update` + +## Troubleshooting + +**Mock generation fails:** +```bash +# Use container-make for consistency with CI +boilerplate/_lib/container-make generate +``` + +**Pre-commit hook timeout:** +```bash +# macOS: Install GNU timeout +brew install coreutils + +# Linux: timeout is built-in +``` + +**go.sum checksum mismatch:** +```bash +export GOPROXY="https://proxy.golang.org" +go mod tidy +``` + +**Tests fail locally but pass in CI:** +```bash +# Use container environment +boilerplate/_lib/container-make go-test +``` + +## Further Reading + +- [Testing Guide](./TESTING.md) +- [Design Documentation](./docs/design.md) +- [How to Test](./docs/how-to-test.md) +- [Operator SDK 
Docs](https://sdk.operatorframework.io/) diff --git a/TESTING.md b/TESTING.md new file mode 100644 index 000000000..22f18a59c --- /dev/null +++ b/TESTING.md @@ -0,0 +1,280 @@ +# Testing Guide + +Testing guidelines for the Managed Node Metadata Operator. + +## Framework + +- **Ginkgo v2**: BDD testing framework +- **Gomega**: Matchers and assertions +- **GoMock**: Interface mocking +- **envtest**: Kubernetes API server for controller testing + +## Quick Commands + +```bash +# Run all tests +make go-test + +# Run tests with Ginkgo runner +ginkgo -r ./... + +# Run specific package +go test ./controllers/ + +# Verbose output +ginkgo -v ./... + +# Run focused test +ginkgo -focus="NetworkPolicy" ./controllers/ + +# Container-based (CI parity) +boilerplate/_lib/container-make go-test +``` + +## Writing Tests + +### Test Structure + +Each package with tests includes: +- `*_suite_test.go`: Ginkgo test suite setup +- `*_test.go`: Actual test cases + +**Example:** +```go +package mypackage_test + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("MyFeature", func() { + Context("when condition X", func() { + It("should do Y", func() { + result := MyFunction() + Expect(result).To(Equal(expected)) + }) + }) +}) +``` + +### Bootstrapping Tests + +```bash +cd pkg/newpackage +ginkgo bootstrap # Creates suite +ginkgo generate myfile.go # Creates test file +``` + +### Mocking Interfaces + +Use GoMock for external dependencies: + +```go +//go:generate mockgen -destination=mocks/mock_client.go -package=mocks sigs.k8s.io/controller-runtime/pkg/client Client +``` + +**Regenerate all mocks:** +```bash +boilerplate/_lib/container-make generate +``` + +**Why container-make?** +- Ensures same mockgen version as CI +- Prevents version drift in generated code + +## Test Organization + +### Unit Tests +- Test individual functions and methods +- Mock external dependencies (Kubernetes client, HTTP calls) +- Fast execution (<1s per package) +- Located alongside source code + +### Controller Tests +- Test reconciliation logic +- Use envtest for simulated Kubernetes API +- Test custom resource lifecycle +- Located in `controllers/*/` + +### E2E Tests +- Full operator deployment +- Real cluster interaction +- Located in `test/e2e/` +- Run in CI via Tekton + +## Agent-Driven Validation + +When AI agents modify code: + +**Minimal validation:** +```bash +# After changing controllers/ +go test ./controllers/ +``` + +**Full validation before commit:** +```bash +make go-test +``` + +**If tests fail:** +1. Read test output carefully +2. Fix the underlying issue (don't skip tests) +3. Rerun to confirm fix +4. 
Regenerate mocks if interface changed: `boilerplate/_lib/container-make generate` + +## Common Patterns + +### Testing Controllers + +```go +It("should reconcile resource", func() { + // Create custom resource + resource := &v1alpha1.CustomResource{...} + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + + // Trigger reconciliation + _, err := reconciler.Reconcile(ctx, req) + Expect(err).NotTo(HaveOccurred()) + + // Verify reconciliation result + Expect(k8sClient.Get(ctx, resourceKey, resource)).To(Succeed()) + Expect(resource.Status.Conditions).ToNot(BeEmpty()) +}) +``` + +### Testing Error Conditions + +```go +It("should return error when resource not found", func() { + _, err := reconciler.Reconcile(ctx, reqForNonExistent) + Expect(err).To(HaveOccurred()) +}) +``` + +### Using Matchers + +```go +// Equality +Expect(result).To(Equal(expected)) + +// Nil checks +Expect(err).NotTo(HaveOccurred()) +Expect(obj).To(BeNil()) + +// Collections +Expect(slice).To(ContainElement("item")) +Expect(slice).To(HaveLen(3)) +Expect(slice).To(BeEmpty()) + +// Booleans +Expect(condition).To(BeTrue()) +Expect(condition).To(BeFalse()) + +// Eventually (async) +Eventually(func() bool { + return checkCondition() +}).Should(BeTrue()) +``` + +## Coverage + +Generate coverage report: +```bash +go test -coverprofile=coverage.out ./... +go tool cover -html=coverage.out -o coverage.html +``` + +**Note**: Aim for meaningful coverage, not arbitrary percentages. +- Test critical paths and error handling +- Don't test generated code or trivial getters/setters + +## Debugging Tests + +```bash +# Verbose Ginkgo output +ginkgo -v ./... + +# Print statements in tests +fmt.Fprintf(GinkgoWriter, "Debug: %v\n", value) + +# Skip flaky tests temporarily +ginkgo -skip="FlakyTest" ./... + +# Run single test +ginkgo -focus="exact test name" ./... 
+``` + +## CI Expectations + +Tests run in the Tekton pipeline with: +- Fresh environment +- No cached dependencies +- Strict timeout limits + +**Local CI parity:** +```bash +boilerplate/_lib/container-make go-test +``` + +## Test Performance + +**Target timings:** +- Unit tests: <5s per package +- Controller tests: <15s per controller +- Full suite: <2min + +**If tests are slow:** +- Check for unnecessary sleeps +- Use `Eventually` with shorter intervals +- Mock external calls +- Avoid creating unnecessary Kubernetes resources + +## Common Issues + +**Mock not found:** +```bash +# Regenerate mocks +boilerplate/_lib/container-make generate +``` + +**envtest not installed:** +```bash +make setup-envtest +``` + +**Test passes locally, fails in CI:** +```bash +# Run in container environment +boilerplate/_lib/container-make go-test + +# Check for: +# - Time-dependent tests +# - Environment-specific assumptions +# - File path dependencies +``` + +**Flaky tests:** +- Use `Eventually` instead of `Expect` for async operations +- Avoid hardcoded delays +- Ensure test isolation (clean up resources) + +## Pre-commit Integration + +Tests could be run by pre-commit whenever Go files change, using a hook like: +```yaml +- id: go-test + entry: make go-test + files: '\.go$' +``` + +This hook is NOT in the current pre-commit config (too slow for pre-commit). +Run the tests manually before pushing: `make go-test` + +## Further Reading + +- [Ginkgo Documentation](https://onsi.github.io/ginkgo/) +- [Gomega Matchers](https://onsi.github.io/gomega/) +- [GoMock Guide](https://github.com/golang/mock) +- [controller-runtime Testing](https://book.kubebuilder.io/reference/testing.html) diff --git a/hack/ci.sh b/hack/ci.sh new file mode 100755 index 000000000..9b556ce99 --- /dev/null +++ b/hack/ci.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +if ! command -v prek &>/dev/null; then + echo "Error: prek is not installed. 
Install with: uv tool install prek" >&2 + exit 1 +fi + +prek run --config hack/prek.ci.toml --all-files diff --git a/hack/prek.ci.toml b/hack/prek.ci.toml new file mode 100644 index 000000000..4c9be8dfb --- /dev/null +++ b/hack/prek.ci.toml @@ -0,0 +1,63 @@ +# Prek Configuration for CI +# Excludes hooks requiring internal network access or that may not be available in CI +# https://prek.j178.dev/ + +# File hygiene and syntax validation +[[repos]] +repo = "builtin" +hooks = [ + { id = "trailing-whitespace", args = ["--markdown-linebreak-ext=md"], exclude = "^(boilerplate/|test/e2e/|\\.pre-commit-config\\.yaml)" }, + { id = "end-of-file-fixer", exclude = "^(boilerplate/|test/e2e/|\\.pre-commit-config\\.yaml)" }, + { id = "check-added-large-files", args = ["--maxkb=1024"] }, + { id = "check-case-conflict" }, + { id = "check-merge-conflict" }, + { id = "check-json" }, + { id = "check-yaml", args = ["--allow-multiple-documents"] }, + { id = "check-toml" }, +] + +# golangci-lint static analysis +[[repos]] +repo = "https://github.com/golangci/golangci-lint" +rev = "v2.0.2" +hooks = [ + { id = "golangci-lint", args = [ + "--config=boilerplate/openshift/golang-osd-operator/golangci.yml", + "--timeout=120s" + ] }, +] + +# Local custom hooks +[[repos]] +repo = "local" +hooks = [ + # Go build check + { + id = "go-build", + name = "go build", + language = "system", + entry = "bash -c 'T=$(command -v timeout || command -v gtimeout || echo); ${T:+$T 30s} go build ./...'", + types = ["go"], + pass_filenames = false + }, + + # Go mod tidy check + { + id = "go-mod-tidy", + name = "go mod tidy", + language = "system", + entry = "bash -c 'T=$(command -v timeout || command -v gtimeout || echo); ${T:+$T 60s} go mod tidy && git diff --exit-code go.mod go.sum'", + files = '(\.go$|go\.(mod|sum)$)', + pass_filenames = false + }, + + # RBAC wildcard check + { + id = "rbac-wildcard-check", + name = "RBAC wildcard permissions", + language = "system", + entry = "bash -c 'make 
rbac-wildcard-check'", + files = '^deploy/.*\.ya?ml$', + pass_filenames = false + }, +] diff --git a/prek.toml b/prek.toml new file mode 100644 index 000000000..3f49270df --- /dev/null +++ b/prek.toml @@ -0,0 +1,78 @@ +# Prek Configuration for this operator +# https://prek.j178.dev/ + +# File hygiene and syntax validation +[[repos]] +repo = "builtin" +hooks = [ + { id = "trailing-whitespace", args = ["--markdown-linebreak-ext=md"], exclude = "^(boilerplate/|test/e2e/|\\.pre-commit-config\\.yaml)" }, + { id = "end-of-file-fixer", exclude = "^(boilerplate/|test/e2e/|\\.pre-commit-config\\.yaml)" }, + { id = "check-added-large-files", args = ["--maxkb=1024"] }, + { id = "check-case-conflict" }, + { id = "check-merge-conflict" }, + { id = "check-json" }, + { id = "check-yaml", args = ["--allow-multiple-documents"] }, + { id = "check-toml" }, +] + +# Red Hat InfoSec security scanning +[[repos]] +repo = "https://gitlab.cee.redhat.com/infosec-public/developer-workbench/tools.git" +rev = "rh-pre-commit-2.3.0" +hooks = [ + { id = "rh-pre-commit", stages = ["pre-commit"] }, +] + +# Gitleaks secret scanning +[[repos]] +repo = "https://github.com/gitleaks/gitleaks" +rev = "v8.18.0" +hooks = [ + { id = "gitleaks", args = ["--config=.gitleaks.toml"] }, +] + +# golangci-lint static analysis +[[repos]] +repo = "https://github.com/golangci/golangci-lint" +rev = "v2.0.2" +hooks = [ + { id = "golangci-lint", args = [ + "--config=boilerplate/openshift/golang-osd-operator/golangci.yml", + "--timeout=120s" + ] }, +] + +# Local custom hooks +[[repos]] +repo = "local" +hooks = [ + # Go build check + { + id = "go-build", + name = "go build", + language = "system", + entry = "bash -c 'T=$(command -v timeout || command -v gtimeout || echo); ${T:+$T 30s} go build ./...'", + types = ["go"], + pass_filenames = false + }, + + # Go mod tidy check + { + id = "go-mod-tidy", + name = "go mod tidy", + language = "system", + entry = "bash -c 'T=$(command -v timeout || command -v gtimeout || echo); 
${T:+$T 60s} go mod tidy && git diff --exit-code go.mod go.sum'", + files = '(\.go$|go\.(mod|sum)$)', + pass_filenames = false + }, + + # RBAC wildcard check + { + id = "rbac-wildcard-check", + name = "RBAC wildcard permissions", + language = "system", + entry = "bash -c 'make rbac-wildcard-check'", + files = '^deploy/.*\.ya?ml$', + pass_filenames = false + }, +] From 050fbe063d7ca06069833d54e8f88e3c2e826f2c Mon Sep 17 00:00:00 2001 From: devppratik Date: Tue, 2 Jun 2026 21:40:21 +0530 Subject: [PATCH 2/2] feat: add Claude Code infrastructure and documentation --- .claude/hooks/README.md | 8 +- .claude/hooks/pre-edit.sh | 17 ++++ .claude/hooks/stop-prek-validation.sh | 2 +- .claude/skills/README.md | 6 +- .claude/skills/prow-ci/SKILL.md | 82 +++++++++---------- .../skills/prow-ci/fetch_prow_artifacts.py | 8 +- 6 files changed, 72 insertions(+), 51 deletions(-) diff --git a/.claude/hooks/README.md b/.claude/hooks/README.md index a798afc17..8db53834e 100644 --- a/.claude/hooks/README.md +++ b/.claude/hooks/README.md @@ -138,7 +138,7 @@ Used for local development with internal network access. **Usage**: ```bash -prek run --all-files +prek run --config hack/prek.ci.toml ``` ### 2. **hack/prek.ci.toml** (CI-compatible) @@ -179,13 +179,13 @@ This sets up pre-commit hooks that run validation automatically. ### Automatic Validation Prek runs automatically: -- **On every turn**: Stop hook runs `prek run --all-files` +- **On every turn**: Stop hook runs `prek run --config hack/prek.ci.toml` - **On commit**: Pre-commit hook runs relevant checks ### Manual Validation ```bash # Run all checks -prek run --all-files +prek run --config hack/prek.ci.toml # Run specific check prek run gitleaks @@ -320,7 +320,7 @@ SKIP=hook-id git commit - Open an issue documenting the problem - Request reviewer approval before merge 3. 
**Re-run full validation:** - - `prek run --all-files` locally + - `prek run --config hack/prek.ci.toml` locally - Ensure all required CI checks pass - Get explicit code review approval diff --git a/.claude/hooks/pre-edit.sh b/.claude/hooks/pre-edit.sh index 45c96aa8f..192a9fcb5 100755 --- a/.claude/hooks/pre-edit.sh +++ b/.claude/hooks/pre-edit.sh @@ -18,10 +18,27 @@ fi # Normalize file path to be repo-relative for consistent pattern matching # This ensures patterns like vendor/* work regardless of whether the input is absolute or relative REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || echo ".") + +# Reject absolute paths outside the repo if [[ "$FILE" = /* ]]; then + if [[ ! "$FILE" == "$REPO_ROOT"/* ]]; then + echo "❌ ERROR: File path is outside repository: $FILE" + exit 1 + fi # Convert absolute path to repo-relative FILE="${FILE#"$REPO_ROOT"/}" fi + +# Canonicalize and reject traversal segments +if command -v realpath >/dev/null 2>&1; then + CANONICAL=$(realpath -s --relative-to="$REPO_ROOT" "$FILE" 2>/dev/null || echo "") + if [[ -z "$CANONICAL" ]] || [[ "$CANONICAL" == *".."* ]]; then + echo "❌ ERROR: Invalid file path (contains traversal): $FILE" + exit 1 + fi + FILE="$CANONICAL" +fi + # Strip leading ./ FILE="${FILE#./}" diff --git a/.claude/hooks/stop-prek-validation.sh b/.claude/hooks/stop-prek-validation.sh index 52c6cc142..de573e2c3 100755 --- a/.claude/hooks/stop-prek-validation.sh +++ b/.claude/hooks/stop-prek-validation.sh @@ -27,7 +27,7 @@ if [[ -z "$REPO_ROOT" ]]; then jq -n '{"decision": "block", "reason": "Not in a git repository. Cannot run prek validation."}' exit 0 fi -cd "$REPO_ROOT" +cd "$REPO_ROOT" || exit 1 # Check for jq dependency if ! 
command -v jq &> /dev/null; then diff --git a/.claude/skills/README.md b/.claude/skills/README.md index e7063f86f..ef7210877 100644 --- a/.claude/skills/README.md +++ b/.claude/skills/README.md @@ -63,13 +63,13 @@ To add a new skill: 6. Test the skill workflow **Directory structure**: -``` +```text .claude/skills/ ├── README.md └── skillname/ ├── SKILL.md # Required: skill definition └── reference/ # Optional: supporting docs ``` ## Integration with Other Components diff --git a/.claude/skills/prow-ci/SKILL.md b/.claude/skills/prow-ci/SKILL.md index 843cdb14b..11cd25296 100644 --- a/.claude/skills/prow-ci/SKILL.md +++ b/.claude/skills/prow-ci/SKILL.md @@ -4,7 +4,7 @@ description: Fetch and analyze OpenShift Prow CI job failures with automated art trigger: prow, prow-ci, /prow-ci, ci results, check ci, analyze ci failure --- -# Prow CI Analysis for Rbac Permissions Operator +# Prow CI Analysis This skill fetches Prow CI job artifacts from Google Cloud Storage and provides automated failure analysis. 
@@ -29,7 +29,7 @@ gh pr checks /prow-ci # Or ask naturally: -"Analyze the lint failure in PR 328" +"Analyze the lint failure in PR <PR_NUMBER>" "Check why the validate job failed" "Show me what broke in the coverage job" ``` @@ -70,14 +70,14 @@ gh pr view --json statusCheckRollup --jq '.statusCheckRollup[] | sel Example Prow job URL: ``` -https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/328/pull-ci-openshift-rbac-permissions-operator-master-lint/2059308810190721024 +https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_managed-node-metadata-operator/<PR_NUMBER>/pull-ci-openshift-managed-node-metadata-operator-master-lint/<JOB_ID> ``` ### Step 2: Fetch and Analyze Run the fetch script first: ```bash -cd /Users/ppanda/rh-projects/ROSA-730/rbac-permissions-operator/.claude/skills/prow-ci +cd "$(git rev-parse --show-toplevel)/.claude/skills/prow-ci" python3 fetch_prow_artifacts.py "" -o .work/prow-artifacts ``` @@ -109,18 +109,18 @@ Create a clear summary for the user with: ### Example Workflow ```bash -# User provides: "Analyze the lint failure in PR 328" +# User provides: "Analyze the lint failure in PR <PR_NUMBER>" # 1. Get Prow job URL -gh pr checks 328 | grep lint +gh pr checks <PR_NUMBER> | grep lint # 2. Fetch artifacts python3 .claude/skills/prow-ci/fetch_prow_artifacts.py \ - "https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/328/pull-ci-openshift-rbac-permissions-operator-master-lint/2059308810190721024" + "https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_managed-node-metadata-operator/<PR_NUMBER>/pull-ci-openshift-managed-node-metadata-operator-master-lint/<JOB_ID>" # 3. Analyze python3 .claude/skills/prow-ci/analyze_failure.py \ - .work/prow-artifacts/2059308810190721024 \ + .work/prow-artifacts/<JOB_ID> \ -f markdown # 4. 
Review the output and provide actionable summary @@ -130,7 +130,7 @@ python3 .claude/skills/prow-ci/analyze_failure.py \ **Main Dashboard**: https://prow.ci.openshift.org/ **CI Search**: https://github.com/openshift/ci-search -**Job History**: https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator +**Job History**: https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator ## Common Use Cases @@ -138,7 +138,7 @@ python3 .claude/skills/prow-ci/analyze_failure.py \ ```bash # View recent PR jobs -curl -s "https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator&type=presubmit" | grep -E "pull-ci-openshift-rbac-permissions-operator" +curl -s "https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator&type=presubmit" | grep -E "pull-ci-openshift-managed-node-metadata-operator" # Check latest job status for specific PR # Replace PR_NUMBER with actual PR number @@ -148,12 +148,12 @@ gh pr view PR_NUMBER --json statusCheckRollup --jq '.statusCheckRollup[] | selec ### 2. Access Build Logs Prow logs are stored at: -- **Pull request jobs**: `gs://test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/[PR_NUMBER]/[JOB_NAME]/[JOB_ID]` +- **Pull request jobs**: `gs://test-platform-results/pr-logs/pull/openshift_managed-node-metadata-operator/[PR_NUMBER]/[JOB_NAME]/[JOB_ID]` - **Periodic jobs**: `gs://test-platform-results/logs/[JOB_NAME]/[JOB_ID]` **Viewing logs via web**: ```text -https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_rbac-permissions-operator/[PR_NUMBER]/[JOB_NAME]/[JOB_ID] +https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/openshift_managed-node-metadata-operator/[PR_NUMBER]/[JOB_NAME]/[JOB_ID] ``` ### 3. Analyze Test Failures @@ -175,16 +175,16 @@ gh pr checks PR_NUMBER | grep -i "fail" ### 4. 
Common Job Names **Prow CI Jobs** (configured in openshift/release): -- `pull-ci-openshift-rbac-permissions-operator-master-e2e-binary-build-success` - E2E binary build verification -- `pull-ci-openshift-rbac-permissions-operator-master-coverage` - Code coverage analysis (with Codecov) -- `pull-ci-openshift-rbac-permissions-operator-master-lint` - Linting checks -- `pull-ci-openshift-rbac-permissions-operator-master-test` - Unit tests -- `pull-ci-openshift-rbac-permissions-operator-master-validate` - Validation checks +- `pull-ci-openshift-managed-node-metadata-operator-master-e2e-binary-build-success` - E2E binary build verification +- `pull-ci-openshift-managed-node-metadata-operator-master-coverage` - Code coverage analysis (with Codecov) +- `pull-ci-openshift-managed-node-metadata-operator-master-lint` - Linting checks +- `pull-ci-openshift-managed-node-metadata-operator-master-test` - Unit tests +- `pull-ci-openshift-managed-node-metadata-operator-master-validate` - Validation checks **Tekton Pipelines** (configured in `.tekton/`): -- `rbac-permissions-operator-pull-request` - Main PR pipeline (docker build with OCI-TA) -- `rbac-permissions-operator-e2e-pull-request` - E2E testing pipeline -- `rbac-permissions-operator-pko-pull-request` - PKO (Package Operator) pipeline +- `managed-node-metadata-operator-pull-request` - Main PR pipeline (docker build with OCI-TA) +- `managed-node-metadata-operator-e2e-pull-request` - E2E testing pipeline +- `managed-node-metadata-operator-pko-pull-request` - PKO (Package Operator) pipeline - Corresponding `-push` pipelines for merged commits ## Debugging CI Failures @@ -197,7 +197,7 @@ gh pr checks PR_NUMBER ### Step 2: Access Prow UI Open the Prow link from PR checks or construct manually: ```text -https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator&type=presubmit +https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator&type=presubmit ``` ### Step 3: Review Logs @@ -238,15 +238,15 
@@ make docker-build This repo uses **both Prow and Tekton** for comprehensive CI: **Prow CI** (openshift/release): -- Configuration: `ci-operator/config/openshift/rbac-permissions-operator/openshift-rbac-permissions-operator-master.yaml` +- Configuration: `ci-operator/config/openshift/managed-node-metadata-operator/openshift-managed-node-metadata-operator-master.yaml` - Runs: lint, test, validate, coverage, e2e-binary-build -- Uses Codecov for coverage reporting (secret: `rbac-permissions-operator-codecov-token`) +- Uses Codecov for coverage reporting (secret: `managed-node-metadata-operator-codecov-token`) - Skip rules: Changes to `.tekton/`, `.github/`, `.md` files, `OWNERS`, `LICENSE` don't trigger most jobs **Tekton Pipelines** (`.tekton/`): - Primary build pipeline using Pipelines as Code - Three pipeline types: main, e2e, pko -- Builds container images to Quay (rbac-permissions-operator-tenant) +- Builds container images to Quay (managed-node-metadata-operator-tenant) - Pull request images expire after 5 days - Uses boilerplate framework from `openshift/boilerplate` (docker-build-oci-ta pipeline) @@ -260,31 +260,31 @@ gh pr checks gh pr view --json statusCheckRollup # Filter only Prow jobs -gh pr checks | grep "pull-ci-openshift-rbac-permissions-operator" +gh pr checks | grep "pull-ci-openshift-managed-node-metadata-operator" # Check Tekton pipeline status gh pr view --json statusCheckRollup --jq '.statusCheckRollup[] | select(.context | contains("Tekton"))' # Open Prow dashboard in browser (cross-platform) # Copy and paste this URL into your browser: -# https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator +# https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator # Or use platform-specific command: -# macOS: open "https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator" -# Linux: xdg-open "https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator" -# Windows: start 
"https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator" +# macOS: open "https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator" +# Linux: xdg-open "https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator" +# Windows: start "https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator" # View specific PR on Prow (replace ) -# https://prow.ci.openshift.org/?repo=openshift%2Frbac-permissions-operator&type=presubmit&pull= +# https://prow.ci.openshift.org/?repo=openshift%2Fmanaged-node-metadata-operator&type=presubmit&pull=<PR_NUMBER> ``` ## Troubleshooting ### Can't find job results? - Check both Prow AND Tekton - this repo uses both systems -- Prow jobs: `pull-ci-openshift-rbac-permissions-operator-master-*` +- Prow jobs: `pull-ci-openshift-managed-node-metadata-operator-master-*` - Tekton jobs: Usually show as "Tekton" or pipeline names in PR checks -- Verify repo name format in Prow: `openshift_rbac-permissions-operator` (underscore, not dash) +- Verify repo name format in Prow: `openshift_managed-node-metadata-operator` (org and repo are joined by an underscore; the repo name keeps its dashes) - Ensure PR has been opened and CI has run ### Logs show permission denied? 
@@ -303,7 +303,7 @@ gh pr view --json statusCheckRollup --jq '.statusCheckRollup[] | sel - Common issues: - Image build failures → Check Dockerfile syntax and build context - Pipeline timeout → Check for slow steps or network issues - - Auth failures → Secret configuration in `rbac-permissions-operator-tenant` namespace + - Auth failures → Secret configuration in `managed-node-metadata-operator-tenant` namespace - Local validation: ```bash # Validate Tekton YAML syntax @@ -333,27 +333,27 @@ git clone https://github.com/openshift/ci-search.git ## CI Configuration Files **Prow Configuration** (in openshift/release repo): -- Location: `ci-operator/config/openshift/rbac-permissions-operator/openshift-rbac-permissions-operator-master.yaml` +- Location: `ci-operator/config/openshift/managed-node-metadata-operator/openshift-managed-node-metadata-operator-master.yaml` - Update process: Submit PR to openshift/release repository -- Auto-generated jobs in: `ci-operator/jobs/openshift/rbac-permissions-operator/` +- Auto-generated jobs in: `ci-operator/jobs/openshift/managed-node-metadata-operator/` **Tekton Pipelines** (in this repo): - Location: `.tekton/` directory - Files: - - `rbac-permissions-operator-pull-request.yaml` - Main PR pipeline - - `rbac-permissions-operator-push.yaml` - Post-merge pipeline - - `rbac-permissions-operator-e2e-pull-request.yaml` - E2E testing - - `rbac-permissions-operator-pko-pull-request.yaml` - PKO validation + - `managed-node-metadata-operator-pull-request.yaml` - Main PR pipeline + - `managed-node-metadata-operator-push.yaml` - Post-merge pipeline + - `managed-node-metadata-operator-e2e-pull-request.yaml` - E2E testing + - `managed-node-metadata-operator-pko-pull-request.yaml` - PKO validation - Triggered by: Pipelines as Code (via Tekton) - Uses: Boilerplate docker-build-oci-ta pipeline from openshift/boilerplate ## Coverage Reporting This repository uses Codecov for coverage tracking: -- Secret: `rbac-permissions-operator-codecov-token` 
(stored in Prow) +- Secret: `managed-node-metadata-operator-codecov-token` (stored in Prow) - Generate coverage locally: `make coverage` - Coverage runs on PRs and post-merge (`publish-coverage`) -- Dashboard: Check Codecov for rbac-permissions-operator +- Dashboard: Check Codecov for managed-node-metadata-operator ## Integration with Other Skills diff --git a/.claude/skills/prow-ci/fetch_prow_artifacts.py b/.claude/skills/prow-ci/fetch_prow_artifacts.py index 53d2272ce..246007a63 100755 --- a/.claude/skills/prow-ci/fetch_prow_artifacts.py +++ b/.claude/skills/prow-ci/fetch_prow_artifacts.py @@ -81,8 +81,12 @@ def fetch_prowjob_json(gcs_base_path, output_dir): local_path = os.path.join(output_dir, 'prowjob.json') if download_from_gcs(gcs_path, local_path): - with open(local_path, 'r') as f: - return json.load(f) + try: + with open(local_path, 'r') as f: + return json.load(f) + except json.JSONDecodeError as e: + print(f"Error: Could not parse JSON from {local_path}: {e}", file=sys.stderr) + return None return None