// LoadBraintrustInsightDatasetFile forwards to
// integrationspkg.LoadBraintrustInsightDatasetFile for the given path.
func LoadBraintrustInsightDatasetFile(path string) (BraintrustInsightDataset, error) {
	return integrationspkg.LoadBraintrustInsightDatasetFile(path)
}

// LoadBraintrustInsightDatasetData forwards to
// integrationspkg.LoadBraintrustInsightDatasetData, passing data and path
// through unchanged.
func LoadBraintrustInsightDatasetData(data []byte, path string) (BraintrustInsightDataset, error) {
	return integrationspkg.LoadBraintrustInsightDatasetData(data, path)
}

// WriteBraintrustInsightDatasetFile forwards to
// integrationspkg.WriteBraintrustInsightDatasetFile to persist dataset at path.
func WriteBraintrustInsightDatasetFile(path string, dataset BraintrustInsightDataset) error {
	return integrationspkg.WriteBraintrustInsightDatasetFile(path, dataset)
}

// FetchBraintrustInsightDataset forwards to
// integrationspkg.FetchBraintrustInsightDataset with the same context, trend
// source, and base config.
func FetchBraintrustInsightDataset(ctx context.Context, source TrendSourceConfig, base Config) (BraintrustInsightDataset, error) {
	return integrationspkg.FetchBraintrustInsightDataset(ctx, source, base)
}

// ApplyBraintrustInsightDataset forwards to
// integrationspkg.ApplyBraintrustInsightDataset; the three flags are passed
// through in the same order (applyScenarios, applyPatches, approved).
func ApplyBraintrustInsightDataset(base Config, dataset BraintrustInsightDataset, applyScenarios, applyPatches, approved bool) (Config, error) {
	return integrationspkg.ApplyBraintrustInsightDataset(base, dataset, applyScenarios, applyPatches, approved)
}
Config, dataset BraintrustInsightDataset, applyScenarios, applyPatches, approved bool) (Config, error) { + return integrationspkg.ApplyBraintrustInsightDataset(base, dataset, applyScenarios, applyPatches, approved) +} + func BuildReleaseGateAttestation(report Report, artifact ReplayArtifact, rawKey string, keyID string) (ReleaseGateAttestation, error) { return attestpkg.BuildReleaseGateAttestation(report, artifact, rawKey, keyID) } diff --git a/cleanr/integrations/braintrust_patch.go b/cleanr/integrations/braintrust_patch.go new file mode 100644 index 0000000..63d5c60 --- /dev/null +++ b/cleanr/integrations/braintrust_patch.go @@ -0,0 +1,145 @@ +package integrations + +import ( + "fmt" + "strings" +) + +func applyConfigPatchOperation(root map[string]any, op BraintrustConfigPatchOperation) error { + switch strings.TrimSpace(op.Op) { + case "set": + return applySetPatch(root, strings.TrimSpace(op.Path), op.Value) + case "append_unique": + return applyAppendUniquePatch(root, strings.TrimSpace(op.Path), op.Value) + default: + return fmt.Errorf("apply config patch: unsupported op %q for %s", op.Op, op.Path) + } +} + +func applySetPatch(root map[string]any, path string, value any) error { + parent, leaf, err := resolvePatchParent(root, path) + if err != nil { + return err + } + parent[leaf] = value + return nil +} + +func applyAppendUniquePatch(root map[string]any, path string, value any) error { + parent, leaf, err := resolvePatchParent(root, path) + if err != nil { + return err + } + var existing []string + if raw, ok := parent[leaf]; ok { + items, err := toStringSlice(raw) + if err != nil { + return fmt.Errorf("apply config patch %s: %w", path, err) + } + existing = items + } + additions, err := toStringSlice(value) + if err != nil { + return fmt.Errorf("apply config patch %s: %w", path, err) + } + seen := make(map[string]struct{}, len(existing)) + for _, item := range existing { + seen[item] = struct{}{} + } + for _, item := range additions { + if _, ok := 
seen[item]; ok { + continue + } + existing = append(existing, item) + seen[item] = struct{}{} + } + parent[leaf] = existing + return nil +} + +func resolvePatchParent(root map[string]any, path string) (map[string]any, string, error) { + segments := strings.Split(path, ".") + if len(segments) == 0 || strings.TrimSpace(path) == "" { + return nil, "", fmt.Errorf("apply config patch: empty path") + } + current := root + for _, segment := range segments[:len(segments)-1] { + next, err := descendPatchSegment(current, segment) + if err != nil { + return nil, "", fmt.Errorf("apply config patch %s: %w", path, err) + } + current = next + } + return current, segments[len(segments)-1], nil +} + +func descendPatchSegment(current map[string]any, segment string) (map[string]any, error) { + segment = strings.TrimSpace(segment) + if segment == "" { + return nil, fmt.Errorf("invalid empty path segment") + } + if !strings.Contains(segment, "[") { + child, ok := current[segment] + if !ok { + next := map[string]any{} + current[segment] = next + return next, nil + } + mapped, ok := child.(map[string]any) + if !ok { + return nil, fmt.Errorf("segment %s is not an object", segment) + } + return mapped, nil + } + + open := strings.Index(segment, "[") + close := strings.LastIndex(segment, "]") + if open <= 0 || close <= open+1 { + return nil, fmt.Errorf("invalid selector segment %s", segment) + } + key := strings.TrimSpace(segment[:open]) + selector := strings.TrimSpace(segment[open+1 : close]) + parts := strings.SplitN(selector, "=", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid selector segment %s", segment) + } + field := strings.TrimSpace(parts[0]) + want := strings.TrimSpace(parts[1]) + items, ok := current[key] + if !ok { + return nil, fmt.Errorf("segment %s does not exist", key) + } + list, ok := items.([]any) + if !ok { + return nil, fmt.Errorf("segment %s is not a list", key) + } + for _, item := range list { + mapped, ok := item.(map[string]any) + if !ok { + continue 
+ } + if fmt.Sprint(mapped[field]) == want { + return mapped, nil + } + } + return nil, fmt.Errorf("no list item in %s matched %s=%s", key, field, want) +} + +func toStringSlice(value any) ([]string, error) { + switch typed := value.(type) { + case []string: + return append([]string(nil), typed...), nil + case []any: + out := make([]string, 0, len(typed)) + for _, item := range typed { + out = append(out, fmt.Sprint(item)) + } + return out, nil + case string: + return []string{typed}, nil + case nil: + return nil, nil + default: + return nil, fmt.Errorf("expected a string or string list") + } +} diff --git a/cleanr/integrations/braintrust_sync.go b/cleanr/integrations/braintrust_sync.go new file mode 100644 index 0000000..6280848 --- /dev/null +++ b/cleanr/integrations/braintrust_sync.go @@ -0,0 +1,295 @@ +package integrations + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/devr-tools/cleanr/cleanr/core" + runtimepkg "github.com/devr-tools/cleanr/cleanr/integrations/runtime" + "gopkg.in/yaml.v3" +) + +const braintrustInsightDatasetVersion = "v1alpha1" + +type BraintrustInsightDataset struct { + Version string `json:"version"` + Source string `json:"source,omitempty"` + Project string `json:"project,omitempty"` + Experiment string `json:"experiment,omitempty"` + ExperimentID string `json:"experiment_id,omitempty"` + ExperimentURL string `json:"experiment_url,omitempty"` + BuildID string `json:"build_id,omitempty"` + GeneratedAt time.Time `json:"generated_at"` + ReviewRequired bool `json:"review_required,omitempty"` + Warnings []string `json:"warnings,omitempty"` + ScenarioDataset *ScenarioDataset `json:"scenario_dataset,omitempty"` + ConfigPatch *BraintrustConfigPatchSet `json:"config_patch,omitempty"` +} + +type BraintrustConfigPatchSet struct { + ReviewRequired bool `json:"review_required,omitempty"` + Operations []BraintrustConfigPatchOperation `json:"operations,omitempty"` +} + +type 
BraintrustConfigPatchOperation struct { + Op string `json:"op"` + Path string `json:"path"` + Reason string `json:"reason,omitempty"` + Source string `json:"source,omitempty"` + Value any `json:"value,omitempty"` +} + +func LoadBraintrustInsightDatasetFile(path string) (BraintrustInsightDataset, error) { + data, err := os.ReadFile(path) + if err != nil { + return BraintrustInsightDataset{}, err + } + return LoadBraintrustInsightDatasetData(data, path) +} + +func LoadBraintrustInsightDatasetData(data []byte, path string) (BraintrustInsightDataset, error) { + if isYAMLPath(path) { + var generic any + if err := yaml.Unmarshal(data, &generic); err != nil { + return BraintrustInsightDataset{}, fmt.Errorf("decode braintrust insight dataset: %w", err) + } + normalized := normalizeYAMLValue(generic) + raw, err := json.Marshal(normalized) + if err != nil { + return BraintrustInsightDataset{}, fmt.Errorf("decode braintrust insight dataset: %w", err) + } + var dataset BraintrustInsightDataset + if err := json.Unmarshal(raw, &dataset); err != nil { + return BraintrustInsightDataset{}, fmt.Errorf("decode braintrust insight dataset: %w", err) + } + return dataset, nil + } + var dataset BraintrustInsightDataset + if err := json.Unmarshal(data, &dataset); err != nil { + return BraintrustInsightDataset{}, fmt.Errorf("decode braintrust insight dataset: %w", err) + } + return dataset, nil +} + +func WriteBraintrustInsightDatasetFile(path string, dataset BraintrustInsightDataset) error { + data, err := encodeBraintrustInsightDataset(dataset, path) + if err != nil { + return err + } + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return err + } + return os.WriteFile(path, append(data, '\n'), 0o644) +} + +func FetchBraintrustInsightDataset(ctx context.Context, source core.TrendSourceConfig, base core.Config) (BraintrustInsightDataset, error) { + artifacts, err := runtimepkg.LoadBraintrustSyncArtifacts(ctx, source) + if err != nil { + return BraintrustInsightDataset{}, 
err + } + + out := BraintrustInsightDataset{ + Version: braintrustInsightDatasetVersion, + Source: "braintrust", + Project: strings.TrimSpace(source.Project), + Experiment: strings.TrimSpace(source.Experiment), + ExperimentID: artifacts.ExperimentID, + ExperimentURL: artifacts.ExperimentURL, + GeneratedAt: artifacts.ExperimentCreated.UTC(), + } + + if artifacts.ReplayArtifact != nil { + exported := ExportScenarioDataset(base, *artifacts.ReplayArtifact, false) + if strings.TrimSpace(exported.BuildID) != "" { + out.BuildID = exported.BuildID + } + out.ScenarioDataset = &exported + } + + if len(artifacts.InsightPayload) > 0 { + raw, err := json.Marshal(artifacts.InsightPayload) + if err != nil { + return BraintrustInsightDataset{}, fmt.Errorf("decode braintrust sync insight: %w", err) + } + var remote BraintrustInsightDataset + if err := json.Unmarshal(raw, &remote); err != nil { + return BraintrustInsightDataset{}, fmt.Errorf("decode braintrust sync insight: %w", err) + } + out = mergeBraintrustInsights(out, remote) + } + + if out.Version == "" { + out.Version = braintrustInsightDatasetVersion + } + if out.GeneratedAt.IsZero() { + out.GeneratedAt = time.Now().UTC() + } + return out, nil +} + +func ApplyBraintrustInsightDataset(base core.Config, dataset BraintrustInsightDataset, applyScenarios, applyPatches, approved bool) (core.Config, error) { + if requiresReview(dataset) && !approved { + return core.Config{}, fmt.Errorf("braintrust sync insight requires explicit review; rerun with approval enabled after review") + } + + cfg := base + if applyScenarios && dataset.ScenarioDataset != nil && len(dataset.ScenarioDataset.Scenarios) > 0 { + cfg = MergeDatasetIntoConfig(cfg, *dataset.ScenarioDataset) + } + if applyPatches && dataset.ConfigPatch != nil && len(dataset.ConfigPatch.Operations) > 0 { + patched, err := ApplyBraintrustConfigPatchSet(cfg, *dataset.ConfigPatch) + if err != nil { + return core.Config{}, err + } + cfg = patched + } + return cfg, nil +} + +func 
ApplyBraintrustConfigPatchSet(base core.Config, patch BraintrustConfigPatchSet) (core.Config, error) { + raw, err := json.Marshal(base) + if err != nil { + return core.Config{}, fmt.Errorf("apply config patch: %w", err) + } + var generic map[string]any + if err := json.Unmarshal(raw, &generic); err != nil { + return core.Config{}, fmt.Errorf("apply config patch: %w", err) + } + for _, op := range patch.Operations { + if err := applyConfigPatchOperation(generic, op); err != nil { + return core.Config{}, err + } + } + raw, err = json.Marshal(generic) + if err != nil { + return core.Config{}, fmt.Errorf("apply config patch: %w", err) + } + var cfg core.Config + if err := json.Unmarshal(raw, &cfg); err != nil { + return core.Config{}, fmt.Errorf("apply config patch: %w", err) + } + return cfg, nil +} + +func encodeBraintrustInsightDataset(dataset BraintrustInsightDataset, path string) ([]byte, error) { + if isYAMLPath(path) { + raw, err := json.Marshal(dataset) + if err != nil { + return nil, fmt.Errorf("encode braintrust insight dataset: %w", err) + } + var generic any + if err := json.Unmarshal(raw, &generic); err != nil { + return nil, fmt.Errorf("encode braintrust insight dataset: %w", err) + } + data, err := yaml.Marshal(generic) + if err != nil { + return nil, fmt.Errorf("encode braintrust insight dataset: %w", err) + } + return data, nil + } + data, err := json.MarshalIndent(dataset, "", " ") + if err != nil { + return nil, fmt.Errorf("encode braintrust insight dataset: %w", err) + } + return data, nil +} + +func mergeBraintrustInsights(base, remote BraintrustInsightDataset) BraintrustInsightDataset { + if strings.TrimSpace(remote.Version) != "" { + base.Version = remote.Version + } + if strings.TrimSpace(remote.Source) != "" { + base.Source = remote.Source + } + if strings.TrimSpace(remote.Project) != "" { + base.Project = remote.Project + } + if strings.TrimSpace(remote.Experiment) != "" { + base.Experiment = remote.Experiment + } + if 
strings.TrimSpace(remote.ExperimentID) != "" { + base.ExperimentID = remote.ExperimentID + } + if strings.TrimSpace(remote.ExperimentURL) != "" { + base.ExperimentURL = remote.ExperimentURL + } + if strings.TrimSpace(remote.BuildID) != "" { + base.BuildID = remote.BuildID + } + if !remote.GeneratedAt.IsZero() { + base.GeneratedAt = remote.GeneratedAt.UTC() + } + base.ReviewRequired = base.ReviewRequired || remote.ReviewRequired + base.Warnings = append(base.Warnings, remote.Warnings...) + if remote.ScenarioDataset != nil { + if base.ScenarioDataset == nil { + copyDataset := *remote.ScenarioDataset + base.ScenarioDataset = ©Dataset + } else { + merged := mergeScenarioDatasets(*base.ScenarioDataset, *remote.ScenarioDataset) + base.ScenarioDataset = &merged + } + } + if remote.ConfigPatch != nil { + if base.ConfigPatch == nil { + copyPatch := *remote.ConfigPatch + base.ConfigPatch = ©Patch + } else { + base.ConfigPatch.ReviewRequired = base.ConfigPatch.ReviewRequired || remote.ConfigPatch.ReviewRequired + base.ConfigPatch.Operations = append(base.ConfigPatch.Operations, remote.ConfigPatch.Operations...) + } + } + return base +} + +func mergeScenarioDatasets(base, remote ScenarioDataset) ScenarioDataset { + if strings.TrimSpace(base.Version) == "" { + base.Version = remote.Version + } + if strings.TrimSpace(base.Source) == "" { + base.Source = remote.Source + } + if strings.TrimSpace(base.Target) == "" { + base.Target = remote.Target + } + if strings.TrimSpace(base.BuildID) == "" { + base.BuildID = remote.BuildID + } + if base.GeneratedAt.IsZero() { + base.GeneratedAt = remote.GeneratedAt + } + base.ReviewRequired = base.ReviewRequired || remote.ReviewRequired + base.Warnings = append(base.Warnings, remote.Warnings...) 
+ merged := MergeDatasetIntoConfig(core.Config{Scenarios: scenariosFromDataset(base)}, remote) + base.Scenarios = make([]ScenarioDatasetEntry, 0, len(merged.Scenarios)) + for _, scenario := range merged.Scenarios { + base.Scenarios = append(base.Scenarios, ScenarioDatasetEntry{Scenario: scenario}) + } + return base +} + +func scenariosFromDataset(dataset ScenarioDataset) []core.Scenario { + out := make([]core.Scenario, 0, len(dataset.Scenarios)) + for _, item := range dataset.Scenarios { + out = append(out, item.Scenario) + } + return out +} + +func requiresReview(dataset BraintrustInsightDataset) bool { + if dataset.ReviewRequired { + return true + } + if dataset.ScenarioDataset != nil && dataset.ScenarioDataset.ReviewRequired { + return true + } + if dataset.ConfigPatch != nil && dataset.ConfigPatch.ReviewRequired { + return true + } + return false +} diff --git a/cleanr/integrations/runtime/braintrust_sync.go b/cleanr/integrations/runtime/braintrust_sync.go new file mode 100644 index 0000000..ec7247f --- /dev/null +++ b/cleanr/integrations/runtime/braintrust_sync.go @@ -0,0 +1,117 @@ +package runtime + +import ( + "context" + "encoding/json" + "fmt" + "path" + "strings" + "time" + + "github.com/devr-tools/cleanr/cleanr/core" +) + +type BraintrustSyncArtifacts struct { + ExperimentID string + ExperimentName string + ExperimentURL string + ExperimentCreated time.Time + ReplayArtifact *core.ReplayArtifact + InsightPayload map[string]any +} + +func LoadBraintrustSyncArtifacts(ctx context.Context, source core.TrendSourceConfig) (BraintrustSyncArtifacts, error) { + client := newBraintrustClient(source.BaseURL, source.URL, source.APIKeyEnv, source.Headers, source.TimeoutMS) + family := strings.TrimSpace(source.Experiment) + limit := source.HistoryLimit + if limit <= 0 { + limit = 10 + } + experiments, err := client.listExperiments(ctx, source.APIKeyEnv, source.Project, family, limit) + if err != nil { + return BraintrustSyncArtifacts{}, err + } + for _, experiment := 
range experiments { + artifact, err := client.fetchReplayArtifact(ctx, experiment.ID) + if err != nil { + return BraintrustSyncArtifacts{}, err + } + insight, err := client.fetchSyncInsightPayload(ctx, experiment.ID) + if err != nil { + return BraintrustSyncArtifacts{}, err + } + if artifact == nil && len(insight) == 0 { + continue + } + summaryURL := "" + var summary braintrustExperimentSummary + if err := client.http.getJSON(ctx, path.Join("/v1/experiment", experiment.ID, "summarize"), nil, &summary); err == nil { + summaryURL = strings.TrimSpace(summary.ExperimentURL) + } + return BraintrustSyncArtifacts{ + ExperimentID: experiment.ID, + ExperimentName: experiment.Name, + ExperimentURL: summaryURL, + ExperimentCreated: experiment.Created.UTC(), + ReplayArtifact: artifact, + InsightPayload: insight, + }, nil + } + return BraintrustSyncArtifacts{}, fmt.Errorf("load braintrust sync artifacts %s: no replay artifact or sync insight found", displayName(source.Project, "braintrust")) +} + +func (c *braintrustClient) fetchReplayArtifact(ctx context.Context, experimentID string) (*core.ReplayArtifact, error) { + query := fmt.Sprintf( + "SELECT output.replay_artifact AS replay_artifact FROM experiment('%s') WHERE metadata.cleanr.record_type = 'run' AND output.replay_artifact IS NOT NULL LIMIT 1", + experimentID, + ) + var resp braintrustBTQLResponse + if err := c.http.postJSON(ctx, "/btql", map[string]any{ + "query": query, + "fmt": "json", + }, &resp); err != nil { + return nil, fmt.Errorf("load braintrust sync artifacts: %w", err) + } + if len(resp.Data) == 0 || resp.Data[0]["replay_artifact"] == nil { + return nil, nil + } + raw, err := json.Marshal(resp.Data[0]["replay_artifact"]) + if err != nil { + return nil, fmt.Errorf("load braintrust sync artifacts: %w", err) + } + var artifact core.ReplayArtifact + if err := json.Unmarshal(raw, &artifact); err != nil { + return nil, fmt.Errorf("load braintrust sync artifacts: %w", err) + } + return &artifact, nil +} + +func (c 
*braintrustClient) fetchSyncInsightPayload(ctx context.Context, experimentID string) (map[string]any, error) { + query := fmt.Sprintf( + "SELECT output.cleanr_sync AS cleanr_sync FROM experiment('%s') WHERE metadata.cleanr.record_type = 'sync_insight' AND output.cleanr_sync IS NOT NULL LIMIT 1", + experimentID, + ) + var resp braintrustBTQLResponse + if err := c.http.postJSON(ctx, "/btql", map[string]any{ + "query": query, + "fmt": "json", + }, &resp); err != nil { + return nil, fmt.Errorf("load braintrust sync artifacts: %w", err) + } + if len(resp.Data) == 0 || resp.Data[0]["cleanr_sync"] == nil { + return nil, nil + } + payload, ok := resp.Data[0]["cleanr_sync"].(map[string]any) + if ok { + return payload, nil + } + raw, err := json.Marshal(resp.Data[0]["cleanr_sync"]) + if err != nil { + return nil, fmt.Errorf("load braintrust sync artifacts: %w", err) + } + var payloadMap map[string]any + if err := json.Unmarshal(raw, &payloadMap); err != nil { + return nil, fmt.Errorf("load braintrust sync artifacts: %w", err) + } + return payloadMap, nil +} diff --git a/docs/ci.md b/docs/ci.md index 074fa8f..637fb9b 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -149,6 +149,7 @@ It runs the same main gates locally: test presence, formatting, `go vet`, `gocyc For `gocyclo`, the local command compares changed files to the resolved base ref and fails only on new or worsened over-limit findings. That keeps `make ci` usable when the base branch already carries complexity debt. For `scc`, the local command treats changed non-test Go files above `400` code lines as god files, but only fails on new or worsened size debt compared with the base ref. For `golangci-lint`, the local command uses [.golangci.yml](../.golangci.yml) and reports only new maintainability findings against the merge-base with the target branch. 
+If `govulncheck` cannot be installed for the current local Go toolchain, `make ci` skips that step with a warning instead of failing before the rest of the pre-commit checks can run. If `semgrep` is not installed locally, `make ci` skips that step with a warning instead of failing before the rest of the pre-commit checks can run. The local command compares your working tree against a Git base ref. Resolution order is: @@ -230,6 +231,40 @@ That generated config points to standard env var names instead of embedding cred - `main`: retained trend history and moderate trend gates - `release`: full drift, load, chaos, replay artifacts, attestation, and starter `release_policy` rules +## Braintrust Sync Loop + +When Braintrust stores replay artifacts and a follow-up optimizer writes an explicit `cleanr_sync` payload into the experiment, `cleanr` can pull those recommendations back into a reviewable config update: + +```bash +cleanr sync braintrust \ + -config cleanr.connected.yaml \ + -output-insights reports/braintrust.insights.yaml \ + -output-dataset reports/braintrust.dataset.yaml \ + -output-config cleanr.synced.yaml \ + -approve-insights +``` + +The sync command: + +- reads the latest matching Braintrust experiment for the configured project and experiment family +- derives regression scenarios from the stored replay artifact using the local base config +- applies explicit config patch operations from `output.cleanr_sync` +- writes a normalized Braintrust insight dataset for auditability + +If you want `cleanr` to open a GitHub PR after generating the files, run: + +```bash +cleanr sync braintrust \ + -config cleanr.connected.yaml \ + -output-config cleanr.connected.yaml \ + -approve-insights \ + -create-pr \ + -pr-branch cleanr-sync-braintrust \ + -pr-title "cleanr sync: apply Braintrust insights" +``` + +That flow requires `git` and the GitHub CLI `gh` on `PATH`. 
+ ## Related Docs - [Docker guide](docker.md) diff --git a/docs/development.md b/docs/development.md index cc4157d..9901e38 100644 --- a/docs/development.md +++ b/docs/development.md @@ -49,6 +49,7 @@ Local behavior differs from hosted GitHub Actions in two places: - the `gocyclo` gate compares changed files against the base ref and fails only on new or worsened complexity violations, so existing baseline debt on the target branch does not block local pre-commit checks - the `scc` gate compares changed files against the base ref and fails only on new or worsened god-file size regressions - the `golangci-lint` gate uses [.golangci.yml](../.golangci.yml) and reports only new issues against the merge-base of your base ref and `HEAD` +- the `govulncheck` step is skipped with a warning when the scanner cannot be installed for your local Go toolchain - the `semgrep` step is skipped with a warning when the `semgrep` binary is not installed locally Set `CI_BASE_REF=` when you want to force the comparison target, for example `make ci CI_BASE_REF=origin/develop`. diff --git a/docs/integrations.md b/docs/integrations.md index 13a7e93..b8bb548 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -40,6 +40,14 @@ These integrations write local summary artifacts after the run completes. | Markdown summary | `summaries[].format: markdown` | implemented | Writes a human-readable PR or release summary | | JSON summary | `summaries[].format: json` | implemented | Writes a machine-readable summary for downstream automation | +### Sync Workflows + +These workflows pull Braintrust-stored artifacts back into a reviewable `cleanr` change set. 
+ +| Workflow | Command | Status | Current behavior | +| --- | --- | --- | --- | +| Braintrust sync | `cleanr sync braintrust` | implemented | Reads replay artifacts and explicit `cleanr_sync` insight payloads from Braintrust, writes a normalized insight dataset, merges scenario updates into config, applies explicit config patch operations, and can optionally open a GitHub PR through `gh` | + ## Current Gaps The following are not implemented yet: @@ -47,7 +55,7 @@ The following are not implemented yet: - native Langfuse trend-source loading - native PostHog trend-source loading - provider-backed dataset import or export flows -- provider-specific UI or PR annotation integrations beyond returned run URLs and local summary files +- provider-specific UI integrations beyond returned run URLs, local summary files, and the `cleanr sync braintrust` review loop ## Where To Configure diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 7a9d7da..3ece336 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -26,6 +26,8 @@ func Run(args []string, stdout, stderr io.Writer) int { return trendsCmd(args[1:], stdout, stderr) case "dataset": return datasetCmd(args[1:], stdout, stderr) + case "sync": + return syncCmd(args[1:], stdout, stderr) case "plugins": return pluginsCmd(args[1:], stdout, stderr) case "snapshot": @@ -48,5 +50,5 @@ func Run(args []string, stdout, stderr io.Writer) int { } func usage(w io.Writer) { - _, _ = fmt.Fprintln(w, "usage: cleanr [flags]") + _, _ = fmt.Fprintln(w, "usage: cleanr [flags]") } diff --git a/internal/cli/cli_sync.go b/internal/cli/cli_sync.go new file mode 100644 index 0000000..8202579 --- /dev/null +++ b/internal/cli/cli_sync.go @@ -0,0 +1,260 @@ +package cli + +import ( + "context" + "flag" + "fmt" + "io" + "path/filepath" + "strings" + "time" + + "github.com/devr-tools/cleanr/cleanr" +) + +type syncBraintrustOptions struct { + configPath string + profile string + project string + experiment string + apiKeyEnv string + baseURL 
string + timeoutMS int + historyLimit int + outputInsights string + outputDataset string + outputConfig string + applyScenarios bool + applyPatches bool + approveInsights bool + createPR bool + prBranch string + prBase string + prTitle string + prBody string + commitMessage string +} + +func syncCmd(args []string, stdout, stderr io.Writer) int { + if len(args) == 0 { + _, _ = fmt.Fprintln(stderr, "sync error: expected a sync target such as braintrust") + return 2 + } + switch args[0] { + case "braintrust": + return syncBraintrustCmd(args[1:], stdout, stderr) + default: + _, _ = fmt.Fprintf(stderr, "sync error: unsupported subcommand %s\n", args[0]) + return 2 + } +} + +func syncBraintrustCmd(args []string, stdout, stderr io.Writer) int { + fs := flag.NewFlagSet("sync braintrust", flag.ContinueOnError) + fs.SetOutput(stderr) + opts := bindSyncBraintrustFlags(fs) + if err := fs.Parse(args); err != nil { + return 2 + } + if opts.historyLimit < 0 { + _, _ = fmt.Fprintln(stderr, "sync braintrust error: history-limit must be >= 0") + return 2 + } + return runSyncBraintrust(*opts, stdout, stderr) +} + +func bindSyncBraintrustFlags(fs *flag.FlagSet) *syncBraintrustOptions { + opts := &syncBraintrustOptions{} + fs.StringVar(&opts.configPath, "config", "", "Path to the base cleanr config to sync into") + fs.StringVar(&opts.profile, "profile", "", "Optional staged config profile: pr, main, or release") + fs.StringVar(&opts.project, "project", "", "Braintrust project name") + fs.StringVar(&opts.experiment, "experiment", "", "Optional Braintrust experiment family") + fs.StringVar(&opts.apiKeyEnv, "api-key-env", "", "Environment variable name used for the Braintrust API key") + fs.StringVar(&opts.baseURL, "base-url", "", "Optional Braintrust base URL override") + fs.IntVar(&opts.timeoutMS, "timeout-ms", 0, "Optional Braintrust API timeout in milliseconds") + fs.IntVar(&opts.historyLimit, "history-limit", 10, "Number of recent Braintrust experiments to scan") + 
fs.StringVar(&opts.outputInsights, "output-insights", "reports/braintrust.insights.yaml", "Path to write the normalized Braintrust insight dataset") + fs.StringVar(&opts.outputDataset, "output-dataset", "", "Optional path to write the scenario dataset extracted from Braintrust") + fs.StringVar(&opts.outputConfig, "output-config", "cleanr.synced.yaml", "Path to write the merged cleanr config") + fs.BoolVar(&opts.applyScenarios, "apply-scenarios", true, "Merge replay-derived or explicit scenario updates into the output config") + fs.BoolVar(&opts.applyPatches, "apply-patches", true, "Apply explicit config patch operations from Braintrust insights") + fs.BoolVar(&opts.approveInsights, "approve-insights", false, "Allow applying Braintrust insights that require explicit review") + fs.BoolVar(&opts.createPR, "create-pr", false, "Create a Git branch, commit, and GitHub pull request for the generated files") + fs.StringVar(&opts.prBranch, "pr-branch", "", "Optional Git branch name for the generated PR") + fs.StringVar(&opts.prBase, "pr-base", "", "Optional GitHub PR base branch") + fs.StringVar(&opts.prTitle, "pr-title", "", "Optional GitHub PR title") + fs.StringVar(&opts.prBody, "pr-body", "", "Optional GitHub PR body") + fs.StringVar(&opts.commitMessage, "commit-message", "", "Optional Git commit message") + return opts +} + +func runSyncBraintrust(opts syncBraintrustOptions, stdout, stderr io.Writer) int { + resolvedConfigPath, err := resolveConfigPath(opts.configPath, opts.profile) + if err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + baseCfg, err := cleanr.LoadConfigFile(resolvedConfigPath) + if err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + + source, err := resolveBraintrustSyncSource(baseCfg, opts.project, opts.experiment, opts.apiKeyEnv, opts.baseURL, opts.timeoutMS, opts.historyLimit) + if err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + 
return 2 + } + + ctx, cancel := context.WithTimeout(context.Background(), syncTimeout(opts.timeoutMS)) + defer cancel() + + dataset, err := cleanr.FetchBraintrustInsightDataset(ctx, source, baseCfg) + if err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + + insightsPath := resolveConfigRelativePath(resolvedConfigPath, opts.outputInsights) + if err := cleanr.WriteBraintrustInsightDatasetFile(insightsPath, dataset); err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + + writtenFiles := []string{insightsPath} + if strings.TrimSpace(opts.outputDataset) != "" && dataset.ScenarioDataset != nil { + datasetPath := resolveConfigRelativePath(resolvedConfigPath, opts.outputDataset) + if err := cleanr.WriteScenarioDatasetFile(datasetPath, *dataset.ScenarioDataset); err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + writtenFiles = append(writtenFiles, datasetPath) + } + + mergedCfg, err := cleanr.ApplyBraintrustInsightDataset(baseCfg, dataset, opts.applyScenarios, opts.applyPatches, opts.approveInsights) + if err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + configOutPath := resolveConfigRelativePath(resolvedConfigPath, opts.outputConfig) + if err := cleanr.WriteConfigFile(configOutPath, mergedCfg); err != nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + writtenFiles = append(writtenFiles, configOutPath) + + if opts.createPR { + prOpts := gitHubPROptions{ + Files: writtenFiles, + Branch: strings.TrimSpace(opts.prBranch), + Base: strings.TrimSpace(opts.prBase), + Title: firstNonEmpty(opts.prTitle, defaultSyncPRTitle(dataset, source)), + Body: firstNonEmpty(opts.prBody, defaultSyncPRBody(dataset, writtenFiles)), + CommitMessage: firstNonEmpty(opts.commitMessage, defaultSyncCommitMessage(dataset, source)), + } + if err := createGitHubPR(context.Background(), prOpts); err 
!= nil { + _, _ = fmt.Fprintf(stderr, "sync braintrust error: %v\n", err) + return 2 + } + } + + _, _ = fmt.Fprintf(stdout, "wrote braintrust insights to %s\n", insightsPath) + if strings.TrimSpace(opts.outputDataset) != "" && dataset.ScenarioDataset != nil { + _, _ = fmt.Fprintf(stdout, "wrote scenario dataset with %d scenarios to %s\n", len(dataset.ScenarioDataset.Scenarios), resolveConfigRelativePath(resolvedConfigPath, opts.outputDataset)) + } + _, _ = fmt.Fprintf(stdout, "wrote merged config to %s\n", configOutPath) + if opts.createPR { + _, _ = fmt.Fprintln(stdout, "created Git branch, commit, and GitHub pull request") + } + return 0 +} + +func resolveBraintrustSyncSource(cfg cleanr.Config, project, experiment, apiKeyEnv, baseURL string, timeoutMS, historyLimit int) (cleanr.TrendSourceConfig, error) { + source := cleanr.TrendSourceConfig{ + Type: "braintrust", + Project: strings.TrimSpace(project), + Experiment: strings.TrimSpace(experiment), + APIKeyEnv: strings.TrimSpace(apiKeyEnv), + BaseURL: strings.TrimSpace(baseURL), + TimeoutMS: timeoutMS, + HistoryLimit: historyLimit, + } + if source.Project != "" { + if source.APIKeyEnv == "" { + source.APIKeyEnv = "BRAINTRUST_API_KEY" + } + return source, nil + } + for _, item := range cfg.Integrations.TrendSources { + if strings.TrimSpace(item.Type) != "braintrust" { + continue + } + source = item + if strings.TrimSpace(project) != "" { + source.Project = strings.TrimSpace(project) + } + if strings.TrimSpace(experiment) != "" { + source.Experiment = strings.TrimSpace(experiment) + } + if strings.TrimSpace(apiKeyEnv) != "" { + source.APIKeyEnv = strings.TrimSpace(apiKeyEnv) + } + if strings.TrimSpace(baseURL) != "" { + source.BaseURL = strings.TrimSpace(baseURL) + } + if timeoutMS > 0 { + source.TimeoutMS = timeoutMS + } + if historyLimit > 0 { + source.HistoryLimit = historyLimit + } + return source, nil + } + return cleanr.TrendSourceConfig{}, fmt.Errorf("no braintrust trend source configured; pass -project or add 
integrations.trend_sources[].type: braintrust") +} + +func syncTimeout(timeoutMS int) time.Duration { + if timeoutMS > 0 { + return time.Duration(timeoutMS) * time.Millisecond + } + return 20 * time.Second +} + +func defaultSyncPRTitle(dataset cleanr.BraintrustInsightDataset, source cleanr.TrendSourceConfig) string { + label := firstNonEmpty(dataset.BuildID, dataset.Experiment, source.Experiment, "braintrust") + return "cleanr sync: apply Braintrust insights for " + label +} + +func defaultSyncCommitMessage(dataset cleanr.BraintrustInsightDataset, source cleanr.TrendSourceConfig) string { + label := firstNonEmpty(dataset.BuildID, dataset.Experiment, source.Experiment, "braintrust") + return "cleanr sync: apply Braintrust insights for " + label +} + +func defaultSyncPRBody(dataset cleanr.BraintrustInsightDataset, files []string) string { + var b strings.Builder + b.WriteString("## Summary\n\n") + b.WriteString("- sync source: Braintrust\n") + if dataset.Project != "" { + b.WriteString("- project: `" + dataset.Project + "`\n") + } + if dataset.Experiment != "" { + b.WriteString("- experiment family: `" + dataset.Experiment + "`\n") + } + if dataset.BuildID != "" { + b.WriteString("- build id: `" + dataset.BuildID + "`\n") + } + if dataset.ExperimentURL != "" { + b.WriteString("- experiment url: " + dataset.ExperimentURL + "\n") + } + if dataset.ScenarioDataset != nil { + b.WriteString(fmt.Sprintf("- replay-derived scenarios: `%d`\n", len(dataset.ScenarioDataset.Scenarios))) + } + if dataset.ConfigPatch != nil { + b.WriteString(fmt.Sprintf("- config patch operations: `%d`\n", len(dataset.ConfigPatch.Operations))) + } + b.WriteString("\n## Files\n\n") + for _, file := range files { + b.WriteString("- `" + filepath.ToSlash(file) + "`\n") + } + return b.String() +} diff --git a/internal/cli/cli_sync_git.go b/internal/cli/cli_sync_git.go new file mode 100644 index 0000000..27d4994 --- /dev/null +++ b/internal/cli/cli_sync_git.go @@ -0,0 +1,75 @@ +package cli + +import ( + 
"bytes" + "context" + "fmt" + "os/exec" + "strings" + "time" +) + +var syncExecCommandContext = exec.CommandContext +var syncLookPath = exec.LookPath + +type gitHubPROptions struct { + Files []string + Branch string + Base string + Title string + Body string + CommitMessage string +} + +func createGitHubPR(ctx context.Context, opts gitHubPROptions) error { + if len(opts.Files) == 0 { + return fmt.Errorf("create github pr: no files to include") + } + if _, err := syncLookPath("git"); err != nil { + return fmt.Errorf("create github pr: git is not available") + } + if _, err := syncLookPath("gh"); err != nil { + return fmt.Errorf("create github pr: gh is not available") + } + branch := strings.TrimSpace(opts.Branch) + if branch == "" { + branch = "cleanr-sync-" + time.Now().UTC().Format("20060102-150405") + } + if err := runSyncCommand(ctx, "git", "checkout", "-b", branch); err != nil { + return err + } + addArgs := append([]string{"add"}, opts.Files...) + if err := runSyncCommand(ctx, "git", addArgs...); err != nil { + return err + } + if err := runSyncCommand(ctx, "git", "commit", "-m", strings.TrimSpace(opts.CommitMessage)); err != nil { + return err + } + args := []string{"pr", "create", "--title", strings.TrimSpace(opts.Title), "--body", strings.TrimSpace(opts.Body)} + if strings.TrimSpace(opts.Base) != "" { + args = append(args, "--base", strings.TrimSpace(opts.Base)) + } + if err := runSyncCommand(ctx, "gh", args...); err != nil { + return err + } + return nil +} + +func runSyncCommand(ctx context.Context, name string, args ...string) error { + cmd := syncExecCommandContext(ctx, name, args...) 
+ var stdout bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + message := strings.TrimSpace(stderr.String()) + if message == "" { + message = strings.TrimSpace(stdout.String()) + } + if message == "" { + message = err.Error() + } + return fmt.Errorf("%s %s: %s", name, strings.Join(args, " "), message) + } + return nil +} diff --git a/internal/devtools/ci_security.go b/internal/devtools/ci_security.go index 9eb2426..4fb8b88 100644 --- a/internal/devtools/ci_security.go +++ b/internal/devtools/ci_security.go @@ -145,10 +145,7 @@ func (r Runner) ensureGoTool(ctx context.Context, binaryName, modulePath, versio func (r Runner) runGovulncheck(ctx context.Context, opts CIOptions) error { govulncheckPath, err := r.ensureGoTool(ctx, "govulncheck", "golang.org/x/vuln/cmd/govulncheck", opts.GovulncheckVersion) if err != nil { - if opts.GovulncheckMode == "required" { - return fmt.Errorf("resolve govulncheck: %w", err) - } - if _, printErr := fmt.Fprintf(r.Stdout, "warning: govulncheck install failed but mode=%s, continuing\n", opts.GovulncheckMode); printErr != nil { + if _, printErr := fmt.Fprintf(r.Stdout, "warning: govulncheck unavailable, skipping local scan (%v)\n", err); printErr != nil { return printErr } return nil diff --git a/tests/braintrust_sync_test.go b/tests/braintrust_sync_test.go new file mode 100644 index 0000000..5bc6816 --- /dev/null +++ b/tests/braintrust_sync_test.go @@ -0,0 +1,436 @@ +package tests + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "strings" + "testing" + "time" + + "github.com/devr-tools/cleanr/cleanr" +) + +type braintrustRoundTripperFunc func(*http.Request) (*http.Response, error) + +func (f braintrustRoundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req) +} + +func stubBraintrustTransport(t *testing.T, transport http.RoundTripper) func() { + t.Helper() + original := http.DefaultTransport + 
http.DefaultTransport = transport + return func() { + http.DefaultTransport = original + } +} + +func jsonBraintrustResponse(t *testing.T, statusCode int, body map[string]any) *http.Response { + t.Helper() + data, err := json.Marshal(body) + if err != nil { + t.Fatalf("marshal response: %v", err) + } + return &http.Response{ + StatusCode: statusCode, + Header: http.Header{"Content-Type": []string{"application/json"}}, + Body: io.NopCloser(bytes.NewReader(data)), + } +} + +func decodeBraintrustRequestBody(t *testing.T, req *http.Request) map[string]any { + t.Helper() + defer req.Body.Close() + + var body map[string]any + if err := json.NewDecoder(req.Body).Decode(&body); err != nil { + t.Fatalf("decode request body: %v", err) + } + return body +} + +func TestApplyBraintrustConfigPatchSetSupportsSelectorsAndAppendUnique(t *testing.T) { + base := cleanr.ExampleConfig() + base.Scenarios = []cleanr.Scenario{{ + Name: "happy-path", + System: "Original system", + Tags: []string{"existing"}, + Input: "hello", + }} + base.Suites.TokenOptimization.MaxOutputTokens = 512 + base.Suites.Security.SecretExposureIndicators = []string{"sk-"} + + patched, err := cleanr.ApplyBraintrustInsightDataset(base, cleanr.BraintrustInsightDataset{ + ConfigPatch: &cleanr.BraintrustConfigPatchSet{ + Operations: []cleanr.BraintrustConfigPatchOperation{ + { + Op: "set", + Path: "suites.token_optimization.max_output_tokens", + Value: 256, + }, + { + Op: "set", + Path: "scenarios[name=happy-path].system", + Value: "Use the verified password reset flow.", + }, + { + Op: "append_unique", + Path: "scenarios[name=happy-path].tags", + Value: []any{"regression", "existing"}, + }, + { + Op: "append_unique", + Path: "suites.security.secret_exposure_indicators", + Value: []string{"AKIA", "sk-"}, + }, + }, + }, + }, false, true, true) + if err != nil { + t.Fatalf("apply patch: %v", err) + } + + if patched.Suites.TokenOptimization.MaxOutputTokens != 256 { + t.Fatalf("unexpected token threshold: %+v", 
patched.Suites.TokenOptimization) + } + if len(patched.Scenarios) != 1 || patched.Scenarios[0].System != "Use the verified password reset flow." { + t.Fatalf("unexpected scenarios: %+v", patched.Scenarios) + } + if got := strings.Join(patched.Scenarios[0].Tags, ","); got != "existing,regression" { + t.Fatalf("unexpected scenario tags: %s", got) + } + if got := strings.Join(patched.Suites.Security.SecretExposureIndicators, ","); got != "sk-,AKIA" { + t.Fatalf("unexpected security indicators: %s", got) + } +} + +func TestApplyBraintrustConfigPatchSetRejectsInvalidSelectorPaths(t *testing.T) { + base := cleanr.ExampleConfig() + base.Scenarios = []cleanr.Scenario{{Name: "happy-path", Input: "hello"}} + + _, err := cleanr.ApplyBraintrustInsightDataset(base, cleanr.BraintrustInsightDataset{ + ConfigPatch: &cleanr.BraintrustConfigPatchSet{ + Operations: []cleanr.BraintrustConfigPatchOperation{{ + Op: "set", + Path: "scenarios[name=missing].system", + Value: "patched", + }}, + }, + }, false, true, true) + if err == nil || !strings.Contains(err.Error(), "no list item in scenarios matched name=missing") { + t.Fatalf("expected selector failure, got %v", err) + } + + _, err = cleanr.ApplyBraintrustInsightDataset(base, cleanr.BraintrustInsightDataset{ + ConfigPatch: &cleanr.BraintrustConfigPatchSet{ + Operations: []cleanr.BraintrustConfigPatchOperation{{ + Op: "append_unique", + Path: "suites.token_optimization.max_output_tokens", + Value: []string{"bad"}, + }}, + }, + }, false, true, true) + if err == nil || !strings.Contains(err.Error(), "expected a string or string list") { + t.Fatalf("expected type failure, got %v", err) + } +} + +func TestFetchBraintrustInsightDatasetMergesReplayAndRemotePatch(t *testing.T) { + cfg := cleanr.ExampleConfig() + cfg.Scenarios = []cleanr.Scenario{{Name: "happy-path", Input: "base input"}} + + now := time.Date(2026, 5, 28, 12, 0, 0, 0, time.UTC) + restore := stubBraintrustTransport(t, braintrustRoundTripperFunc(func(req *http.Request) 
(*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonBraintrustResponse(t, 200, map[string]any{ + "objects": []map[string]any{{ + "id": "exp-2", + "project_id": "proj-1", + "name": "cleanr-ci/build-2", + "created": now.Format(time.RFC3339), + }}, + }), nil + case req.Method == http.MethodPost && req.URL.Path == "/btql": + body := decodeBraintrustRequestBody(t, req) + query := body["query"].(string) + switch { + case strings.Contains(query, "replay_artifact"): + return jsonBraintrustResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "replay_artifact": map[string]any{ + "version": "v1alpha1", + "target": cfg.Target.Name, + "build_id": "build-2", + "generated_at": now.Format(time.RFC3339), + "passed": false, + "failed_cases": 1, + "failures": []map[string]any{{ + "suite": "security", + "name": "happy-path", + "failed": true, + "findings": []map[string]any{{ + "severity": "high", + "message": "review me", + }}, + }}, + }, + }}, + }), nil + case strings.Contains(query, "cleanr_sync"): + return jsonBraintrustResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "cleanr_sync": map[string]any{ + "version": "v1alpha1", + "review_required": true, + "warnings": []string{"remote warning"}, + "scenario_dataset": map[string]any{ + "review_required": true, + "scenarios": []map[string]any{ + {"scenario": map[string]any{"name": "happy-path", "input": "updated input"}}, + {"scenario": map[string]any{"name": "new-regression", "input": "new"}}, + }, + }, + "config_patch": map[string]any{ + "review_required": true, + "operations": []map[string]any{{ + "op": "set", + "path": "suites.token_optimization.max_output_tokens", + "value": 128, + }}, + }, + }, + }}, + }), nil + default: + t.Fatalf("unexpected btql query: %s", query) + return nil, nil + } + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment/exp-2/summarize": + return jsonBraintrustResponse(t, 200, map[string]any{ + 
"experiment_url": "https://braintrust.dev/app/cleanr-ci/build-2", + }), nil + default: + t.Fatalf("unexpected request: %s %s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + dataset, err := cleanr.FetchBraintrustInsightDataset(context.Background(), cleanr.TrendSourceConfig{ + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + }, cfg) + if err != nil { + t.Fatalf("fetch braintrust insight dataset: %v", err) + } + if !dataset.ReviewRequired || dataset.BuildID != "build-2" || dataset.ExperimentURL == "" { + t.Fatalf("unexpected merged dataset metadata: %+v", dataset) + } + if dataset.ScenarioDataset == nil || !dataset.ScenarioDataset.ReviewRequired || len(dataset.ScenarioDataset.Scenarios) != 2 { + t.Fatalf("unexpected merged scenario dataset: %+v", dataset.ScenarioDataset) + } + if got := dataset.ScenarioDataset.Scenarios[0].Scenario.Input; got != "updated input" { + t.Fatalf("expected remote scenario overwrite, got %q", got) + } + if dataset.ConfigPatch == nil || !dataset.ConfigPatch.ReviewRequired || len(dataset.ConfigPatch.Operations) != 1 { + t.Fatalf("unexpected merged config patch: %+v", dataset.ConfigPatch) + } + if got := strings.Join(dataset.Warnings, ","); got != "remote warning" { + t.Fatalf("unexpected warnings: %s", got) + } +} + +func TestFetchBraintrustInsightDatasetSkipsNewestExperimentWithoutReplayOrInsight(t *testing.T) { + cfg := cleanr.ExampleConfig() + restore := stubBraintrustTransport(t, braintrustRoundTripperFunc(func(req *http.Request) (*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonBraintrustResponse(t, 200, map[string]any{ + "objects": []map[string]any{ + {"id": "exp-new", "project_id": "proj-1", "name": "cleanr-ci/build-new", "created": "2026-05-28T13:00:00Z"}, + {"id": "exp-old", "project_id": "proj-1", "name": "cleanr-ci/build-old", "created": "2026-05-27T13:00:00Z"}, + }, + }), nil + case req.Method == 
http.MethodPost && req.URL.Path == "/btql": + body := decodeBraintrustRequestBody(t, req) + query := body["query"].(string) + switch { + case strings.Contains(query, "experiment('exp-new')") && strings.Contains(query, "replay_artifact"): + return jsonBraintrustResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + case strings.Contains(query, "experiment('exp-new')") && strings.Contains(query, "cleanr_sync"): + return jsonBraintrustResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + case strings.Contains(query, "experiment('exp-old')") && strings.Contains(query, "replay_artifact"): + return jsonBraintrustResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "replay_artifact": map[string]any{ + "version": "v1alpha1", + "target": cfg.Target.Name, + "build_id": "build-old", + "generated_at": "2026-05-27T13:00:00Z", + "passed": false, + "failed_cases": 1, + }, + }}, + }), nil + case strings.Contains(query, "experiment('exp-old')") && strings.Contains(query, "cleanr_sync"): + return jsonBraintrustResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + default: + t.Fatalf("unexpected btql query: %s", query) + return nil, nil + } + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment/exp-old/summarize": + return jsonBraintrustResponse(t, 200, map[string]any{ + "experiment_url": "https://braintrust.dev/app/cleanr-ci/build-old", + }), nil + default: + t.Fatalf("unexpected request: %s %s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + dataset, err := cleanr.FetchBraintrustInsightDataset(context.Background(), cleanr.TrendSourceConfig{ + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + }, cfg) + if err != nil { + t.Fatalf("fetch braintrust insight dataset: %v", err) + } + if dataset.BuildID != "build-old" || dataset.ExperimentID != "exp-old" { + t.Fatalf("unexpected dataset selection: %+v", dataset) + } +} + +func 
TestFetchBraintrustInsightDatasetAllowsMissingExperimentURL(t *testing.T) { + cfg := cleanr.ExampleConfig() + restore := stubBraintrustTransport(t, braintrustRoundTripperFunc(func(req *http.Request) (*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonBraintrustResponse(t, 200, map[string]any{ + "objects": []map[string]any{{ + "id": "exp-1", + "project_id": "proj-1", + "name": "cleanr-ci/build-1", + "created": "2026-05-28T12:00:00Z", + }}, + }), nil + case req.Method == http.MethodPost && req.URL.Path == "/btql": + body := decodeBraintrustRequestBody(t, req) + query := body["query"].(string) + if strings.Contains(query, "replay_artifact") { + return jsonBraintrustResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + } + return jsonBraintrustResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "cleanr_sync": map[string]any{ + "version": "v1alpha1", + "config_patch": map[string]any{ + "operations": []map[string]any{{ + "op": "set", + "path": "suites.token_optimization.max_output_tokens", + "value": 128, + }}, + }, + }, + }}, + }), nil + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment/exp-1/summarize": + return jsonBraintrustResponse(t, 200, map[string]any{}), nil + default: + t.Fatalf("unexpected request: %s %s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + dataset, err := cleanr.FetchBraintrustInsightDataset(context.Background(), cleanr.TrendSourceConfig{ + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + }, cfg) + if err != nil { + t.Fatalf("fetch braintrust insight dataset: %v", err) + } + if dataset.ExperimentURL != "" || dataset.ConfigPatch == nil { + t.Fatalf("unexpected dataset: %+v", dataset) + } +} + +func TestFetchBraintrustInsightDatasetRejectsMalformedCleanrSyncPayload(t *testing.T) { + cfg := cleanr.ExampleConfig() + restore := stubBraintrustTransport(t, 
braintrustRoundTripperFunc(func(req *http.Request) (*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonBraintrustResponse(t, 200, map[string]any{ + "objects": []map[string]any{{ + "id": "exp-1", + "project_id": "proj-1", + "name": "cleanr-ci/build-1", + "created": "2026-05-28T12:00:00Z", + }}, + }), nil + case req.Method == http.MethodPost && req.URL.Path == "/btql": + body := decodeBraintrustRequestBody(t, req) + query := body["query"].(string) + if strings.Contains(query, "replay_artifact") { + return jsonBraintrustResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + } + return jsonBraintrustResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "cleanr_sync": "bad-payload", + }}, + }), nil + default: + t.Fatalf("unexpected request: %s %s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + _, err := cleanr.FetchBraintrustInsightDataset(context.Background(), cleanr.TrendSourceConfig{ + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + }, cfg) + if err == nil || !strings.Contains(err.Error(), "load braintrust sync artifacts") { + t.Fatalf("expected malformed payload error, got %v", err) + } +} + +func TestFetchBraintrustInsightDatasetErrorsWhenNoReplayOrInsightExists(t *testing.T) { + cfg := cleanr.ExampleConfig() + restore := stubBraintrustTransport(t, braintrustRoundTripperFunc(func(req *http.Request) (*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonBraintrustResponse(t, 200, map[string]any{ + "objects": []map[string]any{{ + "id": "exp-1", + "project_id": "proj-1", + "name": "cleanr-ci/build-1", + "created": "2026-05-28T12:00:00Z", + }}, + }), nil + case req.Method == http.MethodPost && req.URL.Path == "/btql": + return jsonBraintrustResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + default: + t.Fatalf("unexpected request: %s 
%s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + _, err := cleanr.FetchBraintrustInsightDataset(context.Background(), cleanr.TrendSourceConfig{ + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + }, cfg) + if err == nil || !strings.Contains(err.Error(), "no replay artifact or sync insight found") { + t.Fatalf("expected missing artifact error, got %v", err) + } +} diff --git a/tests/cli/cli_test.go b/tests/cli/cli_test.go index cc618f3..a638d99 100644 --- a/tests/cli/cli_test.go +++ b/tests/cli/cli_test.go @@ -1130,6 +1130,328 @@ func TestDatasetImportRequiresApprovalForGeneratedDatasets(t *testing.T) { } } +func TestSyncBraintrustCommandFetchesReplayAndAppliesConfigPatches(t *testing.T) { + cfg := cleanr.ExampleConfig() + cfg.Integrations.TrendSources = []cleanr.TrendSourceConfig{{ + Name: "braintrust", + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + APIKeyEnv: "BRAINTRUST_API_KEY", + }} + + dir := t.TempDir() + configPath := filepath.Join(dir, "cleanr.yaml") + if err := cleanr.WriteConfigFile(configPath, cfg); err != nil { + t.Fatalf("write config: %v", err) + } + + restore := stubCLITransport(t, cliRoundTripperFunc(func(req *http.Request) (*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonCLIResponse(t, 200, map[string]any{ + "objects": []map[string]any{{ + "id": "exp-2", + "project_id": "proj-1", + "name": "cleanr-ci/build-2", + "created": "2026-05-28T12:00:00Z", + }}, + }), nil + case req.Method == http.MethodPost && req.URL.Path == "/btql": + body := decodeCLIRequestBody(t, req) + query := body["query"].(string) + switch { + case strings.Contains(query, "output.replay_artifact"): + return jsonCLIResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "replay_artifact": map[string]any{ + "version": "v1alpha1", + "target": cfg.Target.Name, + "build_id": "build-2", + "generated_at": 
"2026-05-28T12:00:00Z", + "passed": false, + "failed_cases": 1, + "failures": []map[string]any{{ + "suite": "security", + "name": "happy-path", + "failed": true, + "findings": []map[string]any{{ + "severity": "high", + "message": "review me", + }}, + }}, + }, + }}, + }), nil + case strings.Contains(query, "output.cleanr_sync"): + return jsonCLIResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "cleanr_sync": map[string]any{ + "version": "v1alpha1", + "source": "braintrust", + "config_patch": map[string]any{ + "operations": []map[string]any{ + { + "op": "set", + "path": "suites.token_optimization.max_output_tokens", + "value": 256, + }, + { + "op": "set", + "path": "scenarios[name=happy-path].system", + "value": "Use the verified password reset flow.", + }, + }, + }, + }, + }}, + }), nil + default: + t.Fatalf("unexpected btql query: %s", query) + return nil, nil + } + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment/exp-2/summarize": + return jsonCLIResponse(t, 200, map[string]any{ + "experiment_url": "https://braintrust.dev/app/cleanr-ci/build-2", + }), nil + default: + t.Fatalf("unexpected request: %s %s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + var stdout bytes.Buffer + var stderr bytes.Buffer + exitCode := cli.Run([]string{ + "sync", "braintrust", + "-config", configPath, + "-output-insights", "reports/braintrust.insights.yaml", + "-output-dataset", "reports/braintrust.dataset.yaml", + "-output-config", "cleanr.synced.yaml", + "-approve-insights", + }, &stdout, &stderr) + if exitCode != 0 { + t.Fatalf("expected sync success, code=%d stderr=%s", exitCode, stderr.String()) + } + + insightsPath := filepath.Join(dir, "reports", "braintrust.insights.yaml") + insights, err := cleanr.LoadBraintrustInsightDatasetFile(insightsPath) + if err != nil { + t.Fatalf("load insights: %v", err) + } + if insights.BuildID != "build-2" || insights.ExperimentURL != "https://braintrust.dev/app/cleanr-ci/build-2" 
{ + t.Fatalf("unexpected insights metadata: %+v", insights) + } + if insights.ScenarioDataset == nil || len(insights.ScenarioDataset.Scenarios) != 1 { + t.Fatalf("expected one replay-derived scenario, got %+v", insights.ScenarioDataset) + } + + datasetPath := filepath.Join(dir, "reports", "braintrust.dataset.yaml") + dataset, err := cleanr.LoadScenarioDatasetFile(datasetPath) + if err != nil { + t.Fatalf("load dataset: %v", err) + } + if len(dataset.Scenarios) != 1 || dataset.Scenarios[0].Scenario.Name != "happy-path" { + t.Fatalf("unexpected synced dataset: %+v", dataset) + } + + syncedConfigPath := filepath.Join(dir, "cleanr.synced.yaml") + syncedCfg, err := cleanr.LoadConfigFile(syncedConfigPath) + if err != nil { + t.Fatalf("load synced config: %v", err) + } + if syncedCfg.Suites.TokenOptimization.MaxOutputTokens != 256 { + t.Fatalf("expected patched token threshold, got %+v", syncedCfg.Suites.TokenOptimization) + } + var happyPath cleanr.Scenario + for _, scenario := range syncedCfg.Scenarios { + if scenario.Name == "happy-path" { + happyPath = scenario + break + } + } + if happyPath.System != "Use the verified password reset flow." 
{ + t.Fatalf("expected scenario system patch, got %+v", happyPath) + } + if !strings.Contains(strings.Join(happyPath.Tags, ","), "regression") { + t.Fatalf("expected regression tag after replay sync, got %+v", happyPath.Tags) + } +} + +func TestSyncBraintrustCommandRequiresApprovalForReviewRequiredInsights(t *testing.T) { + cfg := cleanr.ExampleConfig() + cfg.Integrations.TrendSources = []cleanr.TrendSourceConfig{{ + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + }} + + dir := t.TempDir() + configPath := filepath.Join(dir, "cleanr.yaml") + if err := cleanr.WriteConfigFile(configPath, cfg); err != nil { + t.Fatalf("write config: %v", err) + } + + restore := stubCLITransport(t, cliRoundTripperFunc(func(req *http.Request) (*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonCLIResponse(t, 200, map[string]any{ + "objects": []map[string]any{{ + "id": "exp-3", + "project_id": "proj-1", + "name": "cleanr-ci/build-3", + "created": "2026-05-28T12:00:00Z", + }}, + }), nil + case req.Method == http.MethodPost && req.URL.Path == "/btql": + body := decodeCLIRequestBody(t, req) + query := body["query"].(string) + if strings.Contains(query, "output.replay_artifact") { + return jsonCLIResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + } + return jsonCLIResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "cleanr_sync": map[string]any{ + "version": "v1alpha1", + "review_required": true, + "config_patch": map[string]any{ + "operations": []map[string]any{{ + "op": "set", + "path": "suites.token_optimization.max_output_tokens", + "value": 128, + }}, + }, + }, + }}, + }), nil + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment/exp-3/summarize": + return jsonCLIResponse(t, 200, map[string]any{}), nil + default: + t.Fatalf("unexpected request: %s %s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + var stdout 
bytes.Buffer + var stderr bytes.Buffer + exitCode := cli.Run([]string{ + "sync", "braintrust", + "-config", configPath, + "-output-config", "cleanr.synced.yaml", + }, &stdout, &stderr) + if exitCode != 2 { + t.Fatalf("expected approval failure, code=%d stdout=%s stderr=%s", exitCode, stdout.String(), stderr.String()) + } + if !strings.Contains(stderr.String(), "requires explicit review") { + t.Fatalf("unexpected stderr: %s", stderr.String()) + } +} + +func TestSyncBraintrustCommandCanCreateGitHubPR(t *testing.T) { + cfg := cleanr.ExampleConfig() + cfg.Integrations.TrendSources = []cleanr.TrendSourceConfig{{ + Type: "braintrust", + Project: "qa-gates", + Experiment: "cleanr-ci", + }} + + dir := t.TempDir() + configPath := filepath.Join(dir, "cleanr.yaml") + if err := cleanr.WriteConfigFile(configPath, cfg); err != nil { + t.Fatalf("write config: %v", err) + } + logPath := filepath.Join(dir, "commands.log") + binDir := filepath.Join(dir, "bin") + if err := os.MkdirAll(binDir, 0o755); err != nil { + t.Fatalf("mkdir bin: %v", err) + } + for _, name := range []string{"git", "gh"} { + script := "#!/bin/sh\n" + + "echo \"" + name + " $@\" >> \"" + logPath + "\"\n" + if err := os.WriteFile(filepath.Join(binDir, name), []byte(script), 0o755); err != nil { + t.Fatalf("write %s stub: %v", name, err) + } + } + originalPath := os.Getenv("PATH") + t.Setenv("PATH", binDir+string(os.PathListSeparator)+originalPath) + + restore := stubCLITransport(t, cliRoundTripperFunc(func(req *http.Request) (*http.Response, error) { + switch { + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment": + return jsonCLIResponse(t, 200, map[string]any{ + "objects": []map[string]any{{ + "id": "exp-4", + "project_id": "proj-1", + "name": "cleanr-ci/build-4", + "created": "2026-05-28T12:00:00Z", + }}, + }), nil + case req.Method == http.MethodPost && req.URL.Path == "/btql": + body := decodeCLIRequestBody(t, req) + query := body["query"].(string) + if strings.Contains(query, 
"output.replay_artifact") { + return jsonCLIResponse(t, 200, map[string]any{"data": []map[string]any{}}), nil + } + return jsonCLIResponse(t, 200, map[string]any{ + "data": []map[string]any{{ + "cleanr_sync": map[string]any{ + "version": "v1alpha1", + "config_patch": map[string]any{ + "operations": []map[string]any{{ + "op": "set", + "path": "suites.token_optimization.max_output_tokens", + "value": 512, + }}, + }, + }, + }}, + }), nil + case req.Method == http.MethodGet && req.URL.Path == "/v1/experiment/exp-4/summarize": + return jsonCLIResponse(t, 200, map[string]any{ + "experiment_url": "https://braintrust.dev/app/cleanr-ci/build-4", + }), nil + default: + t.Fatalf("unexpected request: %s %s", req.Method, req.URL.String()) + return nil, nil + } + })) + defer restore() + + var stdout bytes.Buffer + var stderr bytes.Buffer + exitCode := cli.Run([]string{ + "sync", "braintrust", + "-config", configPath, + "-output-config", "cleanr.synced.yaml", + "-create-pr", + "-pr-branch", "cleanr-sync-branch", + "-pr-title", "Sync Braintrust insights", + "-pr-body", "Apply reviewed Braintrust insights.", + "-commit-message", "cleanr sync commit", + }, &stdout, &stderr) + if exitCode != 0 { + t.Fatalf("expected sync pr success, code=%d stderr=%s", exitCode, stderr.String()) + } + + logBody, err := os.ReadFile(logPath) + if err != nil { + t.Fatalf("read command log: %v", err) + } + logText := string(logBody) + for _, want := range []string{ + "git checkout -b cleanr-sync-branch", + "git add", + "git commit -m cleanr sync commit", + "gh pr create --title Sync Braintrust insights --body Apply reviewed Braintrust insights.", + } { + if !strings.Contains(logText, want) { + t.Fatalf("expected %q in command log:\n%s", want, logText) + } + } +} + func TestRunCommandPersistsTrendHistory(t *testing.T) { cfg := cleanr.ExampleConfig() cfg.Scenarios = []cleanr.Scenario{{