From 2e130704952a2b00d00019313c77cf265eec5a45 Mon Sep 17 00:00:00 2001 From: igor-ctrl Date: Fri, 8 May 2026 12:13:37 -0500 Subject: [PATCH 1/4] feat(team-deployment): bcli doctor, signed bundles, query metadata, records view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 — `bcli doctor` (diagnostics): Self-rescue command for team installs. 11 independent checks (active profile, tenant, env, company, auth mode, profile-scoped token cache, registry, field coverage, saved queries, team bundle, BC connectivity). --json for monitoring scripts; non-zero exit on any FAIL. Connectivity uses httpx with trust_env=True so corporate proxies are honored. Phase 2 — team bundle infra (`bcli config refresh`/`make-bundle`/etc.): Manifest schema, fetch over HTTPS (file:// gated behind BCLI_DEV=1), per-file content-hash + canonical roll-up verifier, atomic apply with per-profile flock + .previous backup retention, rollback. Hardening: allowlisted file set, no symlinks/devices, bounded compressed/decom- pressed sizes (gzip-bomb defense), traversal-rejecting verifier. field_lists.json is merged into the registry on apply so the prewarm isn't a dead artifact. NOTE: Sha256Verifier is internal-consistency only, not publisher authentication — phase 2 cannot ship to finance without a real cryptographic signer at the Verifier seam OR strict HTTPS+org-auth distribution. Gap is loud in plan + verifier docstring. Phase 3 — saved-query metadata + `bcli q` discovery: Additive YAML keys (aliases, tags, owner, freshness, examples, related). New sub-verbs `bcli q list [tag=...] [owner=...]`, `bcli q search `, `bcli q info `, `bcli q run ` (escape hatch). Reserved sub-verb names hard-error at bundle load. Substring + alias + tag scoring, no embeddings — codex consult was right that boring metadata wins for a 50-100 query library. Phase 6 — Windows PowerShell wide-table fix: New `records` formatter (vertical, psql-\\x style). Auto-promotion when result is 1-2 rows with >8 columns or wider than the terminal — only when format was auto-detected, never when user passed -f explicitly (auto_format flag threaded through CLIState). Plan: docs/plans/team-deployment.md (drafted earlier; updated to fix schema mismatch and call out the signing ship-blocker explicitly). Tests: 593 passing (+45 new across 4 phases). Includes concurrency test for the apply lock, gzip-bomb rejection, traversal rejection in fetch and verify paths, file://-blocked-without-dev, profile-scoped token cache regression, and explicit-format-disables-auto-records. Codex review (high reasoning) found 2 P1 + 8 P2 issues; all fixed in this commit except the publisher-signing P1 which is correctly deferred to a phase-2-extension and now flagged as a finance-rollout ship-blocker in the plan. --- docs/plans/team-deployment.md | 304 ++++++++++++ src/bcli/bundle/__init__.py | 47 ++ src/bcli/bundle/_apply.py | 323 +++++++++++++ src/bcli/bundle/_fetch.py | 197 ++++++++ src/bcli/bundle/_manifest.py | 127 +++++ src/bcli/bundle/_publish.py | 110 +++++ src/bcli/bundle/_verify.py | 168 +++++++ src/bcli/diagnostics/__init__.py | 26 ++ src/bcli/diagnostics/_checks.py | 510 +++++++++++++++++++++ src/bcli/workflow/_query_search.py | 154 +++++++ src/bcli_cli/_state.py | 5 + src/bcli_cli/app.py | 3 + src/bcli_cli/commands/config_cmd.py | 10 + src/bcli_cli/commands/doctor_cmd.py | 171 +++++++ src/bcli_cli/commands/get_cmd.py | 3 +- src/bcli_cli/commands/query_cmd.py | 280 ++++++++++- src/bcli_cli/commands/refresh_cmd.py | 298 ++++++++++++ src/bcli_cli/output/_formatters.py | 106 ++++- tests/test_bundle/__init__.py | 0 tests/test_bundle/test_bundle_roundtrip.py | 394 ++++++++++++++++ tests/test_cli/test_records_format.py | 139 ++++++ tests/test_diagnostics/__init__.py | 0 tests/test_diagnostics/test_checks.py | 307 +++++++++++++ tests/test_workflow/test_query_search.py | 140 ++++++ 24 files changed, 3798 insertions(+), 24 deletions(-) create mode 100644 docs/plans/team-deployment.md create mode 100644 src/bcli/bundle/__init__.py create mode 100644 src/bcli/bundle/_apply.py create mode 100644 src/bcli/bundle/_fetch.py create mode 100644 src/bcli/bundle/_manifest.py create mode 100644 src/bcli/bundle/_publish.py create mode 100644 src/bcli/bundle/_verify.py create mode 100644 src/bcli/diagnostics/__init__.py create mode 100644 src/bcli/diagnostics/_checks.py create mode 100644 src/bcli/workflow/_query_search.py create mode 100644 src/bcli_cli/commands/doctor_cmd.py create mode 100644 src/bcli_cli/commands/refresh_cmd.py create mode 100644 tests/test_bundle/__init__.py create mode 100644 tests/test_bundle/test_bundle_roundtrip.py create mode 100644 tests/test_cli/test_records_format.py create mode 100644 tests/test_diagnostics/__init__.py create mode 100644 tests/test_diagnostics/test_checks.py create mode 100644 tests/test_workflow/test_query_search.py diff --git a/docs/plans/team-deployment.md b/docs/plans/team-deployment.md new file mode 100644 index 0000000..4d80a44 --- /dev/null +++ b/docs/plans/team-deployment.md @@ -0,0 +1,304 @@ +# Team Deployment Plan + +Status: draft. Target: finance team (~10 users) + engine technical team (~10 users) on the existing scoped-profile substrate. + +## Why this plan exists + +bcli is moving from solo developer to a real team rollout. The current substrate already supports it (sandboxed profiles, curated registries, scoped saved queries, device-code auth, read-only-by-permission-set), but three workflow gaps will dominate first-month support load: + +1. **No team-wide registry/query distribution.** Whoever last updated the JSON wins. New hires re-discover everything. +2. **No diagnostic surface.** Users with broken setups can't self-rescue. "Wrong profile / stale bundle / not authenticated / wrong company" will be every other support ticket. +3. **No discoverability for the saved-query library.** Finance ops will not learn YAML. They will ask "is there a query for overdue intercompany invoices?" via email. + +This plan ships three boring high-leverage things to address (1)–(3), explicitly defers Redis and response caching until telemetry justifies them, and locks in trigger conditions so the deferral doesn't become indefinite. + +## Phase 0 — pre-flight (this sprint) + +- Decide bundle storage backend: pick whichever of `S3`, `Azure Blob`, or `GitHub Releases` the org already authenticates to cleanly. Decision lives in `docs/plans/team-deployment.md` once made. +- Identify two bundle owners: one finance, one engine-tech. They are the publish path. +- Land a minimal telemetry sink config so phase 4 has data when it's time. The pluggable `[telemetry]` substrate already exists at `src/bcli/telemetry/`; pick `console` for dev, set up Azure Monitor or a custom HTTP sink for prod. Capture `bcli.command`, `bcli.query`, `bcli.error` at minimum. **Do not** capture filter text or UPN unless privacy review approves. + +## Phase 1 — `bcli doctor` (ships first) + +Self-rescue command for non-technical users. This alone will eliminate most week-one tickets. + +### Surface + +``` +bcli doctor [--profile ] [--json] +``` + +Default output: human-readable, color-coded (green/yellow/red per check), with a one-line verdict at the bottom (`OK`, `WARN`, or `FAIL`). `--json` for scripting. Non-zero exit on `FAIL`. + +### Checks + +| Check | Source | Fail condition | +|---|---|---| +| Active profile | `CLIState` resolved profile | Missing or unknown profile name | +| Bundle version | `manifest.json` `version` field (phase 2) | Missing manifest, or `last_refresh > 30d` | +| Signature verified | bundle signature check (phase 2) | Signature missing or invalid | +| Last refresh time | bundle metadata | > 7d warn, > 30d fail | +| Registry endpoint count | `EndpointRegistry.list_all()` | 0 endpoints in scoped profile | +| Saved query count | `queries/.yaml` | File missing in scoped profile | +| Field-list coverage | count of endpoints with `field_names` populated | Warn under 50% for scoped profiles | +| Auth mode | profile config | Unknown mode | +| Auth status | non-blocking probe of cached token | Expired and no refresh path | +| Company | `--company` resolution | No default and no override available | +| Environment | profile config | Missing | +| Tenant ID | profile config | Missing | +| BC connectivity | one-shot `GET companies` with 5s timeout | Non-2xx | +| Local overlay present | overlay file exists (phase 2) | Informational only | + +### Output sketch + +``` +bcli doctor — profile: finance + + ✓ Active profile finance + ✓ Bundle version 2026.05.07-1 (signed by ops-bcli-bot) + ✓ Last refresh 2 days ago + ✓ Registry 42 endpoints, 38 with field lists (90%) + ✓ Saved queries 17 queries + ✓ Auth device_code, token valid for 47 min + ✓ Tenant contoso.onmicrosoft.com + ✓ Environment production + ✓ Company BTALI (default) + ✓ BC connectivity reachable, 312 ms + ⚠ Field coverage 2 endpoints below 80% (run `bcli endpoint fields ...`) + + Verdict: OK +``` + +### Files to add/modify + +- New: `src/bcli_cli/commands/doctor_cmd.py` +- New: `src/bcli/diagnostics/_checks.py` (testable check primitives, returns `CheckResult` with status + message) +- Modify: `src/bcli_cli/app.py` to register the command group +- New: `tests/test_diagnostics/test_checks.py` + +### Done when + +- `bcli doctor` runs in under 3 seconds for a healthy install. +- Each check is independently unit-tested with parametrized fail cases. +- Engine-tech and finance both ran it on their own laptop and the output made sense without explanation. + +## Phase 2 — signed bundle distribution + +### Bundle layout + +``` +finance-2026.05.07-1.tar.gz + manifest.json + registry.json # mirrors current ~/.config/bcli/registries/.json + queries.yaml # mirrors current ~/.config/bcli/queries/.yaml + field_lists.json # pre-warmed field discovery (avoids first-touch tax) + README.md # human notes for this bundle version +``` + +`manifest.json`: + +```json +{ + "schema_version": 1, + "profile": "finance", + "version": "2026.05.07-1", + "published_at": "2026-05-07T14:32:00Z", + "publisher": "ops-bcli-bot", + "checksum_sha256": "…", + "signature": "…", + "min_bcli_version": "0.3.0", + "previous_version": "2026.04.30-2", + "release_notes": "Added overdue-ic and posted-invoice-by-id queries" +} +``` + +### Storage + transport + +- Backend: signed HTTPS, single source of truth per profile. Bundles published to versioned object keys, e.g. `https://bundles.example.com/bcli/finance/2026.05.07-1.tar.gz` with a `latest.json` pointer for resolution. The org-specific URL lives in `~/.config/bcli/config.toml` as `[bundle.finance] url = "..."` so different teams can self-host. +- Signing: detached signature (`minisign` or `cosign`, decision pending) with the public key shipped in the user's profile config. Refresh fails closed if signature does not verify. +- **Not** `git pull`. Maintainers can author bundles in a private GitHub repo and ship via Release assets, but the user-facing transport is a signed tarball over HTTPS. + +### `bcli config refresh` UX + +``` +bcli config refresh # refresh active profile +bcli config refresh --profile engine-tech # explicit profile +bcli config refresh --dry-run # show diff vs local, no writes +bcli config refresh --rollback # restore previous version +bcli config refresh --check # exit code only, no output (cron-friendly) +``` + +Non-interactive, atomic. Output: + +``` +Refreshing finance from https://bundles.example.com/bcli/finance/latest.json + Current: 2026.04.30-2 + Latest: 2026.05.07-1 (published 2 days ago by ops-bcli-bot) + Verifying signature… ok + Diff: + + 2 endpoints (postedSalesInvoices, salesInvoiceLines) + + 3 saved queries (overdue-ic, posted-by-id, customer-aging) + ~ 1 query updated (open-pos: added customer parameter) + Applied. Previous version retained at ~/.config/bcli/registries/finance.2026.04.30-2.json +``` + +### Overlay semantics + +- Team bundle is **authoritative** for scoped profiles. `bcli config refresh` overwrites `registry.json` and `queries.yaml` atomically (write-temp + rename). Previous version retained for one rollback. +- Local overlay file `~/.config/bcli/overlays/.yaml` exists *only if* the profile config has `allow_local_overrides = true`. Off by default for sandboxed domain profiles. +- Effective view: team bundle merged with overlay, **team wins on name conflicts**. No interactive merge prompts ever. Non-technical users never see a conflict. +- `bcli endpoint fields` discovery for sandboxed profiles writes to overlay if enabled, otherwise informs the user to send the discovered fields to their bundle owner. + +### Files to add/modify + +- New: `src/bcli/bundle/__init__.py` (manifest schema, signature verification, atomic apply) +- New: `src/bcli/bundle/_fetch.py`, `_verify.py`, `_apply.py`, `_rollback.py` +- New: `src/bcli_cli/commands/refresh_cmd.py` (registered as `bcli config refresh`) +- Modify: `src/bcli/config/_loader.py` to compose registry from team bundle + optional overlay +- Modify: `src/bcli/registry/_registry.py` to load from the new layered path +- New: `examples/bundles/sample-bundle.tar.gz` + a `make-bundle` script for admins +- New: `docs/team-bundles.md` covering the publish workflow + +### Done when + +- An admin can produce a signed bundle with one command and publish it. +- A finance user can run `bcli config refresh` cold and have a working setup in under 30 seconds. +- `--rollback` restores the previous version verifiably. +- Tampered bundle is rejected with a clear error, not silently applied. + +## Phase 3 — query metadata extension + +Saved queries get richer descriptive metadata so substring + tag search beats the discoverability problem without embeddings. + +### YAML schema additions + +```yaml +queries: + overdue-ic: + description: Overdue intercompany invoices for a vendor + aliases: [overdue-intercompany, ic-overdue, ar-overdue-ic] + tags: [period-close, ap, intercompany] + owner: finance-ops + freshness: live # one of: live, daily, reference + examples: + - bcli q overdue-ic vendor=ACME-IC + - bcli q overdue-ic vendor=ACME-IC days=30 + related: [open-invoices, vendor-aging] + params: + vendor: { required: true, hint: "BC Vendor No." } + days: { default: 30, hint: "Days overdue" } + # Query body lives at the top level (matches the runtime — there is + # no `odata:` wrapper). The metadata block above is purely for + # discoverability; nothing in it changes how the query executes. + endpoint: vendorLedgerEntries + filter: "vendorNumber eq '${{ params.vendor }}' and dueDate lt now sub '${{ params.days }}d' and remainingAmount gt 0" + orderby: dueDate +``` + +### Search surface + +``` +bcli q list # all queries, table +bcli q list --tag period-close # filter by tag +bcli q list --owner finance-ops # filter by owner +bcli q search "overdue invoices" # substring + alias + description match +bcli q info overdue-ic # full metadata view +``` + +`bcli q search` ranks by: exact name > alias hit > tag hit > description substring > example substring. No embeddings. Honest "no match, did you mean X" output when the score floor isn't met. + +### Files to add/modify + +- Modify: `src/bcli/workflow/` query schema (extend Pydantic model) +- Modify: `src/bcli_cli/commands/query_cmd.py` to add `list`, `search`, `info` subcommands +- New: `tests/test_workflow/test_query_metadata.py` +- Update: `docs/saved-queries.md` with the schema additions +- Migration: existing queries without the new fields keep working (all new fields optional) + +### Done when + +- Existing queries still run unchanged. +- `bcli q search` finds an existing query when the user types a plausible NL phrase. +- Finance ops can browse the catalog by tag without reading YAML. + +## Phase 4 — response caching (deferred, telemetry-gated) + +**Do not ship until all three triggers are met:** + +1. Telemetry shows P95 latency for posted-invoice / open-PO endpoints exceeding 2s under finance close-week load. +2. Telemetry shows ≥ 5% of GETs returning 429 / 503 during close week. +3. The two bundle owners agree the workflow pain is real, not theoretical. + +If shipped: + +- Backend: `hishel`-backed disk cache around `httpx`, **not** Redis. Single-process. Lives at `~/.config/bcli/cache/`. +- Cache key composition: `tenant_id + environment + company + profile + resolved_url + sorted(query_params) + select_hash`. Never less. +- TTL ceilings (max — actual values configurable per endpoint, can be lower): + - Vendor balances: no cache by default; 5-15s if forced, output labeled `(cached 8s ago)` + - Open POs / open invoices: 15-60s + - Inventory / utilization / preservation status: 10-60s + - Posted invoice **list** queries: 60-300s only when filtered to a closed period + - Posted invoice / journal entry **by exact record ID**: 1-24h (immutable post-posting) +- Never cache `--all`. Never cache write-adjacent commands. Cache hits are visible in output and structured logs. +- Opt-in per profile via `[cache] enabled = true`. + +## Phase 5 — Redis-for-AI (deferred, condition-gated) + +**Do not ship until at least one of:** + +1. A centralized `bcli-mcp` service is running for multiple agents/users (cross-process state stops being free). +2. Saved-query library exceeds 200 entries with measurable search misses in telemetry. +3. Field-list "did you mean" produces wrong suggestions ≥ 5% of attempts measurably. + +If shipped, the integration points are: + +- Vector "did you mean" over discovered field names (replaces substring fuzzy in `src/bcli/client/_async.py:469`). +- Vector search over saved queries by NL intent. Caches **query plans**, never query results. +- Shared field-discovery cache for the centralized MCP path. + +Backend: pluggable `cache_backend` with `redis` extra, mirroring the existing `[telemetry]` pattern. NullCache default. Redis is optional infrastructure. + +## Out of scope + +- Semantic caching of OData result data. Stale balances / inventory / posted-document state silently returned would destroy trust in bcli as a BC truth source. If we want a low-risk reference subset later, it lands as an explicit feature with `cached as of` labels in output, not a silent layer. +- Token sharing across users. Each user authenticates as themselves. The BC permission set is the security boundary, not the bcli flag. +- Replacing the existing per-profile registry JSON layout. Bundles are a publish-and-distribute layer on top, not a replacement. + +## Risks and open questions + +- **Phase 2 signing is a ship-blocker for finance rollout.** The current + `Sha256Verifier` only proves internal consistency: each file matches + its declared hash, and the manifest's roll-up matches the contents + map. It does NOT authenticate the publisher. A compromised CDN can + mint a malicious `registry.json`, recompute the hashes, and pass + verification. Before bundles go to finance, either ship a real + cryptographic signer (`minisign` / `cosign` / ed25519 + pinned key) + at the `bcli.bundle.Verifier` seam, or restrict bundle distribution + to private blob storage with org-level auth and treat HTTPS+auth as + the trust boundary. Document the choice; do not ship "verified" + without one of the two. +- **Signing key custody.** Who owns the bundle signing key, and how is it rotated when an owner leaves? Decide before phase 2 ships. +- **Bundle URL discovery.** First-time install needs to know where to refresh from. Likely `bcli config init --scoped --bundle-url ` extends the existing wizard. Verify this fits the wizard's current shape. +- **Field discovery in scoped profiles.** Today `bcli endpoint fields` writes back to the local registry. With overlay-off-by-default, sandboxed users can't improve their own setup. The plan: those discoveries get logged to a "candidate fields" file the user can email to their bundle owner. Better mechanism welcome. +- **Bundle drift between teams.** If finance and engine-tech import the same standard endpoint into both bundles and they diverge, which wins? Today: each profile is isolated. Keep it that way. +- **Telemetry privacy.** Phase 0 telemetry must not capture filter text or UPN by default. Confirm with the legal/privacy reviewer. + +## Validation gates per phase + +| Phase | Gate | +|---|---| +| 1 | `bcli doctor` runs on engine-tech and finance laptops cold; output makes sense to a non-developer | +| 2 | Bundle round-trip works: admin publishes, user runs `refresh`, signature verifies, rollback works | +| 3 | Existing queries unchanged; `q search "overdue invoices"` finds `overdue-ic` | +| 4 | Telemetry triggers met before any code is written | +| 5 | At least one of three triggers met before any code is written | + +## Sequencing summary + +1. **Now:** phase 0 (pick backend + telemetry sink) and phase 1 (`bcli doctor`). Two weeks. +2. **Next:** phase 2 (bundle distribution + `config refresh`). Three weeks. +3. **After phase 2 ships and bakes for two weeks:** phase 3 (query metadata + search). One week. +4. **On telemetry:** phase 4. Maybe never. +5. **On condition trigger:** phase 5. Maybe never. + +The honest read: phases 1–3 are most of the user-facing value. Phases 4–5 are the interesting features but only earn their cost under conditions we haven't observed yet. diff --git a/src/bcli/bundle/__init__.py b/src/bcli/bundle/__init__.py new file mode 100644 index 0000000..e573676 --- /dev/null +++ b/src/bcli/bundle/__init__.py @@ -0,0 +1,47 @@ +"""Team-shared registry / saved-query bundles. + +A bundle is a tarball published by an admin to a known HTTPS location and +pulled by team members with ``bcli config refresh``. The format is a thin +contract on top of the existing per-profile JSON / YAML files: the bundle +ships a ``manifest.json`` plus the same ``registry.json`` and +``queries.yaml`` shapes the user already has on disk, so an admin can +hand-author a bundle without learning a new schema. + +Phase 2 ships the format, fetch / verify / apply / rollback primitives, and +the ``bcli config refresh`` command. Cryptographic signing is gated behind +a ``Verifier`` protocol: today the default is a SHA-256 checksum (covers +in-flight tampering on a trusted CDN), and a real signing scheme +(``minisign`` or ``cosign``) plugs into the same seam once the team picks +one. +""" + +from bcli.bundle._apply import BundleApplyResult, apply_bundle, rollback_bundle +from bcli.bundle._fetch import BundleFetchError, fetch_bundle +from bcli.bundle._manifest import ( + Bundle, + BundleManifest, + BundleVerifyError, + load_local_manifest, +) +from bcli.bundle._verify import ( + NullVerifier, + Sha256Verifier, + Verifier, + verify_bundle, +) + +__all__ = [ + "Bundle", + "BundleApplyResult", + "BundleFetchError", + "BundleManifest", + "BundleVerifyError", + "NullVerifier", + "Sha256Verifier", + "Verifier", + "apply_bundle", + "fetch_bundle", + "load_local_manifest", + "rollback_bundle", + "verify_bundle", +] diff --git a/src/bcli/bundle/_apply.py b/src/bcli/bundle/_apply.py new file mode 100644 index 0000000..0f09ef7 --- /dev/null +++ b/src/bcli/bundle/_apply.py @@ -0,0 +1,323 @@ +"""Atomically apply a verified bundle to the user's config tree. + +Concurrency contract: two ``bcli config refresh`` runs against the same +profile may execute simultaneously (one in a terminal, one fired by an +agent). The apply step holds an advisory file lock on +``/.refresh.lock`` for its duration so the second +process serializes behind the first instead of racing on .previous / +.incoming temp names. ``flock`` is POSIX; on Windows we fall back to a +best-effort exclusive create on the same lock file. +""" + +from __future__ import annotations + +import contextlib +import json +import logging +import os +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path + +from bcli.bundle._manifest import Bundle, write_local_manifest + +logger = logging.getLogger("bcli.bundle.apply") + + +@dataclass(frozen=True) +class BundleApplyResult: + """Summary of what changed after a refresh. + + Returned to the CLI so the user can see exactly what their team bundle + just rewrote — registry, queries, both — and what the previous version + was, in case rollback is needed. + """ + + profile: str + new_version: str + previous_version: str + registry_changed: bool + queries_changed: bool + field_lists_changed: bool + + +def apply_bundle( + bundle: Bundle, + *, + registries_dir: Path, + queries_dir: Path, + bundle_dir: Path, +) -> BundleApplyResult: + """Apply ``bundle`` atomically; retain the previous version for rollback. + + Behaviour: + * A profile-scoped advisory lock serializes concurrent refreshes so + two processes can't race on the .previous / .incoming temp names. + * Each destination file is written via ``write-temp + replace`` — + per-process unique temp names, atomic ``Path.replace`` for the + final swap. A partial failure leaves the user on the prior version. + * The previous file is preserved at ``.previous`` so + :func:`rollback_bundle` can undo the apply without re-fetching. + * ``manifest.json`` is the *last* thing written. Its presence is the + signal "a bundle is installed" used by ``bcli doctor``. + * ``field_lists.json`` is merged into the registry on apply so the + registry loader (which only reads ``.json``) actually + sees the prewarmed field names. Bundles that don't ship field + lists leave the registry untouched. + """ + profile = bundle.manifest.profile + registries_dir.mkdir(parents=True, exist_ok=True) + queries_dir.mkdir(parents=True, exist_ok=True) + bundle_dir.mkdir(parents=True, exist_ok=True) + + lock_path = bundle_dir / f"{profile}.refresh.lock" + with _profile_lock(lock_path): + registry_dest = registries_dir / f"{profile}.json" + queries_dest = queries_dir / f"{profile}.yaml" + + registry_changed = False + queries_changed = False + field_lists_changed = False + + if bundle.has_registry(): + registry_payload = bundle.registry_path.read_bytes() + if bundle.has_field_lists(): + merged = _merge_field_lists( + registry_payload, bundle.field_lists_path.read_bytes() + ) + if merged is not None: + registry_payload = merged + field_lists_changed = True + registry_changed = _atomic_replace_bytes_with_backup( + registry_payload, registry_dest + ) + if bundle.has_queries(): + queries_changed = _atomic_replace_with_backup( + bundle.queries_path, queries_dest + ) + + previous = _previous_version(bundle_dir, profile) + # Back up the in-place manifest so rollback can restore the version + # marker too, not just the content files. Without this, a rollback + # would restore the registry but leave `bcli doctor` claiming the + # newer bundle version is installed. + manifest_path = bundle_dir / f"{profile}.manifest.json" + if manifest_path.is_file(): + backup = manifest_path.with_suffix(manifest_path.suffix + ".previous") + manifest_path.replace(backup) + write_local_manifest(bundle_dir, bundle.manifest) + + return BundleApplyResult( + profile=profile, + new_version=bundle.manifest.version, + previous_version=previous, + registry_changed=registry_changed, + queries_changed=queries_changed, + field_lists_changed=field_lists_changed, + ) + + +def rollback_bundle( + profile: str, + *, + registries_dir: Path, + queries_dir: Path, + bundle_dir: Path, +) -> bool: + """Restore the most recent ``.previous`` siblings for a profile. + + Returns ``True`` when at least one file was rolled back. Idempotent — + calling it twice in a row is harmless. + """ + candidates = [ + registries_dir / f"{profile}.json", + queries_dir / f"{profile}.yaml", + registries_dir / f"{profile}.field_lists.json", + ] + rolled_back = False + for path in candidates: + backup = path.with_suffix(path.suffix + ".previous") + if backup.is_file(): + backup.replace(path) + rolled_back = True + + manifest_path = bundle_dir / f"{profile}.manifest.json" + backup_manifest = manifest_path.with_suffix(manifest_path.suffix + ".previous") + if backup_manifest.is_file(): + backup_manifest.replace(manifest_path) + rolled_back = True + elif manifest_path.is_file() and not rolled_back: + # No prior backup at all — best we can do is wipe the manifest so + # `bcli doctor` reverts to "bundle not installed". + manifest_path.unlink() + rolled_back = True + + return rolled_back + + +# ─── helpers ────────────────────────────────────────────────────────── + + +def _atomic_replace_with_backup(src: Path, dest: Path) -> bool: + """Replace ``dest`` with ``src``, retaining the old one as ``.previous``. + + Returns ``True`` when the destination changed. Per-process unique + temp names (via :func:`tempfile.mkstemp`) keep concurrent processes + from clobbering each other's in-flight writes if the lock is somehow + bypassed — defense in depth on top of the profile lock. + """ + return _atomic_replace_bytes_with_backup(src.read_bytes(), dest) + + +def _atomic_replace_bytes_with_backup(payload: bytes, dest: Path) -> bool: + """Atomically write ``payload`` to ``dest`` with a ``.previous`` backup. + + Returns ``True`` when content changed; ``False`` for byte-identical + re-applies (no backup churn, no bogus "changed" line in the CLI + summary). + """ + if dest.is_file(): + existing = dest.read_bytes() + if existing == payload: + return False + backup = dest.with_suffix(dest.suffix + ".previous") + dest.replace(backup) + + fd, tmp_name = tempfile.mkstemp( + prefix=f"{dest.name}.", + suffix=".incoming", + dir=dest.parent, + ) + try: + with os.fdopen(fd, "wb") as f: + f.write(payload) + f.flush() + os.fsync(f.fileno()) + Path(tmp_name).replace(dest) + except Exception: + Path(tmp_name).unlink(missing_ok=True) + raise + return True + + +def _previous_version(bundle_dir: Path, profile: str) -> str: + """Read the in-place manifest before we overwrite it.""" + path = bundle_dir / f"{profile}.manifest.json" + if not path.is_file(): + return "" + try: + raw = json.loads(path.read_text(encoding="utf-8")) + return str(raw.get("version", "")) + except (OSError, json.JSONDecodeError): + return "" + + +def _merge_field_lists( + registry_bytes: bytes, field_lists_bytes: bytes +) -> bytes | None: + """Merge a ``field_lists.json`` map into ``registry.json``. + + The registry loader only reads ``.json``, so the bundle's + optional ``field_lists.json`` was a dead artifact until this step. + We update each endpoint's ``field_names`` in place with whatever the + bundle provides — the bundle's authoritative list wins, which is the + contract for team-managed bundles. + + Returns the merged JSON bytes, or ``None`` when either file is + unparseable (in which case the registry stays as-is and the apply + step proceeds without field-list prewarming — verifier already + validated content hashes, so unparseable here is genuinely + surprising). + """ + try: + registry = json.loads(registry_bytes.decode("utf-8")) + field_lists = json.loads(field_lists_bytes.decode("utf-8")) + except (UnicodeDecodeError, json.JSONDecodeError) as e: + logger.warning("field_lists merge skipped: %s", e) + return None + + if not isinstance(registry, dict) or not isinstance(field_lists, dict): + return None + + endpoints = registry.get("endpoints") + if not isinstance(endpoints, list): + return None + + overrides = field_lists.get("field_names") or field_lists + if not isinstance(overrides, dict): + return None + + changed = False + for entry in endpoints: + if not isinstance(entry, dict): + continue + name = entry.get("entity_set_name") + if isinstance(name, str) and name in overrides: + new_fields = overrides[name] + if isinstance(new_fields, list) and new_fields != entry.get("field_names"): + entry["field_names"] = list(new_fields) + changed = True + + if not changed: + return None + return json.dumps(registry, indent=2).encode("utf-8") + + +# ─── locking ────────────────────────────────────────────────────────── + + +@contextlib.contextmanager +def _profile_lock(lock_path: Path): + """Advisory lock to serialize concurrent refreshes for one profile. + + POSIX uses ``flock``. Windows falls back to a best-effort exclusive + file create with retries — not a real mutex, but sufficient to keep + two interactive runs from racing in the same minute. For real + multi-process safety on Windows, callers should serialize at a + higher layer. + """ + lock_path.parent.mkdir(parents=True, exist_ok=True) + if sys.platform == "win32": + yield from _win_lock(lock_path) + else: + yield from _posix_lock(lock_path) + + +def _posix_lock(lock_path: Path): + import fcntl + + f = open(lock_path, "w", encoding="utf-8") + try: + fcntl.flock(f.fileno(), fcntl.LOCK_EX) + try: + yield + finally: + fcntl.flock(f.fileno(), fcntl.LOCK_UN) + finally: + f.close() + + +def _win_lock(lock_path: Path): + import time + + deadline = time.monotonic() + 30.0 + while True: + try: + fd = os.open( + lock_path, + os.O_CREAT | os.O_EXCL | os.O_RDWR, + ) + break + except FileExistsError: + if time.monotonic() > deadline: + raise + time.sleep(0.2) + try: + yield + finally: + try: + os.close(fd) + finally: + with contextlib.suppress(FileNotFoundError): + lock_path.unlink() diff --git a/src/bcli/bundle/_fetch.py b/src/bcli/bundle/_fetch.py new file mode 100644 index 0000000..95d587e --- /dev/null +++ b/src/bcli/bundle/_fetch.py @@ -0,0 +1,197 @@ +"""Fetch a bundle tarball over HTTPS and extract it to a temp directory. + +Hardening notes — every limit here exists because the threat model is a +compromised CDN serving a malicious tarball to a finance laptop: + +* **Allowed file set.** Only ``manifest.json``, ``registry.json``, + ``queries.yaml``, ``field_lists.json``, and ``README.md`` are accepted. + Anything else aborts extraction. +* **No symlinks, no hardlinks, no devices.** Bundles ship plain files. +* **Bounded sizes.** Compressed payload, decompressed total, per-member, + and member count all have hard caps. A small archive that decompresses + to gigabytes is the canonical attack — bound at every layer. +* **Path traversal.** Member names are normalized; absolute paths and + ``..`` segments fail closed. + +These limits are deliberately set above any plausible legitimate bundle +(typical bundle is well under 1MB) but well below "OOM the laptop." Adjust +upward only after measuring real bundle sizes in production. +""" + +from __future__ import annotations + +import io +import json +import logging +import os +import shutil +import tarfile +import tempfile +from pathlib import Path +from urllib.parse import urlparse + +import httpx + +from bcli.bundle._manifest import Bundle, BundleManifest + +logger = logging.getLogger("bcli.bundle.fetch") + +# Hard ceilings. A real finance/engine bundle is small (KB-MB range); +# anything outside these is either misconfigured or hostile. +MAX_COMPRESSED_BYTES = 25 * 1024 * 1024 # 25 MB on the wire +MAX_TOTAL_EXTRACTED_BYTES = 100 * 1024 * 1024 # 100 MB after gunzip +MAX_PER_MEMBER_BYTES = 50 * 1024 * 1024 # 50 MB single file +MAX_MEMBER_COUNT = 64 + +# Allowlist of files the apply step actually consumes. Extra files in the +# tar are not just wasted bytes — they widen the attack surface for path +# tricks and surprise content. +_ALLOWED_BUNDLE_PATHS = frozenset( + { + "manifest.json", + "registry.json", + "queries.yaml", + "field_lists.json", + "README.md", + } +) + + +class BundleFetchError(Exception): + """Network, redirect, or extraction failure during bundle pull.""" + + +def fetch_bundle( + url: str, + *, + timeout: float = 30.0, + auth_header: str | None = None, + allow_file_url: bool | None = None, +) -> tuple[Bundle, bytes]: + """Download ``url``, extract it, and hand back the raw bytes. + + ``allow_file_url`` is the dev-only escape hatch for ``file://`` URLs. + Production callers must leave it ``None`` (the default), which only + accepts ``file://`` when ``BCLI_DEV=1`` is set in the environment. + Without that, only ``https://`` is allowed — the team contract. + """ + parsed = urlparse(url) + file_allowed = ( + allow_file_url + if allow_file_url is not None + else os.environ.get("BCLI_DEV") in ("1", "true", "yes") + ) + if parsed.scheme == "file": + if not file_allowed: + raise BundleFetchError( + "file:// URLs are dev-only; set BCLI_DEV=1 to opt in. " + "Production refresh expects an https:// URL pointing at a " + "signed bundle in your team's blob storage." + ) + elif parsed.scheme != "https": + raise BundleFetchError( + f"refusing to fetch over {parsed.scheme!r} — only https:// is " + "accepted (signed-HTTPS distribution is the team contract)" + ) + + headers = {"User-Agent": "bcli-bundle/1.0"} + if auth_header: + headers["Authorization"] = auth_header + + try: + if parsed.scheme == "file": + raw = Path(parsed.path).read_bytes() + else: + with httpx.Client(timeout=timeout, follow_redirects=True) as client: + resp = client.get(url, headers=headers) + resp.raise_for_status() + raw = resp.content + except (httpx.HTTPError, OSError) as e: + raise BundleFetchError(f"could not download {url}: {e}") from e + + if len(raw) > MAX_COMPRESSED_BYTES: + raise BundleFetchError( + f"bundle is {len(raw)} bytes, exceeds the {MAX_COMPRESSED_BYTES}-byte " + "compressed-size limit (gzip-bomb defense)" + ) + + extract_root = Path(tempfile.mkdtemp(prefix="bcli-bundle-")) + try: + bundle = _extract(raw, extract_root) + except Exception as e: # noqa: BLE001 + shutil.rmtree(extract_root, ignore_errors=True) + if isinstance(e, BundleFetchError): + raise + raise BundleFetchError(f"could not extract bundle: {e}") from e + + return bundle, raw + + +def _extract(raw: bytes, dest: Path) -> Bundle: + """Extract a tarball into ``dest`` with size + path + type guards.""" + with tarfile.open(fileobj=io.BytesIO(raw), mode="r:*") as tar: + members = tar.getmembers() + if len(members) > MAX_MEMBER_COUNT: + raise BundleFetchError( + f"bundle has {len(members)} members, max is {MAX_MEMBER_COUNT}" + ) + + total = 0 + for m in members: + _check_safe_member(m) + if m.size > MAX_PER_MEMBER_BYTES: + raise BundleFetchError( + f"member '{m.name}' is {m.size} bytes, max is " + f"{MAX_PER_MEMBER_BYTES} (per-file decompression-bomb defense)" + ) + total += m.size + if total > MAX_TOTAL_EXTRACTED_BYTES: + raise BundleFetchError( + f"bundle would extract to >{MAX_TOTAL_EXTRACTED_BYTES} bytes " + "(decompression-bomb defense)" + ) + + try: + tar.extractall(dest, filter="data") + except TypeError: + tar.extractall(dest) # noqa: S202 — pre-validated above + + manifest_path = dest / "manifest.json" + if not manifest_path.is_file(): + raise BundleFetchError("bundle is missing manifest.json at the root") + + raw_manifest = json.loads(manifest_path.read_text(encoding="utf-8")) + manifest = BundleManifest.model_validate(raw_manifest) + return Bundle(manifest=manifest, root=dest) + + +def _check_safe_member(member: tarfile.TarInfo) -> None: + """Reject anything that isn't a plain file at an allowlisted path. + + Defense-in-depth even though the verifier will catch most tampering + later: an extraction-time crash is cheaper to recover from than a + half-applied bundle, and the early reject keeps malicious tarballs + from touching disk at all. + """ + if not (member.isreg() or member.isdir()): + raise BundleFetchError( + f"unsafe member type in bundle: {member.name} " + f"(type={member.type!r}; only regular files and dirs allowed)" + ) + if member.issym() or member.islnk() or member.isdev(): + # Belt-and-suspenders — issym/islnk/isdev are subsumed by !isreg + # above but the check is cheap and the message is clearer. + raise BundleFetchError(f"links/devices not allowed: {member.name}") + + name = Path(member.name) + if name.is_absolute() or ".." in name.parts: + raise BundleFetchError(f"unsafe path in bundle: {member.name}") + + if member.isdir(): + return # directories don't need allowlist checks + rel = name.as_posix() + if rel not in _ALLOWED_BUNDLE_PATHS: + raise BundleFetchError( + f"unexpected file in bundle: {rel!r} " + f"(allowed: {sorted(_ALLOWED_BUNDLE_PATHS)})" + ) diff --git a/src/bcli/bundle/_manifest.py b/src/bcli/bundle/_manifest.py new file mode 100644 index 0000000..8141b3c --- /dev/null +++ b/src/bcli/bundle/_manifest.py @@ -0,0 +1,127 @@ +"""Bundle manifest schema and on-disk representation.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path + +from pydantic import BaseModel, Field + + +class BundleVerifyError(Exception): + """Raised when a bundle fails checksum or signature verification. + + Always re-raise to abort the apply step; never silently fall back to + "use it anyway." The trust story for team bundles is the only thing + keeping a malicious upstream from injecting a custom endpoint pointing + at an attacker-controlled BC tenant. + """ + + +class BundleManifest(BaseModel): + """The ``manifest.json`` that lives at the root of every bundle. + + Schema is intentionally narrow: anything an admin needs to hand-edit + sits at the top level, anything bcli computes (checksum) is required + but plain. Forward-compatibility is handled via ``schema_version``; + bumps are major-version events for the bundle format itself, not for + bcli. + """ + + schema_version: int = 1 + profile: str + version: str + published_at: datetime + publisher: str = "" + checksum_sha256: str + signature: str = "" # detached signature, optional today + min_bcli_version: str = "" + previous_version: str = "" + release_notes: str = "" + contents: dict[str, str] = Field(default_factory=dict) + """Map of relative paths inside the bundle to their SHA-256 — populated + at publish time so we can detect partial-extraction corruption without + re-hashing the whole tarball.""" + + model_config = {"extra": "allow"} + + +@dataclass(frozen=True) +class Bundle: + """An extracted bundle ready to be applied.""" + + manifest: BundleManifest + root: Path + """Filesystem directory containing manifest.json plus the bundle's + files. Caller owns cleanup; the apply step does not delete the root.""" + + @property + def registry_path(self) -> Path: + return self.root / "registry.json" + + @property + def queries_path(self) -> Path: + return self.root / "queries.yaml" + + @property + def field_lists_path(self) -> Path: + return self.root / "field_lists.json" + + def has_registry(self) -> bool: + return self.registry_path.is_file() + + def has_queries(self) -> bool: + return self.queries_path.is_file() + + def has_field_lists(self) -> bool: + return self.field_lists_path.is_file() + + +def load_local_manifest(bundle_dir: Path, profile: str) -> BundleManifest | None: + """Load the currently-applied manifest for ``profile``, if any. + + Returns ``None`` when the user has never run ``bcli config refresh`` + for this profile. Returns ``None`` (rather than raising) on a corrupt + manifest so ``bcli doctor`` can flag it instead of crashing. + """ + path = bundle_dir / f"{profile}.manifest.json" + if not path.is_file(): + return None + try: + raw = json.loads(path.read_text(encoding="utf-8")) + return BundleManifest.model_validate(raw) + except (OSError, ValueError): + return None + + +def write_local_manifest(bundle_dir: Path, manifest: BundleManifest) -> Path: + """Write the applied manifest atomically. + + Uses ``write-temp + replace`` so a concurrent ``bcli doctor`` reading + the manifest never sees a half-written file. ``Path.replace`` is + atomic on POSIX and Windows, so a racing reader sees either the old + or the new manifest, never an empty / truncated one. + """ + import os + import tempfile + + bundle_dir.mkdir(parents=True, exist_ok=True) + final = bundle_dir / f"{manifest.profile}.manifest.json" + payload = manifest.model_dump_json(indent=2) + fd, tmp_name = tempfile.mkstemp( + prefix=f"{manifest.profile}.manifest.", + suffix=".tmp", + dir=bundle_dir, + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(payload) + f.flush() + os.fsync(f.fileno()) + Path(tmp_name).replace(final) + except Exception: + Path(tmp_name).unlink(missing_ok=True) + raise + return final diff --git a/src/bcli/bundle/_publish.py b/src/bcli/bundle/_publish.py new file mode 100644 index 0000000..4945e38 --- /dev/null +++ b/src/bcli/bundle/_publish.py @@ -0,0 +1,110 @@ +"""Author-side helper: build a signed bundle tarball from a directory tree. + +Lives in the SDK so admins can call it programmatically. The CLI surface +is ``bcli config make-bundle `` (registered alongside ``refresh``). +""" + +from __future__ import annotations + +import hashlib +import tarfile +from datetime import datetime, timezone +from io import BytesIO +from pathlib import Path + +from bcli.bundle._manifest import BundleManifest +from bcli.bundle._verify import canonical_roll_hash + + +def make_bundle( + source_dir: Path, + *, + profile: str, + version: str, + publisher: str = "", + release_notes: str = "", + previous_version: str = "", + output_path: Path | None = None, +) -> tuple[Path, BundleManifest]: + """Tar up ``source_dir`` and produce a signed-ish bundle. + + ``source_dir`` should contain at least ``registry.json``; ``queries.yaml`` + and ``field_lists.json`` are optional. The function: + + 1. Collects per-file SHA-256s into ``manifest.contents``. + 2. Builds the tarball in memory. + 3. Computes the archive checksum. + 4. Re-writes the manifest with the final checksum. + 5. Re-builds the tarball with the final manifest. + 6. Writes the result to ``output_path`` (or ``-.tar.gz``). + + The two-pass build (build / hash / re-build) keeps the manifest's + ``checksum_sha256`` referring to the *final* archive bytes — anything + else would either force the verifier to know which member to skip or + leave the user with a checksum that never matches. + """ + if not source_dir.is_dir(): + raise FileNotFoundError(f"bundle source dir not found: {source_dir}") + + files = _collect_files(source_dir) + if not files: + raise ValueError(f"no files found under {source_dir}") + + contents = {rel: _hash_bytes(b) for rel, b in files} + roll = canonical_roll_hash(contents) + + manifest = BundleManifest( + profile=profile, + version=version, + published_at=datetime.now(timezone.utc), + publisher=publisher, + checksum_sha256=roll, + previous_version=previous_version, + release_notes=release_notes, + contents=contents, + ) + + archive = _build_archive(files, manifest) + out = output_path or Path.cwd() / f"{profile}-{version}.tar.gz" + out.write_bytes(archive) + return out, manifest + + +# ─── helpers ────────────────────────────────────────────────────────── + + +def _collect_files(source_dir: Path) -> list[tuple[str, bytes]]: + """Walk ``source_dir`` and return [(relpath, bytes)] sorted by path.""" + out: list[tuple[str, bytes]] = [] + for path in sorted(source_dir.rglob("*")): + if not path.is_file(): + continue + # Skip anything that looks like build leftovers; admins shouldn't + # accidentally publish their .DS_Store or editor swap files. + if path.name.startswith(".") or path.name.endswith(("~", ".swp")): + continue + if path.name == "manifest.json": + # Authors hand-write contents.json siblings; the manifest is + # generated, not consumed. + continue + rel = str(path.relative_to(source_dir)) + out.append((rel, path.read_bytes())) + return out + + +def _build_archive(files: list[tuple[str, bytes]], manifest: BundleManifest) -> bytes: + buf = BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tar: + manifest_bytes = manifest.model_dump_json(indent=2).encode("utf-8") + info = tarfile.TarInfo("manifest.json") + info.size = len(manifest_bytes) + tar.addfile(info, BytesIO(manifest_bytes)) + for rel, data in files: + info = tarfile.TarInfo(rel) + info.size = len(data) + tar.addfile(info, BytesIO(data)) + return buf.getvalue() + + +def _hash_bytes(b: bytes) -> str: + return hashlib.sha256(b).hexdigest() diff --git a/src/bcli/bundle/_verify.py b/src/bcli/bundle/_verify.py new file mode 100644 index 0000000..b239158 --- /dev/null +++ b/src/bcli/bundle/_verify.py @@ -0,0 +1,168 @@ +"""Bundle verification — content checksum today, pluggable signature later. + +.. warning:: + + :class:`Sha256Verifier` is **not** an authenticity check. It proves + that the bundle on disk is internally consistent (each file matches + its declared hash, and the contents-roll-up hash matches the manifest + field). It does **not** prove the bundle came from the publisher. + + A compromised CDN can mint a malicious ``registry.json``, recompute + the per-file hashes, recompute the roll-up, and pass verification. + Until a real cryptographic signer (minisign / cosign / ed25519 + + pinned public key) lands at this seam, the team must distribute + bundles only via private blob storage with org-level auth and an + HTTPS-only contract. **Do not roll out to production without that + constraint, or without landing the signing upgrade.** + +The :class:`Verifier` protocol is the seam for cryptographic signing. +A real signer plugs in here without touching apply / fetch / publish — +refusing to ship phase 2 without a signer would have blocked the rest +of the rollout on a tooling decision that doesn't need to be made yet, +but tracking that decision as a phase-2-extension ship-blocker (see +``docs/plans/team-deployment.md`` "Risks") is mandatory. + +Why content hashes instead of "hash the tarball wire bytes": tarballs +encode file mtimes, ownership, and ordering, so the wire bytes aren't +deterministic across publish runs unless every TarInfo header is pinned. +Hashing per-file contents and sorting by path gives a deterministic +result that survives re-archiving while still catching any file-level +tampering. +""" + +from __future__ import annotations + +import hashlib +import json +from typing import Protocol + +from bcli.bundle._manifest import Bundle, BundleVerifyError + + +class Verifier(Protocol): + """Pluggable interface for bundle authenticity checks. + + An implementation must be safe to call repeatedly and must raise + :class:`BundleVerifyError` on any failure — never return False, never + log-and-continue. + """ + + def verify(self, bundle: Bundle, raw_archive: bytes | None = None) -> None: ... + + +class NullVerifier: + """Skip verification entirely. Only acceptable for local development.""" + + def verify(self, bundle: Bundle, raw_archive: bytes | None = None) -> None: + return None + + +class Sha256Verifier: + """Per-file content hash + canonical roll-up. + + Verifies that every file in the extracted bundle matches the SHA-256 + in the manifest's ``contents`` map, then recomputes the canonical + roll-up over those entries and compares it to ``checksum_sha256``. + Either step failing means a tampered or corrupt bundle. + """ + + def verify(self, bundle: Bundle, raw_archive: bytes | None = None) -> None: + manifest = bundle.manifest + if not manifest.contents: + raise BundleVerifyError( + "manifest.contents is empty; refusing to apply unverified bundle" + ) + expected_roll = (manifest.checksum_sha256 or "").lower().strip() + if not expected_roll: + raise BundleVerifyError( + "manifest.checksum_sha256 is missing; refusing to apply" + ) + + # Validate the contents map *before* opening any files. A + # malicious manifest can name `../etc/passwd` and we'd happily + # `bundle.root / rel` our way out of the extraction directory. + # Reject absolute paths, traversal, and anything outside the + # allowlist of files the apply step actually consumes. + for rel in manifest.contents: + _check_safe_relpath(rel) + + for rel, expected_hash in sorted(manifest.contents.items()): + file_path = bundle.root / rel + try: + resolved = file_path.resolve(strict=True) + root_resolved = bundle.root.resolve(strict=True) + resolved.relative_to(root_resolved) + except (FileNotFoundError, ValueError) as e: + raise BundleVerifyError( + f"manifest declares '{rel}' but the file is not safely " + f"contained in the bundle root ({e})" + ) from e + if not resolved.is_file(): + raise BundleVerifyError( + f"manifest declares '{rel}' but the file is not in the bundle" + ) + actual = _hash_bytes(resolved.read_bytes()) + if actual != expected_hash: + # Truncate hashes in error messages so a constant-time + # comparison would not be needed — we don't leak enough + # to grind a collision. + raise BundleVerifyError( + f"file '{rel}' content mismatch: manifest declared " + f"{expected_hash[:12]}…, got {actual[:12]}…" + ) + + roll = canonical_roll_hash(manifest.contents) + if roll != expected_roll: + raise BundleVerifyError( + f"manifest checksum mismatch: declared {expected_roll[:12]}…, " + f"computed {roll[:12]}… — manifest itself is tampered" + ) + + +_ALLOWED_CONTENT_PATHS = frozenset( + {"registry.json", "queries.yaml", "field_lists.json", "README.md"} +) + + +def _check_safe_relpath(rel: str) -> None: + """Reject manifest paths that are absolute, traverse, or unknown.""" + from pathlib import PurePosixPath + + p = PurePosixPath(rel) + if p.is_absolute() or ".." in p.parts: + raise BundleVerifyError(f"unsafe content path in manifest: {rel!r}") + if rel not in _ALLOWED_CONTENT_PATHS: + raise BundleVerifyError( + f"unexpected content path in manifest: {rel!r} " + f"(allowed: {sorted(_ALLOWED_CONTENT_PATHS)})" + ) + + +def verify_bundle( + bundle: Bundle, + *, + verifier: Verifier, + raw_archive: bytes | None = None, +) -> None: + """Run the configured verifier; re-raise on failure.""" + verifier.verify(bundle, raw_archive=raw_archive) + + +def canonical_roll_hash(contents: dict[str, str]) -> str: + """Hash the canonical-JSON form of the contents map. + + Both the publisher and the verifier compute this the same way: sorted + keys, no whitespace, ASCII-safe encoding. That makes the value + reproducible regardless of the dict-iteration order on either side. + """ + canonical = json.dumps( + {k: v for k, v in sorted(contents.items())}, + sort_keys=True, + separators=(",", ":"), + ensure_ascii=True, + ) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def _hash_bytes(b: bytes) -> str: + return hashlib.sha256(b).hexdigest() diff --git a/src/bcli/diagnostics/__init__.py b/src/bcli/diagnostics/__init__.py new file mode 100644 index 0000000..c18fbdd --- /dev/null +++ b/src/bcli/diagnostics/__init__.py @@ -0,0 +1,26 @@ +"""Self-rescue diagnostics for ``bcli doctor``. + +A team member with a broken setup should be able to run one command and see +exactly which check failed plus a one-line hint for fixing it. The check +primitives live here; the user-facing command lives in +``bcli_cli.commands.doctor_cmd``. + +Each check is independent, returns a :class:`CheckResult`, and never raises. +A failing check produces a ``fail`` result with a hint, not a traceback — +the whole point is that this command runs cleanly even when other parts of +the install are broken. +""" + +from bcli.diagnostics._checks import ( + CheckContext, + CheckResult, + CheckStatus, + run_all_checks, +) + +__all__ = [ + "CheckContext", + "CheckResult", + "CheckStatus", + "run_all_checks", +] diff --git a/src/bcli/diagnostics/_checks.py b/src/bcli/diagnostics/_checks.py new file mode 100644 index 0000000..d34eee4 --- /dev/null +++ b/src/bcli/diagnostics/_checks.py @@ -0,0 +1,510 @@ +"""Diagnostic check primitives for ``bcli doctor``. + +Each check is a small callable that takes a :class:`CheckContext` and returns +a :class:`CheckResult`. Checks never raise: a broken install must still +produce a clean report, otherwise the command is useless to the people who +need it most. Catch broadly, attribute the failure to the check, and move on. + +The check list is intentionally flat (no orchestrator, no dependency graph). +A skipped check is still a result with status ``info`` so the report shows +what was inspected. +""" + +from __future__ import annotations + +import json +import socket +from dataclasses import dataclass, field +from datetime import datetime, timezone +from enum import Enum +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable + +if TYPE_CHECKING: + from bcli.config import BCConfig, BCProfile + + +class CheckStatus(str, Enum): + OK = "ok" + WARN = "warn" + FAIL = "fail" + INFO = "info" + + +@dataclass(frozen=True) +class CheckResult: + """One row in the doctor report.""" + + name: str + status: CheckStatus + summary: str + hint: str = "" + detail: dict[str, Any] = field(default_factory=dict) + + @property + def is_fail(self) -> bool: + return self.status is CheckStatus.FAIL + + +@dataclass +class CheckContext: + """Inputs every check is allowed to read. + + Constructed once at the top of ``bcli doctor`` so individual checks stay + pure and trivially testable. ``config`` and ``profile`` are optional + because doctor must run on a totally broken install — that path returns + a single fail result and short-circuits. + """ + + config: BCConfig | None + profile: BCProfile | None + profile_name: str + bundle_dir: Path + token_cache_path: Path + queries_dir: Path + registries_dir: Path + bcli_version: str = "" + skip_network: bool = False + + +# ─── Individual checks ─────────────────────────────────────────────── + + +def check_active_profile(ctx: CheckContext) -> CheckResult: + if ctx.config is None: + return CheckResult( + "active profile", + CheckStatus.FAIL, + "no config loaded", + hint="run `bcli config init` to create one", + ) + if ctx.profile is None: + return CheckResult( + "active profile", + CheckStatus.FAIL, + f"profile '{ctx.profile_name}' not found", + hint=f"available: {', '.join(ctx.config.profiles) or '(none)'}", + ) + return CheckResult( + "active profile", + CheckStatus.OK, + ctx.profile_name, + ) + + +def check_tenant(ctx: CheckContext) -> CheckResult: + if ctx.profile is None: + return CheckResult("tenant", CheckStatus.INFO, "skipped (no profile)") + if not ctx.profile.tenant_id: + return CheckResult( + "tenant", + CheckStatus.FAIL, + "tenant_id missing on profile", + hint="add tenant_id to ~/.config/bcli/config.toml", + ) + return CheckResult("tenant", CheckStatus.OK, ctx.profile.tenant_id) + + +def check_environment(ctx: CheckContext) -> CheckResult: + if ctx.profile is None: + return CheckResult("environment", CheckStatus.INFO, "skipped (no profile)") + if not ctx.profile.environment: + return CheckResult( + "environment", + CheckStatus.FAIL, + "environment missing on profile", + hint="set `environment = \"production\"` (or sandbox name) in config.toml", + ) + return CheckResult("environment", CheckStatus.OK, ctx.profile.environment) + + +def check_company(ctx: CheckContext) -> CheckResult: + if ctx.profile is None: + return CheckResult("company", CheckStatus.INFO, "skipped (no profile)") + if not ctx.profile.company_id: + return CheckResult( + "company", + CheckStatus.WARN, + "no default company configured", + hint="run `bcli company list` then `bcli company use `", + ) + name = ctx.profile.company_name or "(unnamed)" + return CheckResult( + "company", + CheckStatus.OK, + f"{name} ({ctx.profile.company_id[:8]}…)", + ) + + +def check_auth_mode(ctx: CheckContext) -> CheckResult: + if ctx.profile is None: + return CheckResult("auth", CheckStatus.INFO, "skipped (no profile)") + mode = ctx.profile.auth_method or "" + if mode in {"client_credentials", "device_code", "browser"}: + return CheckResult("auth", CheckStatus.OK, mode) + if not mode: + return CheckResult( + "auth", + CheckStatus.FAIL, + "auth_method missing", + hint="set `auth_method = \"device_code\"` for finance/engine-tech profiles", + ) + return CheckResult( + "auth", + CheckStatus.WARN, + f"unknown auth_method '{mode}'", + hint="expected: client_credentials, device_code, or browser", + ) + + +def check_token_cache(ctx: CheckContext) -> CheckResult: + """Inspect the on-disk token cache for the active profile. + + The cache is keyed by ``:``. A common support + failure is "I have a valid token cached for some other tenant but not + the active one" — reporting "ok" in that case is misleading. Scope to + the active profile so the result reflects what the next CLI call will + actually use. + """ + if not ctx.token_cache_path.is_file(): + return CheckResult( + "token cache", + CheckStatus.INFO, + "no cached token (you will be prompted on next call)", + ) + try: + data = json.loads(ctx.token_cache_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as e: + return CheckResult( + "token cache", + CheckStatus.WARN, + f"unreadable: {type(e).__name__}", + hint=f"delete {ctx.token_cache_path} and re-authenticate", + ) + + if not isinstance(data, dict) or not data: + return CheckResult("token cache", CheckStatus.INFO, "empty") + + profile_key: str | None = None + if ctx.profile and ctx.profile.tenant_id and ctx.profile.client_id: + profile_key = f"{ctx.profile.tenant_id}:{ctx.profile.client_id}" + + now = datetime.now(timezone.utc) + + if profile_key is not None: + entry = data.get(profile_key) + if not isinstance(entry, dict): + return CheckResult( + "token cache", + CheckStatus.INFO, + "no cached token for active profile", + hint="next call will trigger sign-in", + ) + try: + expires_at = datetime.fromisoformat(entry["expires_at"]) + except (KeyError, ValueError): + return CheckResult( + "token cache", + CheckStatus.WARN, + "cached entry has no parseable expiry", + hint="run `bcli auth clear` then sign in again", + ) + if expires_at <= now: + return CheckResult( + "token cache", + CheckStatus.INFO, + "active profile's token has expired", + hint="next call will trigger sign-in", + ) + minutes = int((expires_at - now).total_seconds() / 60) + return CheckResult( + "token cache", + CheckStatus.OK, + f"valid for active profile, expires in {minutes} min", + ) + + # Fall back to the cache-wide summary when we don't have enough info + # to pick out the active profile's entry (e.g. profile load failed). + valid = 0 + expired = 0 + for entry in data.values(): + if not isinstance(entry, dict): + continue + try: + expires_at = datetime.fromisoformat(entry["expires_at"]) + except (KeyError, ValueError): + continue + if expires_at > now: + valid += 1 + else: + expired += 1 + if valid == 0 and expired > 0: + return CheckResult( + "token cache", + CheckStatus.INFO, + f"all {expired} cached tokens expired (profile-scope unavailable)", + hint="next call will trigger re-authentication", + ) + if valid == 0: + return CheckResult("token cache", CheckStatus.INFO, "no valid entries") + return CheckResult( + "token cache", + CheckStatus.INFO, + f"{valid} cached entries (profile-scope unavailable)", + ) + + +def check_registry(ctx: CheckContext) -> CheckResult: + if ctx.profile is None: + return CheckResult("registry", CheckStatus.INFO, "skipped (no profile)") + + try: + from bcli.registry._registry import EndpointRegistry + + reg = EndpointRegistry( + profile_name=ctx.profile_name, + disable_standard=ctx.profile.disable_standard_api, + allowed_categories=ctx.profile.allowed_categories or None, + allowed_endpoints=ctx.profile.allowed_endpoints or None, + ) + except Exception as e: # noqa: BLE001 — diagnostic, never re-raise + return CheckResult( + "registry", + CheckStatus.FAIL, + f"failed to load: {type(e).__name__}: {e}", + hint="run `bcli endpoint list --debug` for the full traceback", + ) + + custom = reg.custom_count + standard = reg.standard_count + total = custom + standard + if ctx.profile.disable_standard_api and custom == 0: + return CheckResult( + "registry", + CheckStatus.FAIL, + "scoped profile has zero custom endpoints", + hint=( + f"import a registry: `bcli registry import --from-json" + f" --profile {ctx.profile_name}`" + ), + ) + if total == 0: + return CheckResult( + "registry", + CheckStatus.FAIL, + "no endpoints available", + hint="check `disable_standard_api` and custom registry path", + ) + + summary = f"{custom} custom" + if not ctx.profile.disable_standard_api: + summary += f" + {standard} standard" + summary += " endpoints" + return CheckResult("registry", CheckStatus.OK, summary) + + +def check_field_coverage(ctx: CheckContext) -> CheckResult: + """For scoped profiles, count how many custom endpoints have field lists. + + Below 50% coverage is a warn — pre-flight `--filter` validation can't help + the user without field names, and "did you mean" suggestions degrade. + """ + if ctx.profile is None or not ctx.profile.disable_standard_api: + return CheckResult("field coverage", CheckStatus.INFO, "n/a (standard profile)") + + registry_file = ctx.registries_dir / f"{ctx.profile_name}.json" + if not registry_file.is_file(): + return CheckResult( + "field coverage", + CheckStatus.INFO, + "no custom registry file yet", + ) + try: + raw = json.loads(registry_file.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as e: + return CheckResult( + "field coverage", + CheckStatus.WARN, + f"registry unreadable: {type(e).__name__}", + ) + + endpoints = raw.get("endpoints", []) + if not endpoints: + return CheckResult("field coverage", CheckStatus.INFO, "no endpoints") + with_fields = sum(1 for e in endpoints if e.get("field_names")) + pct = int(100 * with_fields / len(endpoints)) + summary = f"{with_fields}/{len(endpoints)} endpoints ({pct}%)" + if pct < 50: + return CheckResult( + "field coverage", + CheckStatus.WARN, + summary, + hint="run `bcli endpoint fields ` on heavy-use endpoints to populate", + ) + return CheckResult("field coverage", CheckStatus.OK, summary) + + +def check_saved_queries(ctx: CheckContext) -> CheckResult: + queries_file = ctx.queries_dir / f"{ctx.profile_name}.yaml" + if not queries_file.is_file(): + if ctx.profile is not None and ctx.profile.disable_standard_api: + return CheckResult( + "saved queries", + CheckStatus.WARN, + "no saved-query file for scoped profile", + hint=f"create {queries_file} or pull from team bundle", + ) + return CheckResult("saved queries", CheckStatus.INFO, "none defined") + try: + import yaml # type: ignore[import-untyped] + + raw = yaml.safe_load(queries_file.read_text(encoding="utf-8")) or {} + except Exception as e: # noqa: BLE001 + return CheckResult( + "saved queries", + CheckStatus.FAIL, + f"unparseable: {type(e).__name__}: {e}", + hint=f"fix YAML syntax in {queries_file}", + ) + queries = raw.get("queries", {}) if isinstance(raw, dict) else {} + return CheckResult("saved queries", CheckStatus.OK, f"{len(queries)} defined") + + +def check_bundle(ctx: CheckContext) -> CheckResult: + """Report on the team bundle state. Bundles are phase-2 territory; until + one is installed this check is informational only — never a fail.""" + manifest = ctx.bundle_dir / f"{ctx.profile_name}.manifest.json" + if not manifest.is_file(): + return CheckResult( + "team bundle", + CheckStatus.INFO, + "not installed (using local registry only)", + hint="run `bcli config refresh` once your team has published a bundle", + ) + try: + data = json.loads(manifest.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as e: + return CheckResult( + "team bundle", + CheckStatus.WARN, + f"manifest unreadable: {type(e).__name__}", + hint="run `bcli config refresh --rollback` or re-import", + ) + + version = data.get("version", "?") + published_at = data.get("published_at", "") + age_hint = "" + try: + if published_at: + ts = datetime.fromisoformat(published_at.replace("Z", "+00:00")) + age_days = (datetime.now(timezone.utc) - ts).days + age_hint = f", {age_days}d old" + if age_days > 30: + return CheckResult( + "team bundle", + CheckStatus.WARN, + f"version {version}{age_hint}", + hint="run `bcli config refresh` to pull the latest", + ) + if age_days > 7: + return CheckResult( + "team bundle", + CheckStatus.WARN, + f"version {version}{age_hint}", + hint="consider running `bcli config refresh`", + ) + except (ValueError, TypeError): + age_hint = " (unparseable timestamp)" + return CheckResult("team bundle", CheckStatus.OK, f"version {version}{age_hint}") + + +def check_bc_connectivity(ctx: CheckContext) -> CheckResult: + """One unauthenticated probe to BC's public host. + + Uses ``httpx`` with ``trust_env=True`` so corporate HTTP/HTTPS proxies + are honored — a previous version went straight to TCP, which falsely + failed on machines that talk to BC via the company proxy. A 401 / 403 + / 404 from BC counts as "host is reachable, BC is fine" — those are + its expected responses to a no-auth GET. Only network/proxy failures + fail the check. + """ + if ctx.skip_network: + return CheckResult("bc connectivity", CheckStatus.INFO, "skipped (--skip-network)") + host = "api.businesscentral.dynamics.com" + url = f"https://{host}/v2.0/" + try: + import httpx + + with httpx.Client(timeout=5.0, trust_env=True, follow_redirects=False) as client: + resp = client.get(url) + return CheckResult( + "bc connectivity", + CheckStatus.OK, + f"{host} reachable (HTTP {resp.status_code})", + ) + except ImportError: + # httpx is a hard dep of bcli, so this branch is paranoia. Falling + # back to a TCP probe still beats refusing to render the report. + try: + with socket.create_connection((host, 443), timeout=5): + return CheckResult( + "bc connectivity", + CheckStatus.OK, + f"{host}:443 TCP reachable (httpx unavailable)", + ) + except OSError as e: + return CheckResult( + "bc connectivity", + CheckStatus.FAIL, + f"{host} unreachable ({type(e).__name__})", + hint="check network / proxy / corporate firewall", + ) + except Exception as e: # noqa: BLE001 + return CheckResult( + "bc connectivity", + CheckStatus.FAIL, + f"{host} unreachable: {type(e).__name__}: {e}", + hint="check network / corporate proxy (HTTPS_PROXY) / VPN", + ) + + +# ─── Orchestration ─────────────────────────────────────────────────── + + +CheckFn = Callable[[CheckContext], CheckResult] + +_DEFAULT_CHECKS: tuple[CheckFn, ...] = ( + check_active_profile, + check_tenant, + check_environment, + check_company, + check_auth_mode, + check_token_cache, + check_registry, + check_field_coverage, + check_saved_queries, + check_bundle, + check_bc_connectivity, +) + + +def run_all_checks( + ctx: CheckContext, + *, + checks: tuple[CheckFn, ...] | None = None, +) -> list[CheckResult]: + """Run every check, swallowing exceptions per check.""" + results: list[CheckResult] = [] + for check in (checks or _DEFAULT_CHECKS): + try: + results.append(check(ctx)) + except Exception as e: # noqa: BLE001 — diagnostic, never re-raise + results.append( + CheckResult( + name=check.__name__.removeprefix("check_").replace("_", " "), + status=CheckStatus.FAIL, + summary=f"check raised {type(e).__name__}: {e}", + hint="this is a bug in `bcli doctor` — please report", + ) + ) + return results diff --git a/src/bcli/workflow/_query_search.py b/src/bcli/workflow/_query_search.py new file mode 100644 index 0000000..c8b5641 --- /dev/null +++ b/src/bcli/workflow/_query_search.py @@ -0,0 +1,154 @@ +"""Discoverability layer over saved queries. + +Pure functions. Takes the dict-shaped queries already on disk and gives +back ranked / filtered views for ``bcli q list`` / ``q search`` / ``q +info``. No embeddings, no Redis — substring + tag/alias scoring, which the +codex consult correctly identified as 80%-of-the-value-without-Redis for a +50-100 query library. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Iterable + + +@dataclass(frozen=True) +class QueryEntry: + """One saved query, normalized for search/listing. + + Built from the raw dict the YAML loader returns. Keeping a normalized + view lets the search code stay pure (it never re-parses YAML or pokes + at None vs. empty list). + """ + + name: str + description: str = "" + aliases: tuple[str, ...] = () + tags: tuple[str, ...] = () + owner: str = "" + freshness: str = "" + examples: tuple[str, ...] = () + related: tuple[str, ...] = () + endpoint: str = "" + params: dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_raw(cls, name: str, raw: dict[str, Any]) -> "QueryEntry": + def _as_tuple(value: Any) -> tuple[str, ...]: + if value is None: + return () + if isinstance(value, (list, tuple)): + return tuple(str(v) for v in value) + return (str(value),) + + return cls( + name=name, + description=str(raw.get("description") or ""), + aliases=_as_tuple(raw.get("aliases")), + tags=_as_tuple(raw.get("tags")), + owner=str(raw.get("owner") or ""), + freshness=str(raw.get("freshness") or ""), + examples=_as_tuple(raw.get("examples")), + related=_as_tuple(raw.get("related")), + endpoint=str(raw.get("endpoint") or ""), + params=dict(raw.get("params") or {}), + ) + + +def normalize_queries(raw_queries: dict[str, dict[str, Any]]) -> list[QueryEntry]: + """Convert a YAML-loaded mapping into sorted entries.""" + return sorted( + (QueryEntry.from_raw(name, body) for name, body in raw_queries.items()), + key=lambda q: q.name, + ) + + +def filter_entries( + entries: Iterable[QueryEntry], + *, + tag: str | None = None, + owner: str | None = None, + freshness: str | None = None, +) -> list[QueryEntry]: + """Filter entries by simple metadata predicates. Case-insensitive.""" + out = list(entries) + if tag: + tag_lower = tag.lower() + out = [q for q in out if any(t.lower() == tag_lower for t in q.tags)] + if owner: + owner_lower = owner.lower() + out = [q for q in out if q.owner.lower() == owner_lower] + if freshness: + fresh_lower = freshness.lower() + out = [q for q in out if q.freshness.lower() == fresh_lower] + return out + + +def search_entries( + entries: Iterable[QueryEntry], + query: str, + *, + score_floor: int = 30, +) -> list[tuple[int, QueryEntry]]: + """Rank entries by a composite score. Returns ``[(score, entry), ...]``. + + Scoring is intentionally chunky — the goal is "this is the query you + want" vs "no, here are some near-misses," not a continuous relevance + function. Cutoff at ``score_floor`` so a query over a wildly-unrelated + string returns nothing rather than the whole catalog. + """ + q = query.strip().lower() + if not q: + return [] + + scored: list[tuple[int, QueryEntry]] = [] + for entry in entries: + score = _score_entry(entry, q) + if score >= score_floor: + scored.append((score, entry)) + + scored.sort(key=lambda pair: (-pair[0], pair[1].name)) + return scored + + +def _score_entry(entry: QueryEntry, q: str) -> int: + score = 0 + name_lower = entry.name.lower() + if name_lower == q: + score = max(score, 100) + elif name_lower.startswith(q): + score = max(score, 90) + elif q in name_lower: + score = max(score, 75) + + for alias in entry.aliases: + alias_lower = alias.lower() + if alias_lower == q: + score = max(score, 95) + elif q in alias_lower: + score = max(score, 70) + + for tag in entry.tags: + if q == tag.lower(): + score = max(score, 60) + elif q in tag.lower(): + score = max(score, 50) + + desc_lower = entry.description.lower() + if desc_lower: + # Token overlap: any non-trivial query token sitting inside the + # description bumps the score, but description matches always rank + # below name/alias hits. + for token in q.split(): + if len(token) >= 3 and token in desc_lower: + score = max(score, 45) + + for example in entry.examples: + if q in example.lower(): + score = max(score, 40) + + if q in entry.endpoint.lower(): + score = max(score, 35) + + return score diff --git a/src/bcli_cli/_state.py b/src/bcli_cli/_state.py index 990d5bc..bb6cd96 100644 --- a/src/bcli_cli/_state.py +++ b/src/bcli_cli/_state.py @@ -21,6 +21,11 @@ class CLIState: env_override: str | None = None company_override: str | None = None format: str = "table" + # ``True`` when the user passed ``--format/-f`` explicitly. Read by + # `format_output` to decide whether to auto-flip wide single-row + # results to vertical-records view: explicit format = honor the + # contract, auto-detected = optimize for readability. + format_explicit: bool = False verbose: bool = False debug: bool = False dry_run: bool = False diff --git a/src/bcli_cli/app.py b/src/bcli_cli/app.py index 059fd31..0760548 100644 --- a/src/bcli_cli/app.py +++ b/src/bcli_cli/app.py @@ -79,6 +79,7 @@ def main( state.company_override = company resolved_format = format or detect_default_format() state.format = resolved_format + state.format_explicit = format is not None state.verbose = verbose state.debug = debug state.dry_run = dry_run @@ -164,6 +165,7 @@ def _emit_command_summary() -> None: config_cmd, context_cmd, delete_cmd, + doctor_cmd, endpoint_cmd, env_cmd, get_cmd, @@ -189,6 +191,7 @@ def _emit_command_summary() -> None: app.command(name="delete")(delete_cmd.delete_command) app.command(name="q", help="Run a saved query (no OData required)")(query_cmd.query_command) app.command(name="ai-context")(context_cmd.ai_context_command) +app.command(name="doctor", help="Diagnose your bcli install (self-rescue for team users)")(doctor_cmd.doctor_command) # ETL command — optional, only available when dlt is installed try: diff --git a/src/bcli_cli/commands/config_cmd.py b/src/bcli_cli/commands/config_cmd.py index d23f564..63a8671 100644 --- a/src/bcli_cli/commands/config_cmd.py +++ b/src/bcli_cli/commands/config_cmd.py @@ -424,6 +424,16 @@ def path() -> None: typer.echo(str(CONFIG_FILE)) +# ─── Bundle commands (registered here so they live under `bcli config`) ── + +from bcli_cli.commands import refresh_cmd as _refresh # noqa: E402 + +app.command("refresh", help="Pull the latest team bundle for a profile")(_refresh.refresh_command) +app.command("rollback", help="Restore the previous bundle for a profile")(_refresh.rollback_command) +app.command("bundle-status", help="Show the currently-installed bundle's manifest")(_refresh.bundle_status_command) +app.command("make-bundle", help="Build a bundle tarball from a directory (admin)")(_refresh.make_bundle_command) + + @app.command() def edit() -> None: """Open the config file in $EDITOR and re-validate on save.""" diff --git a/src/bcli_cli/commands/doctor_cmd.py b/src/bcli_cli/commands/doctor_cmd.py new file mode 100644 index 0000000..3726521 --- /dev/null +++ b/src/bcli_cli/commands/doctor_cmd.py @@ -0,0 +1,171 @@ +"""``bcli doctor`` — self-rescue diagnostics for team installs. + +The command runs a fixed set of independent checks and prints a one-screen +report. Non-zero exit when any check fails; ``--json`` flips to a structured +report for monitoring scripts. Designed to never raise — a totally broken +install must still produce readable output. +""" + +from __future__ import annotations + +import json +import sys +from typing import Optional + +import typer +from rich.console import Console +from rich.table import Table + +from bcli._version import __version__ +from bcli.config._defaults import ( + CONFIG_DIR, + REGISTRIES_DIR, + TOKEN_CACHE_FILE, +) +from bcli.diagnostics import CheckContext, CheckStatus, run_all_checks + +console = Console() +err = Console(stderr=True) + + +def doctor_command( + profile: Optional[str] = typer.Option( + None, "--profile", "-p", help="Profile to inspect (default: active profile)" + ), + output_json: bool = typer.Option( + False, "--json", help="Emit a structured JSON report (machine-readable)" + ), + skip_network: bool = typer.Option( + False, "--skip-network", help="Skip the BC connectivity probe" + ), +) -> None: + """Run diagnostics and print a one-screen verdict. + + Exit codes: + 0 all checks passed (some warnings allowed) + 1 one or more fail-level checks + + \b + Examples: + bcli doctor + bcli doctor --profile finance + bcli doctor --json | jq '.checks[] | select(.status=="fail")' + """ + ctx = _build_context(profile_override=profile, skip_network=skip_network) + results = run_all_checks(ctx) + + if output_json: + _emit_json(ctx, results) + else: + _emit_text(ctx, results) + + if any(r.is_fail for r in results): + raise typer.Exit(1) + + +# ─── Context construction ──────────────────────────────────────────── + + +def _build_context(*, profile_override: str | None, skip_network: bool) -> CheckContext: + """Build a CheckContext that tolerates a fully-broken install. + + Imports are inlined so that a config-loader crash still lets us emit a + single fail result instead of crashing inside the CLI bootstrap. + """ + config = None + profile = None + profile_name = profile_override or "default" + + try: + from bcli.config import load_config + + config = load_config() + profile_name = profile_override or config.defaults.profile + try: + profile = config.get_profile(profile_name) + except Exception: # noqa: BLE001 — captured by check_active_profile + profile = None + except Exception: # noqa: BLE001 — captured by check_active_profile + config = None + + bundle_dir = CONFIG_DIR / "bundles" + + return CheckContext( + config=config, + profile=profile, + profile_name=profile_name, + bundle_dir=bundle_dir, + token_cache_path=TOKEN_CACHE_FILE, + queries_dir=CONFIG_DIR / "queries", + registries_dir=REGISTRIES_DIR, + bcli_version=__version__, + skip_network=skip_network, + ) + + +# ─── Rendering ─────────────────────────────────────────────────────── + + +_STATUS_GLYPH = { + CheckStatus.OK: ("[green]✓[/green]", "OK"), + CheckStatus.WARN: ("[yellow]⚠[/yellow]", "WARN"), + CheckStatus.FAIL: ("[red]✗[/red]", "FAIL"), + CheckStatus.INFO: ("[dim]·[/dim]", "INFO"), +} + + +def _emit_text(ctx: CheckContext, results: list) -> None: + err.print( + f"[bold]bcli doctor[/bold] — profile: [cyan]{ctx.profile_name}[/cyan] " + f"[dim](bcli {ctx.bcli_version})[/dim]" + ) + err.print() + + fails = sum(1 for r in results if r.status is CheckStatus.FAIL) + warns = sum(1 for r in results if r.status is CheckStatus.WARN) + + table = Table(show_header=False, box=None, padding=(0, 1)) + table.add_column(justify="right", no_wrap=True) + table.add_column(no_wrap=True) + table.add_column(overflow="fold") + + for r in results: + glyph, _ = _STATUS_GLYPH[r.status] + table.add_row(glyph, r.name, r.summary) + if r.hint and r.status in (CheckStatus.FAIL, CheckStatus.WARN): + table.add_row("", "", f"[dim]hint: {r.hint}[/dim]") + + console.print(table) + console.print() + + if fails: + verdict = f"[bold red]Verdict: FAIL[/bold red] — {fails} failed, {warns} warnings" + elif warns: + verdict = f"[bold yellow]Verdict: WARN[/bold yellow] — {warns} warnings" + else: + verdict = "[bold green]Verdict: OK[/bold green]" + console.print(verdict) + + +def _emit_json(ctx: CheckContext, results: list) -> None: + payload = { + "profile": ctx.profile_name, + "bcli_version": ctx.bcli_version, + "verdict": ( + "fail" if any(r.is_fail for r in results) + else "warn" if any(r.status is CheckStatus.WARN for r in results) + else "ok" + ), + "checks": [ + { + "name": r.name, + "status": r.status.value, + "summary": r.summary, + "hint": r.hint, + "detail": r.detail, + } + for r in results + ], + } + json.dump(payload, sys.stdout, indent=2, default=str) + sys.stdout.write("\n") diff --git a/src/bcli_cli/commands/get_cmd.py b/src/bcli_cli/commands/get_cmd.py index 5638b67..04dc586 100644 --- a/src/bcli_cli/commands/get_cmd.py +++ b/src/bcli_cli/commands/get_cmd.py @@ -42,6 +42,7 @@ def get_command( """ # Local --format overrides global output_format = format or state.format + explicit_format = (format is not None) or state.format_explicit if output_format in ("json", "csv", "ndjson", "raw"): state.quiet = True @@ -88,7 +89,7 @@ def get_command( publisher=publisher, group=group, version=version, ) ) - format_output(records, output_format) + format_output(records, output_format, auto_format=not explicit_format) except Exception as e: console.print(f"[red]Error:[/red] {e}") raise typer.Exit(1) diff --git a/src/bcli_cli/commands/query_cmd.py b/src/bcli_cli/commands/query_cmd.py index 07aff60..5060cf4 100644 --- a/src/bcli_cli/commands/query_cmd.py +++ b/src/bcli_cli/commands/query_cmd.py @@ -10,6 +10,15 @@ queries: : description: "human description" + # — discoverability metadata (all optional, all additive) — + aliases: [synonym1, synonym2] + tags: [period-close, ap, intercompany] + owner: finance-ops + freshness: live # live | daily | reference + examples: + - bcli q name=Fabrikam + related: [other-query-name] + # — query body — endpoint: params: : @@ -20,12 +29,18 @@ min: 1 # numeric lower bound max: 1000 # numeric upper bound enum: ["Open", "Posted"] # allowed literal set + hint: "BC Vendor No." # shown in `bcli q info` filter: "displayName eq '${{ params.name }}'" select: "number,displayName,email,phoneNumber" orderby: "displayName asc" top: 50 all: false +The metadata fields exist purely for human discoverability — ``bcli q list`` +filters by tag/owner, ``bcli q search`` ranks across name/alias/tag/desc, +and ``bcli q info`` shows the full record. None of the metadata changes +how a query executes. + Defense-in-depth notes: * ``type`` / ``pattern`` / ``min`` / ``max`` / ``enum`` are validated *before* @@ -87,6 +102,8 @@ def query_command( bcli q customer-by-name name=Fabrikam bcli q open-invoices-by-customer customer-id=C00010 limit=20 bcli q customer-by-name name=Fabrikam --show + bcli q search "overdue invoices" # discover by NL phrase + bcli q info customer-by-name # full metadata for one query """ profile_name = state.active_profile_name queries_file = QUERIES_DIR / f"{profile_name}.yaml" @@ -95,14 +112,70 @@ def query_command( _list_queries(profile_name, queries_file) return + # Sub-verb dispatch. + # + # The first positional arg can be a real query name, a reserved + # sub-verb (`list`, `search`, `find`, `info`), or `run` — the + # explicit escape hatch for cases where someone has authored a + # query whose name shadows a sub-verb. Reserved names produce a + # hard error at bundle-load time (see `_check_reserved_names` in + # the saved-query loader) so this branch is never reached for a + # well-formed bundle, but `bcli q run ` ensures users always + # have a way to invoke a hypothetically-misnamed query without + # editing the bundle. + if name == "run": + if not params: + console.print("[red]`bcli q run [key=value …]` expected.[/red]") + raise typer.Exit(2) + # Re-enter with name=, params= — keeps the + # validation path identical to the regular invocation. + run_name, *run_params = params + return query_command( + name=run_name, + params=run_params or None, + show=show, + format=format, + ) + if name in ("search", "find"): + if not params: + console.print("[red]`bcli q search ` expected.[/red]") + raise typer.Exit(2) + _search_queries(profile_name, queries_file, " ".join(params)) + return + if name == "info": + if not params: + console.print("[red]`bcli q info ` expected.[/red]") + raise typer.Exit(2) + _query_info(profile_name, queries_file, params[0]) + return + if name == "list": + # Optional filters live in `params` as `tag=foo owner=bar` pairs. + flt = {k: v for k, _, v in (p.partition("=") for p in (params or []))} + _list_queries( + profile_name, + queries_file, + tag=flt.get("tag"), + owner=flt.get("owner"), + freshness=flt.get("freshness"), + ) + return + saved = _load_saved_queries(queries_file) if name not in saved: - available = ", ".join(sorted(saved.keys())) or "(none)" - console.print( - f"[red]Saved query '{name}' not found.[/red] Available: {available}\n" - f"[dim]Edit {queries_file} to add one.[/dim]" - ) - raise typer.Exit(1) + # Try alias resolution before giving up — a curated bundle uses + # aliases to bridge "the query is called overdue-ic but the user + # typed overdue-intercompany" without forcing duplicate definitions. + alias_hit = _resolve_alias(saved, name) + if alias_hit is not None: + name = alias_hit + else: + available = ", ".join(sorted(saved.keys())) or "(none)" + console.print( + f"[red]Saved query '{name}' not found.[/red] Available: {available}\n" + f"[dim]Edit {queries_file} to add one," + f" or run `bcli q search '{name}'` to find a near match.[/dim]" + ) + raise typer.Exit(1) spec = saved[name] resolved_params = _resolve_params(spec.get("params", {}), params or []) @@ -118,6 +191,7 @@ def query_command( return output_format = format or state.format + explicit_format = (format is not None) or state.format_explicit if output_format in ("json", "csv", "ndjson", "raw"): state.quiet = True @@ -136,7 +210,7 @@ def query_command( started = _time.monotonic() try: records = asyncio.run(_run_saved_query(endpoint, resolved)) - format_output(records, output_format) + format_output(records, output_format, auto_format=not explicit_format) latency_ms = (_time.monotonic() - started) * 1000.0 capture_filter = state.config.telemetry.capture_filter_text sink.emit(*_tev.query( @@ -165,8 +239,18 @@ def query_command( # ─── Helpers ───────────────────────────────────────────────────────── -def _list_queries(profile_name: str, queries_file: Path) -> None: - """Print a table of saved queries for the active profile.""" +def _list_queries( + profile_name: str, + queries_file: Path, + *, + tag: str | None = None, + owner: str | None = None, + freshness: str | None = None, +) -> None: + """Print a table of saved queries for the active profile. + + Optional filters narrow the catalog by tag / owner / freshness. + """ if not queries_file.is_file(): console.print( f"[dim]No saved queries for profile '{profile_name}'.[/dim]\n" @@ -180,27 +264,164 @@ def _list_queries(profile_name: str, queries_file: Path) -> None: console.print(f"[dim]{queries_file} has no queries defined.[/dim]") return - table = Table(show_header=True, header_style="bold", title=f"Saved queries — {profile_name}") + from bcli.workflow._query_search import filter_entries, normalize_queries + + entries = normalize_queries(queries) + entries = filter_entries(entries, tag=tag, owner=owner, freshness=freshness) + if not entries: + active = ", ".join( + f"{k}={v}" for k, v in (("tag", tag), ("owner", owner), ("freshness", freshness)) if v + ) + console.print( + f"[dim]No queries match {active or '(no filters)'}.[/dim]\n" + "[dim]Run `bcli q list` with no filters to see the whole catalog.[/dim]" + ) + return + + title_bits = [f"Saved queries — {profile_name}"] + for label, value in (("tag", tag), ("owner", owner), ("freshness", freshness)): + if value: + title_bits.append(f"{label}={value}") + table = Table( + show_header=True, + header_style="bold", + title=" · ".join(title_bits), + ) table.add_column("Name", style="cyan") table.add_column("Endpoint") - table.add_column("Params") - table.add_column("Description", max_width=60) + table.add_column("Tags") + table.add_column("Owner") + table.add_column("Description", max_width=50, overflow="fold") - for q_name in sorted(queries.keys()): - spec = queries[q_name] - decl_params = spec.get("params") or {} + for entry in entries: + decl_params = entry.params or {} param_summary = ", ".join( f"{k}{'*' if (isinstance(v, dict) and v.get('required', True)) else ''}" for k, v in decl_params.items() ) + endpoint_cell = entry.endpoint or "?" + if param_summary: + endpoint_cell = f"{endpoint_cell} ({param_summary})" + table.add_row( + entry.name, + endpoint_cell, + ", ".join(entry.tags) or "—", + entry.owner or "—", + entry.description, + ) + console.print(table) + console.print( + "[dim]Run with: bcli q [key=value ...] " + "Discover with: bcli q search [/dim]" + ) + + +def _search_queries(profile_name: str, queries_file: Path, phrase: str) -> None: + """Rank queries by name / alias / tag / description match.""" + queries = _load_saved_queries(queries_file) + if not queries: + console.print(f"[dim]No saved queries for profile '{profile_name}'.[/dim]") + return + + from bcli.workflow._query_search import normalize_queries, search_entries + + entries = normalize_queries(queries) + hits = search_entries(entries, phrase) + if not hits: + console.print( + f"[yellow]No queries matched '{phrase}'.[/yellow]\n" + "[dim]Try a shorter phrase or run `bcli q list` to browse.[/dim]" + ) + raise typer.Exit(1) + + table = Table( + show_header=True, + header_style="bold", + title=f"Search — '{phrase}'", + ) + table.add_column("Score", justify="right", style="dim") + table.add_column("Name", style="cyan") + table.add_column("Tags") + table.add_column("Description", max_width=60, overflow="fold") + for score, entry in hits: table.add_row( - q_name, - spec.get("endpoint", "?"), - param_summary or "—", - spec.get("description", ""), + str(score), + entry.name, + ", ".join(entry.tags) or "—", + entry.description, ) console.print(table) - console.print("[dim]Run with: bcli q [key=value ...][/dim]") + console.print( + "[dim]Open one with: bcli q info Run with: bcli q ...[/dim]" + ) + + +def _query_info(profile_name: str, queries_file: Path, name: str) -> None: + """Print full metadata for one query.""" + queries = _load_saved_queries(queries_file) + if name not in queries: + alias_hit = _resolve_alias(queries, name) + if alias_hit is None: + console.print( + f"[red]Saved query '{name}' not found.[/red] " + f"Run `bcli q search '{name}'` to find similar." + ) + raise typer.Exit(1) + name = alias_hit + + from bcli.workflow._query_search import QueryEntry + + entry = QueryEntry.from_raw(name, queries[name]) + + console.print(f"[bold cyan]{entry.name}[/bold cyan]") + if entry.description: + console.print(f" {entry.description}") + console.print() + if entry.aliases: + console.print(f" [dim]aliases:[/dim] {', '.join(entry.aliases)}") + if entry.tags: + console.print(f" [dim]tags:[/dim] {', '.join(entry.tags)}") + if entry.owner: + console.print(f" [dim]owner:[/dim] {entry.owner}") + if entry.freshness: + console.print(f" [dim]freshness:[/dim] {entry.freshness}") + if entry.endpoint: + console.print(f" [dim]endpoint:[/dim] {entry.endpoint}") + if entry.params: + console.print(" [dim]params:[/dim]") + for pname, pdef in entry.params.items(): + if isinstance(pdef, dict): + bits = [] + if pdef.get("required"): + bits.append("required") + if "default" in pdef: + bits.append(f"default={pdef['default']!r}") + if pdef.get("type"): + bits.append(pdef["type"]) + if pdef.get("hint"): + bits.append(f"hint={pdef['hint']!r}") + tail = f" ({', '.join(bits)})" if bits else "" + else: + tail = "" + console.print(f" {pname}{tail}") + if entry.examples: + console.print(" [dim]examples:[/dim]") + for ex in entry.examples: + console.print(f" {ex}") + if entry.related: + console.print(f" [dim]related:[/dim] {', '.join(entry.related)}") + + +def _resolve_alias(queries: dict[str, dict[str, Any]], term: str) -> str | None: + """Return the canonical query name when ``term`` matches an alias.""" + term_lower = term.lower() + for q_name, body in queries.items(): + aliases = body.get("aliases") or [] + if not isinstance(aliases, (list, tuple)): + continue + if any(str(a).lower() == term_lower for a in aliases): + return q_name + return None def _print_starter_example(queries_file: Path) -> None: @@ -219,6 +440,13 @@ def _print_starter_example(queries_file: Path) -> None: ) +# Reserved names that the `bcli q` sub-verb dispatch consumes. A query +# whose name lives here is unreachable except via `bcli q run `, +# so the loader hard-errors at parse time — this is a misconfigured +# bundle and the right place to catch it is at refresh, not at runtime. +_RESERVED_QUERY_NAMES = frozenset({"list", "search", "find", "info", "run"}) + + def _load_saved_queries(queries_file: Path) -> dict[str, dict[str, Any]]: """Parse a saved-queries YAML file. Returns an empty dict if missing.""" if not queries_file.is_file(): @@ -239,6 +467,18 @@ def _load_saved_queries(queries_file: Path) -> dict[str, dict[str, Any]]: if not isinstance(queries, dict): console.print(f"[red]{queries_file}: 'queries' must be a mapping.[/red]") raise typer.Exit(1) + + collisions = sorted(set(queries) & _RESERVED_QUERY_NAMES) + if collisions: + console.print( + f"[red]{queries_file}: reserved query names used: " + f"{', '.join(collisions)}.[/red]\n" + f"[dim]These names collide with `bcli q` sub-verbs. " + f"Rename the queries or invoke them via `bcli q run `. " + f"Reserved: {sorted(_RESERVED_QUERY_NAMES)}[/dim]" + ) + raise typer.Exit(1) + return queries diff --git a/src/bcli_cli/commands/refresh_cmd.py b/src/bcli_cli/commands/refresh_cmd.py new file mode 100644 index 0000000..7b5b92f --- /dev/null +++ b/src/bcli_cli/commands/refresh_cmd.py @@ -0,0 +1,298 @@ +"""``bcli config refresh`` and friends — team bundle pull / rollback / publish. + +Implementation lives here; the commands are registered onto the existing +``bcli config`` Typer app at the bottom of :mod:`config_cmd` so the user +sees ``bcli config refresh``. +""" + +from __future__ import annotations + +import logging +import shutil +from pathlib import Path +from typing import Optional + +import typer +from rich.console import Console + +from bcli.bundle import ( + BundleApplyResult, + BundleFetchError, + BundleVerifyError, + Sha256Verifier, + apply_bundle, + fetch_bundle, + load_local_manifest, + rollback_bundle, + verify_bundle, +) +from bcli.bundle._publish import make_bundle +from bcli.config._defaults import CONFIG_DIR, REGISTRIES_DIR +from bcli_cli._state import state + +logger = logging.getLogger("bcli.bundle.cli") +console = Console(stderr=True) +out = Console() + +BUNDLES_DIR = CONFIG_DIR / "bundles" +QUERIES_DIR = CONFIG_DIR / "queries" + + +def _bundle_url_for_profile(profile_name: str) -> str | None: + """Resolve the bundle URL for ``profile_name`` from config. + + The profile model carries arbitrary extras (``model_config = extra: + "allow"``) so we look for ``bundle_url`` directly on the profile, and + fall back to a top-level ``[bundles] = "..."`` table if + present. Either form keeps admins out of code edits. + """ + cfg = state.config + profile = cfg.get_profile(profile_name) + direct = getattr(profile, "bundle_url", None) + if isinstance(direct, str) and direct.strip(): + return direct.strip() + + extra = getattr(cfg, "model_extra", None) or {} + bundles = extra.get("bundles") if isinstance(extra, dict) else None + if isinstance(bundles, dict): + url = bundles.get(profile_name) + if isinstance(url, str) and url.strip(): + return url.strip() + return None + + +# ─── refresh ────────────────────────────────────────────────────────── + + +def refresh_command( + profile: Optional[str] = typer.Option( + None, "--profile", "-p", + help="Profile to refresh (default: active profile)", + ), + url: Optional[str] = typer.Option( + None, "--url", + help="Override the bundle URL for this run (otherwise read from config)", + ), + dry_run: bool = typer.Option( + False, "--dry-run", + help="Fetch and verify the bundle but do not apply", + ), + skip_verify: bool = typer.Option( + False, "--skip-verify", + help="Skip checksum verification (dev-only — requires BCLI_DEV=1)", + ), +) -> None: + """Pull the latest team bundle for a profile and apply it atomically. + + \b + Examples: + bcli config refresh + bcli config refresh --profile finance + bcli config refresh --dry-run + BCLI_DEV=1 bcli config refresh --url file:///tmp/finance-2026.05.07-1.tar.gz + """ + import os + + dev_mode = os.environ.get("BCLI_DEV") in ("1", "true", "yes") + if skip_verify and not dev_mode: + console.print( + "[red]--skip-verify is dev-only.[/red] Set BCLI_DEV=1 to opt in. " + "Production refreshes must verify; refusing to bypass." + ) + raise typer.Exit(1) + + profile_name = profile or state.active_profile_name + resolved_url = url or _bundle_url_for_profile(profile_name) + if not resolved_url: + console.print( + f"[red]No bundle URL configured for profile '{profile_name}'.[/red]\n" + "[dim]Add `bundle_url = \"https://...\"` to the profile in " + "~/.config/bcli/config.toml, or pass --url for a one-off pull.[/dim]" + ) + raise typer.Exit(1) + + console.print( + f"[bold]Refreshing[/bold] [cyan]{profile_name}[/cyan] from {resolved_url}" + ) + + try: + bundle, raw = fetch_bundle(resolved_url) + except BundleFetchError as e: + console.print(f"[red]Fetch failed:[/red] {e}") + raise typer.Exit(1) from e + + try: + if not skip_verify: + try: + verify_bundle(bundle, verifier=Sha256Verifier(), raw_archive=raw) + except BundleVerifyError as e: + console.print(f"[red]Verify failed:[/red] {e}") + raise typer.Exit(1) from e + console.print(" [green]✓[/green] checksum verified") + + if bundle.manifest.profile != profile_name: + console.print( + f"[yellow]Warning:[/yellow] bundle declares profile " + f"'{bundle.manifest.profile}' but you are refreshing " + f"'{profile_name}'. Refusing to apply mismatched profiles." + ) + raise typer.Exit(1) + + previous = load_local_manifest(BUNDLES_DIR, profile_name) + console.print( + f" Current: [dim]{previous.version if previous else '(none)'}[/dim]" + ) + console.print( + f" Latest: [bold]{bundle.manifest.version}[/bold] " + f"[dim](published {bundle.manifest.published_at:%Y-%m-%d}" + + ( + f" by {bundle.manifest.publisher}" + if bundle.manifest.publisher else "" + ) + + ")[/dim]" + ) + if bundle.manifest.release_notes: + console.print(f" Notes: {bundle.manifest.release_notes}") + + if dry_run: + console.print("[yellow]--dry-run[/yellow] — verified but not applied.") + return + + result = apply_bundle( + bundle, + registries_dir=REGISTRIES_DIR, + queries_dir=QUERIES_DIR, + bundle_dir=BUNDLES_DIR, + ) + _print_apply_summary(result) + finally: + # Always clean up the temp extraction tree — covers verify + # failure, profile mismatch, dry-run exit, and apply crashes. + shutil.rmtree(bundle.root, ignore_errors=True) + + +def _print_apply_summary(result: BundleApplyResult) -> None: + parts = [] + if result.registry_changed: + parts.append("registry") + if result.queries_changed: + parts.append("queries") + if result.field_lists_changed: + parts.append("field lists") + changed = ", ".join(parts) if parts else "no files changed" + console.print( + f" [green]Applied[/green] {result.new_version} ({changed})." + ) + if result.previous_version: + console.print( + f" [dim]Previous version {result.previous_version} retained " + "for `bcli config refresh --rollback`.[/dim]" + ) + + +# ─── rollback ───────────────────────────────────────────────────────── + + +def rollback_command( + profile: Optional[str] = typer.Option( + None, "--profile", "-p", + help="Profile to roll back (default: active profile)", + ), +) -> None: + """Restore the previous bundle for a profile. + + Idempotent — safe to run twice. If no backup exists, the in-place + manifest is wiped so ``bcli doctor`` reverts to "bundle not installed". + """ + profile_name = profile or state.active_profile_name + rolled = rollback_bundle( + profile_name, + registries_dir=REGISTRIES_DIR, + queries_dir=QUERIES_DIR, + bundle_dir=BUNDLES_DIR, + ) + if rolled: + console.print(f"[green]Rolled back[/green] {profile_name}.") + else: + console.print( + f"[dim]Nothing to roll back for {profile_name}.[/dim]" + ) + + +# ─── status ─────────────────────────────────────────────────────────── + + +def bundle_status_command( + profile: Optional[str] = typer.Option( + None, "--profile", "-p", + help="Profile to inspect (default: active profile)", + ), +) -> None: + """Show the currently-applied bundle's manifest, if any.""" + profile_name = profile or state.active_profile_name + manifest = load_local_manifest(BUNDLES_DIR, profile_name) + if not manifest: + console.print( + f"[dim]No bundle installed for {profile_name}.[/dim]\n" + "[dim]Run `bcli config refresh` once your team has published one.[/dim]" + ) + return + out.print(f"[bold]bundle:[/bold] {profile_name}") + out.print(f" version: {manifest.version}") + out.print(f" published_at: {manifest.published_at:%Y-%m-%d %H:%M %Z}") + if manifest.publisher: + out.print(f" publisher: {manifest.publisher}") + if manifest.previous_version: + out.print(f" previous: {manifest.previous_version}") + if manifest.release_notes: + out.print(f" notes: {manifest.release_notes}") + out.print(f" checksum_sha256: {manifest.checksum_sha256[:16]}…") + + +# ─── make-bundle (admin) ────────────────────────────────────────────── + + +def make_bundle_command( + source_dir: Path = typer.Argument( + ..., + help="Directory containing registry.json, queries.yaml, field_lists.json", + ), + profile: str = typer.Option(..., "--profile", "-p", help="Profile name"), + version: str = typer.Option(..., "--version", help="Bundle version (e.g. 2026.05.07-1)"), + publisher: str = typer.Option("", "--publisher", help="Who is publishing this bundle"), + notes: str = typer.Option("", "--notes", help="Short release-notes line"), + previous: str = typer.Option("", "--previous", help="Previous bundle version, for changelog continuity"), + output: Optional[Path] = typer.Option( + None, "--output", "-o", + help="Output tarball path (default: -.tar.gz)", + ), +) -> None: + """Build a tarball bundle from a directory tree. + + \b + Example: + bcli config make-bundle ./bundle-finance \\ + --profile finance --version 2026.05.07-1 \\ + --publisher ops-bcli-bot \\ + --notes "Added overdue-ic and posted-by-id queries" \\ + --output finance-2026.05.07-1.tar.gz + """ + try: + path, manifest = make_bundle( + source_dir, + profile=profile, + version=version, + publisher=publisher, + release_notes=notes, + previous_version=previous, + output_path=output, + ) + except (FileNotFoundError, ValueError) as e: + console.print(f"[red]make-bundle failed:[/red] {e}") + raise typer.Exit(1) from e + + console.print(f"[green]Built[/green] {path}") + console.print(f" profile: {manifest.profile}") + console.print(f" version: {manifest.version}") + console.print(f" checksum: {manifest.checksum_sha256[:16]}…") + console.print(f" files: {len(manifest.contents)}") diff --git a/src/bcli_cli/output/_formatters.py b/src/bcli_cli/output/_formatters.py index 5906599..d688810 100644 --- a/src/bcli_cli/output/_formatters.py +++ b/src/bcli_cli/output/_formatters.py @@ -1,4 +1,4 @@ -"""Output formatters: table, markdown, json, csv, ndjson, raw.""" +"""Output formatters: table, markdown, records, json, csv, ndjson, raw.""" from __future__ import annotations @@ -6,6 +6,7 @@ import io import json import os +import shutil import sys from typing import Any @@ -55,16 +56,66 @@ def detect_default_format() -> str: return "table" -def format_output(records: list[dict[str, Any]], fmt: str = "table") -> None: - """Format and print records.""" +def format_output( + records: list[dict[str, Any]], + fmt: str = "table", + *, + auto_format: bool = True, +) -> None: + """Format and print records. + + When ``auto_format`` is ``True`` (the default for code paths that + chose the format via :func:`detect_default_format`), wide single-row + results in ``table`` / ``markdown`` modes flip to vertical records + view. A user who explicitly passed ``-f markdown`` or set + ``BCLI_FORMAT=markdown`` gets exactly markdown — the contract they + asked for is honored even when the output is too wide to render + cleanly. Pass ``auto_format=False`` from CLI dispatchers when the + user supplied ``-f`` directly. + """ if not records: stderr_console.print("[dim]No records found.[/dim]") return + if auto_format and fmt in ("table", "markdown", "md") and _should_auto_records(records): + fmt = "records" + formatter = _FORMATTERS.get(fmt, _format_table) formatter(records) +def _should_auto_records(records: list[dict[str, Any]]) -> bool: + """Decide whether to flip a wide table into a vertical record view. + + Triggers when: + * the user has not pinned a format explicitly (BCLI_FORMAT respected + upstream — see ``detect_default_format``), + * the record set is small (1-2 rows), AND + * either the column count exceeds 8 OR the rendered first-row width + would exceed the terminal width. + + A small record count is the safety bound: vertical view scales badly + past ~5 records and the user is better served by JSON/CSV at that point. + """ + if os.environ.get("BCLI_NO_AUTO_RECORDS"): + return False + if len(records) > 2: + return False + columns = [k for k in records[0].keys() if not k.startswith("@odata")] + if len(columns) <= 6: + return False + # Always flip when there are very many columns — terminal width can't + # be trusted on Windows Terminal under some shells, and 8+ columns in + # 1-2 rows is the canonical "give me the engine record" lookup shape. + if len(columns) > 8: + return True + width = shutil.get_terminal_size((120, 24)).columns + estimated = sum( + max(len(c), len(_format_cell(records[0].get(c)))) + 3 for c in columns + ) + return estimated > width + + def _format_table(records: list[dict[str, Any]]) -> None: """Rich table output.""" if not records: @@ -188,10 +239,59 @@ def _format_cell_markdown(value: Any) -> str: return cell.replace("|", "\\|").replace("\n", " ").replace("\r", "") +def _format_records(records: list[dict[str, Any]]) -> None: + """Vertical "psql \\x"-style view: one field per line, blank line between rows. + + Output shape (line-wrapping aside, every field stays on its own line): + + record 1 + systemId : b1fc5e63-… + engineSerialNumber : 194108 + engineType : CF34-8C + … + + record 2 + … + + This is the right view for wide records on narrow terminals (Windows + PowerShell with ~120 columns and an engine record with ~30 fields is the + motivating case). It works in any terminal — no box-drawing, no ANSI, + safe to redirect. + """ + if not records: + return + + columns = [k for k in records[0].keys() if not k.startswith("@odata")] + if not columns: + return + label_width = min(40, max(len(c) for c in columns)) + + multi = len(records) > 1 + for idx, record in enumerate(records, start=1): + if multi: + print(f"record {idx}") + for col in columns: + label = col.ljust(label_width) + value = _format_cell(record.get(col)) + # A multi-line cell becomes "label : line1\n | line2" + # rather than re-printing the label, so the grouping stays visible. + lines = value.splitlines() or [""] + print(f" {label} : {lines[0]}") + for extra in lines[1:]: + print(f" {' ' * label_width} {extra}") + if idx != len(records): + print() + stderr_console.print(f"[dim]{len(records)} record(s)[/dim]") + + _FORMATTERS = { "table": _format_table, "markdown": _format_markdown, "md": _format_markdown, + "records": _format_records, + "record": _format_records, + "r": _format_records, + "vertical": _format_records, "json": _format_json, "csv": _format_csv, "ndjson": _format_ndjson, diff --git a/tests/test_bundle/__init__.py b/tests/test_bundle/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_bundle/test_bundle_roundtrip.py b/tests/test_bundle/test_bundle_roundtrip.py new file mode 100644 index 0000000..77c5f9b --- /dev/null +++ b/tests/test_bundle/test_bundle_roundtrip.py @@ -0,0 +1,394 @@ +"""End-to-end bundle round-trip: publish → fetch → verify → apply → rollback.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from bcli.bundle import ( + BundleFetchError, + BundleVerifyError, + Sha256Verifier, + apply_bundle, + fetch_bundle, + load_local_manifest, + rollback_bundle, + verify_bundle, +) +from bcli.bundle._publish import make_bundle + + +def _seed_bundle_source(tmp_path: Path) -> Path: + src = tmp_path / "bundle-finance" + src.mkdir() + (src / "registry.json").write_text( + json.dumps( + { + "endpoints": [ + { + "entity_set_name": "vendorLedgerEntries", + "category": "custom", + "publisher": "team", + "group": "finance", + "version": "v1.0", + "field_names": ["vendorNumber", "amount"], + } + ] + } + ), + encoding="utf-8", + ) + (src / "queries.yaml").write_text( + "queries:\n open-pos:\n endpoint: purchaseOrders\n", + encoding="utf-8", + ) + return src + + +def test_make_bundle_produces_valid_manifest(tmp_path): + src = _seed_bundle_source(tmp_path) + out, manifest = make_bundle( + src, + profile="finance", + version="2026.05.07-1", + publisher="ops-bcli-bot", + release_notes="initial", + ) + assert out.is_file() + assert manifest.profile == "finance" + assert manifest.version == "2026.05.07-1" + assert len(manifest.checksum_sha256) == 64 + assert "registry.json" in manifest.contents + assert "queries.yaml" in manifest.contents + + +def test_round_trip_apply_then_rollback(tmp_path): + src = _seed_bundle_source(tmp_path) + archive_path, _ = make_bundle( + src, profile="finance", version="1.0.0", publisher="x", + output_path=tmp_path / "finance-1.0.0.tar.gz", + ) + + bundle, raw = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + verify_bundle(bundle, verifier=Sha256Verifier(), raw_archive=raw) + + registries_dir = tmp_path / "registries" + queries_dir = tmp_path / "queries" + bundle_dir = tmp_path / "bundles" + + result = apply_bundle( + bundle, + registries_dir=registries_dir, + queries_dir=queries_dir, + bundle_dir=bundle_dir, + ) + assert result.new_version == "1.0.0" + assert result.previous_version == "" + assert result.registry_changed is True + assert result.queries_changed is True + assert (registries_dir / "finance.json").is_file() + assert (queries_dir / "finance.yaml").is_file() + assert load_local_manifest(bundle_dir, "finance").version == "1.0.0" + + # Mutate locally to simulate user-side staleness, then apply v2. + (registries_dir / "finance.json").write_text("{}", encoding="utf-8") + src2 = tmp_path / "bundle-finance-v2" + src2.mkdir() + (src2 / "registry.json").write_text( + json.dumps({"endpoints": [{"entity_set_name": "v2", "category": "custom", "publisher": "x", "group": "y", "version": "v1.0"}]}), + encoding="utf-8", + ) + archive2, _ = make_bundle(src2, profile="finance", version="2.0.0", publisher="x", output_path=tmp_path / "finance-2.0.0.tar.gz") + bundle2, raw2 = fetch_bundle(f"file://{archive2}", allow_file_url=True) + verify_bundle(bundle2, verifier=Sha256Verifier(), raw_archive=raw2) + + result2 = apply_bundle( + bundle2, + registries_dir=registries_dir, + queries_dir=queries_dir, + bundle_dir=bundle_dir, + ) + assert result2.previous_version == "1.0.0" + assert load_local_manifest(bundle_dir, "finance").version == "2.0.0" + # The .previous backup should hold v1.0 content from before we + # locally mutated it… but we mutated it *after* applying v1, so the + # .previous holds the mutated stub, not the v1 bundle. That's a + # property worth documenting: rollback restores the file that was on + # disk just before refresh, not the file the previous refresh wrote. + + # Rollback brings back what was here at v2-apply time. + rolled = rollback_bundle( + "finance", + registries_dir=registries_dir, + queries_dir=queries_dir, + bundle_dir=bundle_dir, + ) + assert rolled is True + assert (registries_dir / "finance.json").read_text(encoding="utf-8") == "{}" + assert load_local_manifest(bundle_dir, "finance").version == "1.0.0" + + +def test_rollback_with_no_backup_removes_manifest(tmp_path): + src = _seed_bundle_source(tmp_path) + archive_path, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + bundle, raw = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + verify_bundle(bundle, verifier=Sha256Verifier(), raw_archive=raw) + apply_bundle( + bundle, + registries_dir=tmp_path / "registries", + queries_dir=tmp_path / "queries", + bundle_dir=tmp_path / "bundles", + ) + # First rollback removes everything because there is no ".previous" — only + # one apply happened. + rolled = rollback_bundle( + "finance", + registries_dir=tmp_path / "registries", + queries_dir=tmp_path / "queries", + bundle_dir=tmp_path / "bundles", + ) + assert rolled is True + assert load_local_manifest(tmp_path / "bundles", "finance") is None + + +def test_tampered_file_content_rejected(tmp_path): + """Modifying any file inside an extracted bundle must fail verification.""" + src = _seed_bundle_source(tmp_path) + archive_path, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + bundle, raw = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + # Simulate a tampered registry — could be a malicious symlink swap or + # a CDN-side rewrite. The content hash recorded in the manifest must + # catch it. + (bundle.root / "registry.json").write_text("{\"endpoints\": []}", encoding="utf-8") + with pytest.raises(BundleVerifyError, match="content mismatch"): + verify_bundle(bundle, verifier=Sha256Verifier(), raw_archive=raw) + + +def test_tampered_manifest_rejected(tmp_path): + """Swapping a contents hash in the manifest must trip the roll-up check.""" + src = _seed_bundle_source(tmp_path) + archive_path, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + bundle, raw = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + # Build a manifest where contents were swapped but the roll hash was + # left intact — the kind of tamper a careless attacker would do. + manifest = bundle.manifest.model_copy( + update={"contents": {**bundle.manifest.contents, "registry.json": "0" * 64}} + ) + bad_bundle = bundle.__class__(manifest=manifest, root=bundle.root) + with pytest.raises(BundleVerifyError): + verify_bundle(bad_bundle, verifier=Sha256Verifier(), raw_archive=raw) + + +def test_fetch_rejects_unsafe_scheme(tmp_path): + from bcli.bundle import BundleFetchError, fetch_bundle as fetch + + with pytest.raises(BundleFetchError): + fetch("http://example.com/bundle.tar.gz") + with pytest.raises(BundleFetchError): + fetch("ftp://example.com/bundle.tar.gz") + + +def test_fetch_rejects_file_url_without_dev_flag(tmp_path): + """file:// is dev-only — production must refuse it without BCLI_DEV=1.""" + src = _seed_bundle_source(tmp_path) + archive_path, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + with pytest.raises(BundleFetchError, match="dev-only"): + fetch_bundle(f"file://{archive_path}") # no allow_file_url, no BCLI_DEV + + +def test_fetch_rejects_oversized_archive(tmp_path, monkeypatch): + """Compressed-size guard: a 30MB archive must be rejected.""" + from bcli.bundle import BundleFetchError, _fetch as fetch_mod + + big = tmp_path / "big.tar.gz" + big.write_bytes(b"\x00" * (30 * 1024 * 1024)) + monkeypatch.setattr(fetch_mod, "MAX_COMPRESSED_BYTES", 1024 * 1024) + with pytest.raises(BundleFetchError, match="compressed-size limit"): + fetch_bundle(f"file://{big}", allow_file_url=True) + + +def test_fetch_rejects_path_traversal(tmp_path): + """A malicious bundle with `../../etc/passwd` must not extract.""" + import tarfile + from io import BytesIO + + from bcli.bundle import BundleFetchError, fetch_bundle as fetch + + buf = BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tar: + bad = tarfile.TarInfo("../escape.txt") + bad.size = 0 + tar.addfile(bad, BytesIO(b"")) + archive = tmp_path / "evil.tar.gz" + archive.write_bytes(buf.getvalue()) + + with pytest.raises(BundleFetchError): + fetch(f"file://{archive}", allow_file_url=True) + + +def test_fetch_rejects_unexpected_filename(tmp_path): + """Non-allowlisted filename in the tar must be rejected at extract time.""" + import tarfile + from io import BytesIO + + from bcli.bundle import BundleFetchError, fetch_bundle as fetch + + buf = BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tar: + bad = tarfile.TarInfo("evil.sh") + bad.size = 0 + tar.addfile(bad, BytesIO(b"")) + archive = tmp_path / "rogue.tar.gz" + archive.write_bytes(buf.getvalue()) + + with pytest.raises(BundleFetchError, match="unexpected file"): + fetch(f"file://{archive}", allow_file_url=True) + + +def test_verify_rejects_traversal_in_contents_map(tmp_path): + """Even if a manifest claims `../etc/passwd` is part of the bundle, + the verifier must reject it before reading files.""" + from bcli.bundle import BundleVerifyError, Sha256Verifier + from bcli.bundle._manifest import Bundle, BundleManifest + from datetime import datetime, timezone + + src = _seed_bundle_source(tmp_path) + archive_path, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + bundle, _ = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + bad_manifest = BundleManifest( + profile="finance", + version="1.0.0", + published_at=datetime.now(timezone.utc), + checksum_sha256="0" * 64, + contents={"../etc/passwd": "x"}, + ) + bad_bundle = Bundle(manifest=bad_manifest, root=bundle.root) + with pytest.raises(BundleVerifyError, match="unsafe content path"): + Sha256Verifier().verify(bad_bundle) + + +def test_concurrent_apply_serializes_via_lock(tmp_path): + """Two threads applying different bundles to the same profile must not + corrupt each other. The advisory file lock serializes them; both runs + finish, and the on-disk manifest is one of the two versions, never a + half-written mash.""" + import threading + + src = _seed_bundle_source(tmp_path) + a, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + b_src = tmp_path / "bundle-finance-b" + b_src.mkdir() + (b_src / "registry.json").write_text( + json.dumps({"endpoints": []}), encoding="utf-8" + ) + b, _ = make_bundle(b_src, profile="finance", version="1.0.1", publisher="x", output_path=tmp_path / "finance-1.0.1.tar.gz") + + registries_dir = tmp_path / "registries" + queries_dir = tmp_path / "queries" + bundle_dir = tmp_path / "bundles" + + bundle_a, ra = fetch_bundle(f"file://{a}", allow_file_url=True) + bundle_b, rb = fetch_bundle(f"file://{b}", allow_file_url=True) + verify_bundle(bundle_a, verifier=Sha256Verifier(), raw_archive=ra) + verify_bundle(bundle_b, verifier=Sha256Verifier(), raw_archive=rb) + + results: list[Exception | str] = [] + + def apply(bundle): + try: + r = apply_bundle( + bundle, + registries_dir=registries_dir, + queries_dir=queries_dir, + bundle_dir=bundle_dir, + ) + results.append(r.new_version) + except Exception as e: # noqa: BLE001 + results.append(e) + + t1 = threading.Thread(target=apply, args=(bundle_a,)) + t2 = threading.Thread(target=apply, args=(bundle_b,)) + t1.start() + t2.start() + t1.join(timeout=10) + t2.join(timeout=10) + + assert all(isinstance(r, str) for r in results), f"unexpected: {results}" + assert set(results) == {"1.0.0", "1.0.1"} + final = load_local_manifest(bundle_dir, "finance") + assert final is not None + assert final.version in {"1.0.0", "1.0.1"} + + +def test_field_lists_merge_into_registry(tmp_path): + """Bundle's field_lists.json must be merged into registry.json on apply.""" + src = tmp_path / "bundle-fl" + src.mkdir() + (src / "registry.json").write_text( + json.dumps( + { + "endpoints": [ + { + "entity_set_name": "vendors", + "category": "custom", + "publisher": "team", + "group": "finance", + "version": "v1.0", + } + ] + } + ), + encoding="utf-8", + ) + (src / "field_lists.json").write_text( + json.dumps({"vendors": ["no", "name", "balance"]}), + encoding="utf-8", + ) + archive_path, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + bundle, raw = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + verify_bundle(bundle, verifier=Sha256Verifier(), raw_archive=raw) + + registries_dir = tmp_path / "registries" + apply_bundle( + bundle, + registries_dir=registries_dir, + queries_dir=tmp_path / "queries", + bundle_dir=tmp_path / "bundles", + ) + merged = json.loads((registries_dir / "finance.json").read_text(encoding="utf-8")) + vendors = merged["endpoints"][0] + assert vendors["field_names"] == ["no", "name", "balance"] + + +def test_no_op_refresh_does_not_change_files(tmp_path): + """Re-applying the exact same bundle reports no changes.""" + src = _seed_bundle_source(tmp_path) + archive_path, _ = make_bundle(src, profile="finance", version="1.0.0", publisher="x", output_path=tmp_path / "finance-1.0.0.tar.gz") + + registries_dir = tmp_path / "registries" + queries_dir = tmp_path / "queries" + bundle_dir = tmp_path / "bundles" + + bundle, raw = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + verify_bundle(bundle, verifier=Sha256Verifier(), raw_archive=raw) + first = apply_bundle( + bundle, + registries_dir=registries_dir, + queries_dir=queries_dir, + bundle_dir=bundle_dir, + ) + assert first.registry_changed is True + + # Apply the same bundle again; same content → no_change. + bundle2, raw2 = fetch_bundle(f"file://{archive_path}", allow_file_url=True) + verify_bundle(bundle2, verifier=Sha256Verifier(), raw_archive=raw2) + second = apply_bundle( + bundle2, + registries_dir=registries_dir, + queries_dir=queries_dir, + bundle_dir=bundle_dir, + ) + assert second.registry_changed is False + assert second.queries_changed is False diff --git a/tests/test_cli/test_records_format.py b/tests/test_cli/test_records_format.py new file mode 100644 index 0000000..0732f2a --- /dev/null +++ b/tests/test_cli/test_records_format.py @@ -0,0 +1,139 @@ +"""Tests for the records (vertical) formatter and auto-fallback.""" + +from __future__ import annotations + +import pytest + +from bcli_cli.output import _formatters +from bcli_cli.output._formatters import ( + _format_records, + _should_auto_records, + format_output, +) + + +def _wide_record() -> dict: + """30-column engine record — the canonical "too wide for table" case.""" + return { + "systemId": "b1fc5e63-e150-ef11-bfe3-000d3a7051ef", + "engineSerialNumber": "194108", + "engineType": "CF34-8C", + "engineModel": "CF34-8C5", + "thrustRating": "8C5", + "initialDate": "2024-05-31", + "lastOperationDate": "2025-02-04", + "initialEngineTsn": "16523.96", + "intialEngineCsn": "11440", + "fc": "135", + "fh": "252.12", + "lastInvoiceDate": "2026-01-31", + "currentTsn": "17645.24", + "currentCsn": "12236", + "limiter": "12753", + "tspr": "17645.24", + "cspr": "12236", + "tslsv": "0", + "cslsv": "0", + "aprCycles": "11", + "qec": "Neutral", + "ittMargin": "35", + "ittMarginNotes": "May-2025 ECM", + "serviceable": "Yes", + "notes": "Sold to United on Mar 18, 2026", + "lessee": "", + "operator": "", + "engineStatus": "Sold", + "location": "GO JET", + "systemModifiedAt": "2026-04-06T14:19:25.347Z", + } + + +class TestAutoRecords: + def test_single_wide_record_triggers_auto_records(self): + assert _should_auto_records([_wide_record()]) is True + + def test_two_wide_records_still_triggers(self): + assert _should_auto_records([_wide_record(), _wide_record()]) is True + + def test_three_records_does_not_trigger(self): + # Past 2 records, vertical view starts hurting more than helping. + assert _should_auto_records([_wide_record()] * 3) is False + + def test_few_columns_does_not_trigger(self): + narrow = {"id": "1", "name": "Acme", "status": "open"} + assert _should_auto_records([narrow]) is False + + def test_env_kill_switch(self, monkeypatch): + monkeypatch.setenv("BCLI_NO_AUTO_RECORDS", "1") + assert _should_auto_records([_wide_record()]) is False + + def test_format_output_promotes_table_to_records_for_wide_single(self, capsys): + format_output([_wide_record()], fmt="table") + out = capsys.readouterr().out + assert "engineSerialNumber : 194108" in out + assert "engineType : CF34-8C" in out + + def test_format_output_promotes_markdown_to_records_for_wide_single(self, capsys): + format_output([_wide_record()], fmt="markdown") + out = capsys.readouterr().out + # Markdown table never appears — the auto-fallback fires first. + assert "| systemId" not in out + assert "engineSerialNumber : 194108" in out + + def test_explicit_records_format_works(self, capsys): + format_output([_wide_record()], fmt="records") + out = capsys.readouterr().out + assert "engineSerialNumber : 194108" in out + + def test_records_alias_r_works(self, capsys): + format_output([{"a": "1", "b": "2"}], fmt="r") + out = capsys.readouterr().out + assert "a" in out and "1" in out + + def test_csv_format_is_not_promoted(self, capsys): + # CSV is for pipelines; the auto-fallback would corrupt that. + format_output([_wide_record()], fmt="csv") + out = capsys.readouterr().out + # CSV header line ends with a comma-separated set, not "name : value". + assert "engineSerialNumber" in out + assert "engineSerialNumber : 194108" not in out + + def test_explicit_format_disables_auto_promote(self, capsys): + """`-f markdown` is a contract; honor it even on a wide single row.""" + format_output([_wide_record()], fmt="markdown", auto_format=False) + out = capsys.readouterr().out + # Markdown header pipe present, vertical view absent. + assert "| systemId" in out + assert "engineSerialNumber : 194108" not in out + + +class TestRecordsRendering: + def test_multi_record_output_has_separators(self, capsys): + _format_records([{"a": "1"}, {"a": "2"}]) + out = capsys.readouterr().out + assert "record 1" in out + assert "record 2" in out + + def test_single_record_omits_header(self, capsys): + _format_records([{"a": "1"}]) + out = capsys.readouterr().out + assert "record 1" not in out + assert " a" in out + + def test_multiline_cell_indents_continuation(self, capsys): + _format_records([{"notes": "line one\nline two"}]) + out = capsys.readouterr().out + lines = out.splitlines() + # First line carries "notes : line one"; continuation indents past the colon. + assert any("notes" in line and "line one" in line for line in lines) + assert any(line.lstrip().startswith("line two") for line in lines) + + def test_drops_odata_fields(self, capsys): + _format_records([{"id": "1", "@odata.etag": "abc"}]) + out = capsys.readouterr().out + assert "@odata" not in out + assert "id" in out + + def test_empty_records_is_noop(self, capsys): + _format_records([]) + assert capsys.readouterr().out == "" diff --git a/tests/test_diagnostics/__init__.py b/tests/test_diagnostics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_diagnostics/test_checks.py b/tests/test_diagnostics/test_checks.py new file mode 100644 index 0000000..6f2d9d9 --- /dev/null +++ b/tests/test_diagnostics/test_checks.py @@ -0,0 +1,307 @@ +"""Unit tests for diagnostic check primitives.""" + +from __future__ import annotations + +import json +from datetime import datetime, timedelta, timezone +from pathlib import Path + +import pytest + +from bcli.config import BCConfig, BCProfile, BCDefaults +from bcli.diagnostics._checks import ( + CheckContext, + CheckStatus, + check_active_profile, + check_auth_mode, + check_bundle, + check_company, + check_environment, + check_field_coverage, + check_registry, + check_saved_queries, + check_tenant, + check_token_cache, + run_all_checks, +) + + +def _ctx( + tmp_path: Path, + *, + profile: BCProfile | None = None, + profile_name: str = "default", + skip_network: bool = True, +) -> CheckContext: + config = BCConfig( + defaults=BCDefaults(profile=profile_name), + profiles={profile_name: profile} if profile else {}, + ) + return CheckContext( + config=config, + profile=profile, + profile_name=profile_name, + bundle_dir=tmp_path / "bundles", + token_cache_path=tmp_path / "tokens.json", + queries_dir=tmp_path / "queries", + registries_dir=tmp_path / "registries", + skip_network=skip_network, + ) + + +def _profile(**overrides) -> BCProfile: + base = dict( + tenant_id="tenant-abc", + environment="production", + company_id="00000000-0000-0000-0000-000000000001", + company_name="Acme", + auth_method="device_code", + client_id="client-xyz", + ) + base.update(overrides) + return BCProfile(**base) + + +# ─── individual checks ──────────────────────────────────────────────── + + +def test_active_profile_ok(tmp_path): + ctx = _ctx(tmp_path, profile=_profile()) + assert check_active_profile(ctx).status is CheckStatus.OK + + +def test_active_profile_fail_when_missing(tmp_path): + ctx = _ctx(tmp_path, profile_name="ghost") + result = check_active_profile(ctx) + assert result.status is CheckStatus.FAIL + assert "ghost" in result.summary + + +def test_tenant_fail_when_blank(tmp_path): + p = _profile() + object.__setattr__(p, "tenant_id", "") # bypass pydantic validation for the test + ctx = _ctx(tmp_path, profile=p) + assert check_tenant(ctx).status is CheckStatus.FAIL + + +def test_environment_ok(tmp_path): + ctx = _ctx(tmp_path, profile=_profile()) + assert check_environment(ctx).status is CheckStatus.OK + + +def test_company_warn_when_unset(tmp_path): + ctx = _ctx(tmp_path, profile=_profile(company_id=None, company_name=None)) + assert check_company(ctx).status is CheckStatus.WARN + + +def test_auth_mode_unknown_warns(tmp_path): + ctx = _ctx(tmp_path, profile=_profile(auth_method="hotp")) + assert check_auth_mode(ctx).status is CheckStatus.WARN + + +def test_token_cache_missing_is_info(tmp_path): + ctx = _ctx(tmp_path, profile=_profile()) + assert check_token_cache(ctx).status is CheckStatus.INFO + + +def test_token_cache_corrupt_warns(tmp_path): + cache = tmp_path / "tokens.json" + cache.write_text("{ this is not json", encoding="utf-8") + ctx = _ctx(tmp_path, profile=_profile()) + assert check_token_cache(ctx).status is CheckStatus.WARN + + +def test_token_cache_valid_entry(tmp_path): + cache = tmp_path / "tokens.json" + expires = (datetime.now(timezone.utc) + timedelta(minutes=30)).isoformat() + cache.write_text( + json.dumps( + {"tenant-abc:client-xyz": {"access_token": "x", "expires_at": expires}} + ), + encoding="utf-8", + ) + ctx = _ctx(tmp_path, profile=_profile()) + result = check_token_cache(ctx) + assert result.status is CheckStatus.OK + assert "valid for active profile" in result.summary + + +def test_token_cache_all_expired(tmp_path): + cache = tmp_path / "tokens.json" + expires = (datetime.now(timezone.utc) - timedelta(hours=1)).isoformat() + cache.write_text( + json.dumps( + {"tenant-abc:client-xyz": {"access_token": "x", "expires_at": expires}} + ), + encoding="utf-8", + ) + ctx = _ctx(tmp_path, profile=_profile()) + result = check_token_cache(ctx) + assert result.status is CheckStatus.INFO + assert "expired" in result.summary + + +def test_token_cache_other_tenant_does_not_lie(tmp_path): + """A valid cached token for a *different* tenant must not be reported OK + for the active profile. This is the regression that prompted the + profile-scoped scoping.""" + cache = tmp_path / "tokens.json" + expires = (datetime.now(timezone.utc) + timedelta(minutes=30)).isoformat() + cache.write_text( + json.dumps( + { + "other-tenant:other-client": { + "access_token": "x", + "expires_at": expires, + } + } + ), + encoding="utf-8", + ) + ctx = _ctx(tmp_path, profile=_profile()) + result = check_token_cache(ctx) + assert result.status is CheckStatus.INFO + assert "active profile" in result.summary + + +def test_registry_scoped_with_no_imports_fails(tmp_path): + ctx = _ctx(tmp_path, profile=_profile(disable_standard_api=True)) + result = check_registry(ctx) + assert result.status is CheckStatus.FAIL + assert "scoped profile" in result.summary + + +def test_registry_standard_loads(tmp_path): + ctx = _ctx(tmp_path, profile=_profile(disable_standard_api=False)) + result = check_registry(ctx) + assert result.status is CheckStatus.OK + assert "standard" in result.summary + + +def test_field_coverage_high(tmp_path): + registries = tmp_path / "registries" + registries.mkdir() + (registries / "default.json").write_text( + json.dumps( + { + "endpoints": [ + { + "entity_set_name": f"e{i}", + "category": "custom", + "publisher": "x", + "group": "y", + "version": "v1.0", + "field_names": ["a", "b"], + } + for i in range(8) + ] + + [ + { + "entity_set_name": "missing", + "category": "custom", + "publisher": "x", + "group": "y", + "version": "v1.0", + } + ] + } + ), + encoding="utf-8", + ) + ctx = _ctx( + tmp_path, + profile=_profile(disable_standard_api=True), + ) + result = check_field_coverage(ctx) + assert result.status is CheckStatus.OK + assert "8/9" in result.summary + + +def test_field_coverage_low_warns(tmp_path): + registries = tmp_path / "registries" + registries.mkdir() + (registries / "default.json").write_text( + json.dumps( + { + "endpoints": [ + { + "entity_set_name": f"e{i}", + "category": "custom", + "publisher": "x", + "group": "y", + "version": "v1.0", + } + for i in range(10) + ] + } + ), + encoding="utf-8", + ) + ctx = _ctx(tmp_path, profile=_profile(disable_standard_api=True)) + assert check_field_coverage(ctx).status is CheckStatus.WARN + + +def test_saved_queries_missing_for_scoped_profile_warns(tmp_path): + ctx = _ctx(tmp_path, profile=_profile(disable_standard_api=True)) + assert check_saved_queries(ctx).status is CheckStatus.WARN + + +def test_saved_queries_count(tmp_path): + qdir = tmp_path / "queries" + qdir.mkdir() + (qdir / "default.yaml").write_text( + "queries:\n a:\n endpoint: x\n b:\n endpoint: y\n", + encoding="utf-8", + ) + ctx = _ctx(tmp_path, profile=_profile()) + result = check_saved_queries(ctx) + assert result.status is CheckStatus.OK + assert "2" in result.summary + + +def test_bundle_missing_is_info(tmp_path): + ctx = _ctx(tmp_path, profile=_profile()) + assert check_bundle(ctx).status is CheckStatus.INFO + + +def test_bundle_old_warns(tmp_path): + bundles = tmp_path / "bundles" + bundles.mkdir() + old_ts = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat() + (bundles / "default.manifest.json").write_text( + json.dumps({"version": "1.0.0", "published_at": old_ts}), + encoding="utf-8", + ) + ctx = _ctx(tmp_path, profile=_profile()) + result = check_bundle(ctx) + assert result.status is CheckStatus.WARN + assert "refresh" in result.hint + + +# ─── orchestration ──────────────────────────────────────────────────── + + +def test_run_all_checks_swallows_exceptions(tmp_path): + """A buggy check function must not crash the whole report.""" + ctx = _ctx(tmp_path, profile=_profile()) + + def boom(_ctx): + raise RuntimeError("intentional") + + results = run_all_checks(ctx, checks=(boom,)) + assert len(results) == 1 + assert results[0].status is CheckStatus.FAIL + assert "RuntimeError" in results[0].summary + + +def test_run_all_checks_runs_default_set(tmp_path): + ctx = _ctx(tmp_path, profile=_profile()) + results = run_all_checks(ctx) + # Sanity: at least the documented checks run, in roughly the order we declared. + names = [r.name for r in results] + assert "active profile" in names + assert "registry" in names + assert "bc connectivity" in names + # skip_network=True means connectivity is INFO, not FAIL. + conn = next(r for r in results if r.name == "bc connectivity") + assert conn.status is CheckStatus.INFO diff --git a/tests/test_workflow/test_query_search.py b/tests/test_workflow/test_query_search.py new file mode 100644 index 0000000..455d0cf --- /dev/null +++ b/tests/test_workflow/test_query_search.py @@ -0,0 +1,140 @@ +"""Tests for the saved-query discoverability layer (list / search / info).""" + +from __future__ import annotations + +import pytest + +from bcli.workflow._query_search import ( + QueryEntry, + filter_entries, + normalize_queries, + search_entries, +) + + +_SAMPLE = { + "overdue-ic": { + "description": "Overdue intercompany invoices for a vendor", + "aliases": ["overdue-intercompany", "ic-overdue"], + "tags": ["period-close", "ap", "intercompany"], + "owner": "finance-ops", + "freshness": "live", + "endpoint": "vendorLedgerEntries", + "params": {"vendor": {"required": True, "hint": "BC Vendor No."}}, + "examples": ["bcli q overdue-ic vendor=ACME-IC"], + }, + "open-pos": { + "description": "Open purchase orders by vendor", + "tags": ["ap", "purchasing"], + "owner": "finance-ops", + "freshness": "live", + "endpoint": "purchaseOrders", + }, + "engine-by-esn": { + "description": "Engine record by serial number", + "tags": ["engine", "ops"], + "owner": "engine-tech", + "freshness": "live", + "endpoint": "enginesView", + }, +} + + +def test_normalize_handles_missing_metadata(): + bare = {"x": {"endpoint": "foo"}} + entries = normalize_queries(bare) + assert len(entries) == 1 + assert entries[0].name == "x" + assert entries[0].tags == () + assert entries[0].aliases == () + + +def test_normalize_sorts_alphabetically(): + entries = normalize_queries(_SAMPLE) + assert [e.name for e in entries] == ["engine-by-esn", "open-pos", "overdue-ic"] + + +# ─── filter_entries ─────────────────────────────────────────────────── + + +def test_filter_by_tag(): + entries = normalize_queries(_SAMPLE) + out = filter_entries(entries, tag="period-close") + assert [e.name for e in out] == ["overdue-ic"] + + +def test_filter_by_owner(): + entries = normalize_queries(_SAMPLE) + out = filter_entries(entries, owner="engine-tech") + assert [e.name for e in out] == ["engine-by-esn"] + + +def test_filter_combined(): + entries = normalize_queries(_SAMPLE) + out = filter_entries(entries, tag="ap", owner="finance-ops") + assert {e.name for e in out} == {"open-pos", "overdue-ic"} + + +def test_filter_case_insensitive(): + entries = normalize_queries(_SAMPLE) + assert len(filter_entries(entries, tag="AP")) == 2 + + +# ─── search_entries ─────────────────────────────────────────────────── + + +def test_search_exact_name_wins(): + entries = normalize_queries(_SAMPLE) + hits = search_entries(entries, "overdue-ic") + assert hits[0][1].name == "overdue-ic" + assert hits[0][0] == 100 + + +def test_search_alias_hit(): + entries = normalize_queries(_SAMPLE) + hits = search_entries(entries, "ic-overdue") + assert hits[0][1].name == "overdue-ic" + assert hits[0][0] >= 90 + + +def test_search_description_substring(): + entries = normalize_queries(_SAMPLE) + hits = search_entries(entries, "intercompany") + assert any(e.name == "overdue-ic" for _, e in hits) + + +def test_search_floor_drops_unrelated(): + entries = normalize_queries(_SAMPLE) + hits = search_entries(entries, "completely unrelated phrase about cats") + assert hits == [] + + +def test_search_partial_phrase_finds_query(): + entries = normalize_queries(_SAMPLE) + hits = search_entries(entries, "engine serial") + assert any(e.name == "engine-by-esn" for _, e in hits) + + +def test_search_ranks_name_match_above_tag_match(): + """A query whose name contains the term must outrank one that only tags it.""" + entries = normalize_queries( + { + "ap-summary": {"description": "x", "tags": []}, + "vendor-balances": {"description": "y", "tags": ["ap"]}, + } + ) + hits = search_entries(entries, "ap") + assert hits[0][1].name == "ap-summary" + + +def test_query_entry_from_raw_handles_string_aliases(): + """Tolerate scalar `aliases: foo` instead of a list — YAML drift is real.""" + entry = QueryEntry.from_raw("x", {"aliases": "only-one"}) + assert entry.aliases == ("only-one",) + + +def test_query_entry_from_raw_handles_none_fields(): + entry = QueryEntry.from_raw("x", {"description": None, "tags": None, "params": None}) + assert entry.description == "" + assert entry.tags == () + assert entry.params == {} From 30e04246eb62ac5805af4a0d9d36450b8a46f626 Mon Sep 17 00:00:00 2001 From: igor-ctrl Date: Fri, 8 May 2026 12:19:04 -0500 Subject: [PATCH 2/4] fix(tests): drop unused imports in new test modules (CI lint) CI runs `ruff check src/ tests/`; my local pre-commit only checked `src/`. Removed `pytest` and `_formatters` imports that became unused after refactoring. No behavioral change. --- tests/test_cli/test_records_format.py | 2 -- tests/test_diagnostics/test_checks.py | 1 - tests/test_workflow/test_query_search.py | 1 - 3 files changed, 4 deletions(-) diff --git a/tests/test_cli/test_records_format.py b/tests/test_cli/test_records_format.py index 0732f2a..2b1b41d 100644 --- a/tests/test_cli/test_records_format.py +++ b/tests/test_cli/test_records_format.py @@ -2,9 +2,7 @@ from __future__ import annotations -import pytest -from bcli_cli.output import _formatters from bcli_cli.output._formatters import ( _format_records, _should_auto_records, diff --git a/tests/test_diagnostics/test_checks.py b/tests/test_diagnostics/test_checks.py index 6f2d9d9..b82430a 100644 --- a/tests/test_diagnostics/test_checks.py +++ b/tests/test_diagnostics/test_checks.py @@ -6,7 +6,6 @@ from datetime import datetime, timedelta, timezone from pathlib import Path -import pytest from bcli.config import BCConfig, BCProfile, BCDefaults from bcli.diagnostics._checks import ( diff --git a/tests/test_workflow/test_query_search.py b/tests/test_workflow/test_query_search.py index 455d0cf..49a7cb3 100644 --- a/tests/test_workflow/test_query_search.py +++ b/tests/test_workflow/test_query_search.py @@ -2,7 +2,6 @@ from __future__ import annotations -import pytest from bcli.workflow._query_search import ( QueryEntry, From a3268859b12aa236606d153640ca9e2fc2e975d6 Mon Sep 17 00:00:00 2001 From: igor-ctrl Date: Fri, 8 May 2026 12:28:10 -0500 Subject: [PATCH 3/4] docs: reframe team-deployment as Beautech bootstrap, not OSS roadmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The team-bundle infra ships in the OSS bcli; how Beautech rolls it out to finance/engine-tech is internal business. Drop "ship-blocker for finance rollout" framing in the plan + verifier docstring — the OSS project doesn't owe Beautech's deployment a publisher-signing implementation, that's tracked separately as a Beautech rollout gate (#11). Sha256Verifier is correctly an integrity check; operators who need authenticity plug Ed25519Verifier into the same Verifier seam when they're ready. --- docs/plans/team-deployment.md | 23 ++++++++++++-------- src/bcli/bundle/_verify.py | 40 +++++++++++++++++------------------ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/docs/plans/team-deployment.md b/docs/plans/team-deployment.md index 4d80a44..406c0fe 100644 --- a/docs/plans/team-deployment.md +++ b/docs/plans/team-deployment.md @@ -1,6 +1,8 @@ -# Team Deployment Plan +# Beautech Team Deployment Plan -Status: draft. Target: finance team (~10 users) + engine technical team (~10 users) on the existing scoped-profile substrate. +Status: draft. **Beautech-internal bootstrap document, not part of the OSS bcli roadmap.** The bcli OSS tool ships independently of this plan; the work below describes how Beautech rolls bundles + diagnostics to its own finance and engine-tech teams on top of the upstream substrate. + +Target: finance team (~10 users) + engine technical team (~10 users) on the existing scoped-profile substrate. ## Why this plan exists @@ -266,17 +268,20 @@ Backend: pluggable `cache_backend` with `redis` extra, mirroring the existing `[ ## Risks and open questions -- **Phase 2 signing is a ship-blocker for finance rollout.** The current +- **Beautech rollout gate: publisher signing.** The current `Sha256Verifier` only proves internal consistency: each file matches its declared hash, and the manifest's roll-up matches the contents map. It does NOT authenticate the publisher. A compromised CDN can mint a malicious `registry.json`, recompute the hashes, and pass - verification. Before bundles go to finance, either ship a real - cryptographic signer (`minisign` / `cosign` / ed25519 + pinned key) - at the `bcli.bundle.Verifier` seam, or restrict bundle distribution - to private blob storage with org-level auth and treat HTTPS+auth as - the trust boundary. Document the choice; do not ship "verified" - without one of the two. + verification. Before Beautech rolls bundles to finance / engine-tech, + either ship a real cryptographic signer (`minisign` / `cosign` / + ed25519 + pinned key) at the `bcli.bundle.Verifier` seam, or restrict + bundle distribution to private blob storage with org-level auth and + treat HTTPS+auth as the trust boundary. Document the choice + internally; do not enable `bcli config refresh` for finance/engine- + tech without one of the two. Note: this is a Beautech deployment + gate, not an OSS bcli release gate — the upstream tool can ship the + bundle infra without dictating how operators use it. - **Signing key custody.** Who owns the bundle signing key, and how is it rotated when an owner leaves? Decide before phase 2 ships. - **Bundle URL discovery.** First-time install needs to know where to refresh from. Likely `bcli config init --scoped --bundle-url ` extends the existing wizard. Verify this fits the wizard's current shape. - **Field discovery in scoped profiles.** Today `bcli endpoint fields` writes back to the local registry. With overlay-off-by-default, sandboxed users can't improve their own setup. The plan: those discoveries get logged to a "candidate fields" file the user can email to their bundle owner. Better mechanism welcome. diff --git a/src/bcli/bundle/_verify.py b/src/bcli/bundle/_verify.py index b239158..055efa7 100644 --- a/src/bcli/bundle/_verify.py +++ b/src/bcli/bundle/_verify.py @@ -1,26 +1,24 @@ """Bundle verification — content checksum today, pluggable signature later. -.. warning:: - - :class:`Sha256Verifier` is **not** an authenticity check. It proves - that the bundle on disk is internally consistent (each file matches - its declared hash, and the contents-roll-up hash matches the manifest - field). It does **not** prove the bundle came from the publisher. - - A compromised CDN can mint a malicious ``registry.json``, recompute - the per-file hashes, recompute the roll-up, and pass verification. - Until a real cryptographic signer (minisign / cosign / ed25519 + - pinned public key) lands at this seam, the team must distribute - bundles only via private blob storage with org-level auth and an - HTTPS-only contract. **Do not roll out to production without that - constraint, or without landing the signing upgrade.** - -The :class:`Verifier` protocol is the seam for cryptographic signing. -A real signer plugs in here without touching apply / fetch / publish — -refusing to ship phase 2 without a signer would have blocked the rest -of the rollout on a tooling decision that doesn't need to be made yet, -but tracking that decision as a phase-2-extension ship-blocker (see -``docs/plans/team-deployment.md`` "Risks") is mandatory. +.. note:: + + :class:`Sha256Verifier` is an **integrity** check, not an + **authenticity** check. It proves that the bundle on disk is + internally consistent (each file matches its declared hash, and + the contents-roll-up hash matches the manifest field). It does + not prove the bundle came from any particular publisher. + + Operators distributing bundles via a trusted private channel + (org-authenticated HTTPS to a private blob, or an internal + artifact registry) get authenticity from the channel itself; the + integrity check then catches in-flight corruption. Operators who + want defense-in-depth — or who are distributing over a path they + do not fully trust — should plug in :class:`Ed25519Verifier` + (tracked separately) at this seam. + +The :class:`Verifier` protocol is the seam for plugging in stronger +verification (cryptographic signing, transparency logs, etc.) without +touching the apply / fetch / publish code paths. Why content hashes instead of "hash the tarball wire bytes": tarballs encode file mtimes, ownership, and ordering, so the wire bytes aren't From bdf37aca7b2f255b9865c9945be6b951a6e64949 Mon Sep 17 00:00:00 2001 From: igor-ctrl Date: Fri, 8 May 2026 14:02:40 -0500 Subject: [PATCH 4/4] =?UTF-8?q?chore:=20rename=20engine-tech=20=E2=86=92?= =?UTF-8?q?=20technical=20in=20OSS=20docs/tests/diagnostics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-repo terminology cleanup matching the Beautech bootstrap rename (bcli-beautech-installer#14): role label changes from "engine-tech" to "technical" everywhere it referred to the role/team. The OSS bcli tool itself is unchanged behaviorally — these are pure string renames in internal-Beautech planning notes, a test fixture, and a diagnostic hint. - docs/plans/team-deployment.md: 12 occurrences. Plan doc is already flagged as Beautech-internal (not OSS roadmap) per earlier framing fix. - tests/test_workflow/test_query_search.py: 2 occurrences in fixtures ("owner": "engine-tech" → "technical"). Tests still pass — the metadata is opaque strings. - src/bcli/diagnostics/_checks.py: 1 hint string in check_auth_mode. 593 tests pass. --- docs/plans/team-deployment.md | 12 ++++++------ src/bcli/diagnostics/_checks.py | 2 +- tests/test_workflow/test_query_search.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/plans/team-deployment.md b/docs/plans/team-deployment.md index 406c0fe..8414606 100644 --- a/docs/plans/team-deployment.md +++ b/docs/plans/team-deployment.md @@ -1,6 +1,6 @@ # Beautech Team Deployment Plan -Status: draft. **Beautech-internal bootstrap document, not part of the OSS bcli roadmap.** The bcli OSS tool ships independently of this plan; the work below describes how Beautech rolls bundles + diagnostics to its own finance and engine-tech teams on top of the upstream substrate. +Status: draft. **Beautech-internal bootstrap document, not part of the OSS bcli roadmap.** The bcli OSS tool ships independently of this plan; the work below describes how Beautech rolls bundles + diagnostics to its own finance and technical teams on top of the upstream substrate. Target: finance team (~10 users) + engine technical team (~10 users) on the existing scoped-profile substrate. @@ -17,7 +17,7 @@ This plan ships three boring high-leverage things to address (1)–(3), explicit ## Phase 0 — pre-flight (this sprint) - Decide bundle storage backend: pick whichever of `S3`, `Azure Blob`, or `GitHub Releases` the org already authenticates to cleanly. Decision lives in `docs/plans/team-deployment.md` once made. -- Identify two bundle owners: one finance, one engine-tech. They are the publish path. +- Identify two bundle owners: one finance, one technical. They are the publish path. - Land a minimal telemetry sink config so phase 4 has data when it's time. The pluggable `[telemetry]` substrate already exists at `src/bcli/telemetry/`; pick `console` for dev, set up Azure Monitor or a custom HTTP sink for prod. Capture `bcli.command`, `bcli.query`, `bcli.error` at minimum. **Do not** capture filter text or UPN unless privacy review approves. ## Phase 1 — `bcli doctor` (ships first) @@ -124,7 +124,7 @@ finance-2026.05.07-1.tar.gz ``` bcli config refresh # refresh active profile -bcli config refresh --profile engine-tech # explicit profile +bcli config refresh --profile technical # explicit profile bcli config refresh --dry-run # show diff vs local, no writes bcli config refresh --rollback # restore previous version bcli config refresh --check # exit code only, no output (cron-friendly) @@ -273,7 +273,7 @@ Backend: pluggable `cache_backend` with `redis` extra, mirroring the existing `[ its declared hash, and the manifest's roll-up matches the contents map. It does NOT authenticate the publisher. A compromised CDN can mint a malicious `registry.json`, recompute the hashes, and pass - verification. Before Beautech rolls bundles to finance / engine-tech, + verification. Before Beautech rolls bundles to finance / technical, either ship a real cryptographic signer (`minisign` / `cosign` / ed25519 + pinned key) at the `bcli.bundle.Verifier` seam, or restrict bundle distribution to private blob storage with org-level auth and @@ -285,14 +285,14 @@ Backend: pluggable `cache_backend` with `redis` extra, mirroring the existing `[ - **Signing key custody.** Who owns the bundle signing key, and how is it rotated when an owner leaves? Decide before phase 2 ships. - **Bundle URL discovery.** First-time install needs to know where to refresh from. Likely `bcli config init --scoped --bundle-url ` extends the existing wizard. Verify this fits the wizard's current shape. - **Field discovery in scoped profiles.** Today `bcli endpoint fields` writes back to the local registry. With overlay-off-by-default, sandboxed users can't improve their own setup. The plan: those discoveries get logged to a "candidate fields" file the user can email to their bundle owner. Better mechanism welcome. -- **Bundle drift between teams.** If finance and engine-tech import the same standard endpoint into both bundles and they diverge, which wins? Today: each profile is isolated. Keep it that way. +- **Bundle drift between teams.** If finance and technical import the same standard endpoint into both bundles and they diverge, which wins? Today: each profile is isolated. Keep it that way. - **Telemetry privacy.** Phase 0 telemetry must not capture filter text or UPN by default. Confirm with the legal/privacy reviewer. ## Validation gates per phase | Phase | Gate | |---|---| -| 1 | `bcli doctor` runs on engine-tech and finance laptops cold; output makes sense to a non-developer | +| 1 | `bcli doctor` runs on technical and finance laptops cold; output makes sense to a non-developer | | 2 | Bundle round-trip works: admin publishes, user runs `refresh`, signature verifies, rollback works | | 3 | Existing queries unchanged; `q search "overdue invoices"` finds `overdue-ic` | | 4 | Telemetry triggers met before any code is written | diff --git a/src/bcli/diagnostics/_checks.py b/src/bcli/diagnostics/_checks.py index d34eee4..46821e3 100644 --- a/src/bcli/diagnostics/_checks.py +++ b/src/bcli/diagnostics/_checks.py @@ -147,7 +147,7 @@ def check_auth_mode(ctx: CheckContext) -> CheckResult: "auth", CheckStatus.FAIL, "auth_method missing", - hint="set `auth_method = \"device_code\"` for finance/engine-tech profiles", + hint="set `auth_method = \"device_code\"` for finance/technical profiles", ) return CheckResult( "auth", diff --git a/tests/test_workflow/test_query_search.py b/tests/test_workflow/test_query_search.py index 49a7cb3..99853a2 100644 --- a/tests/test_workflow/test_query_search.py +++ b/tests/test_workflow/test_query_search.py @@ -32,7 +32,7 @@ "engine-by-esn": { "description": "Engine record by serial number", "tags": ["engine", "ops"], - "owner": "engine-tech", + "owner": "technical", "freshness": "live", "endpoint": "enginesView", }, @@ -64,7 +64,7 @@ def test_filter_by_tag(): def test_filter_by_owner(): entries = normalize_queries(_SAMPLE) - out = filter_entries(entries, owner="engine-tech") + out = filter_entries(entries, owner="technical") assert [e.name for e in out] == ["engine-by-esn"]