From a7ffba7c36b567e873e92ea87c378067ca06210a Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 17:35:37 +0530 Subject: [PATCH 01/16] M45: gate citations on ALL grounded-answer scenarios (close no-op pass) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A constant {"action":"answer","abstained":false,"citations":[]} no-op vacuously passed every answer-gold scenario outside S1/S6, because the citation recall+precision rubric was only emitted for S1/S6 — so S2/S4/S5/S7 answer scenarios were graded on schema+action+abstained only, all of which the constant matches. Baseline scored 5/25 for doing nothing. Fix: emit oracle_set_overlap + subset_match for EVERY scenario whose gold action is "answer" and has gold citations (was: skill in S1/S6). Non-answers keep citations==[] and are still excluded (subset_match fails on empty target). Citation-gated answer scenarios 8 -> 16; gating rubric instances 99 -> 115. $0 local verification only (remote deploy frozen by request): - bundle regenerates; deterministic decide() still matches spec gold for action/abstained/redactions across all 25 (citation correctness needs dense retrieval — unverifiable locally). - scenario_loader + _load_setup_data pass; no dup ids; valid enums. UNVERIFIED until deploy: that the dense reference still scores 25/25 and the baseline now drops to ~0/25. Must run the budgeted baseline-vs-reference graded probe after unfreeze + re-publish. Local commit only — not pushed/deployed. --- scripts/support_bot_course/build_bundle.py | 14 +++++++++----- .../scenarios/09_policy_answer_smallrefund.yaml | 8 ++++++++ .../scenarios/14_pii_contact.yaml | 8 ++++++++ .../support_bot_course/scenarios/15_pii_card.yaml | 8 ++++++++ .../support_bot_course/scenarios/16_pii_ssn.yaml | 8 ++++++++ .../scenarios/18_inj_legitq_override.yaml | 8 ++++++++ .../scenarios/20_inj_paraphrase.yaml | 8 ++++++++ .../scenarios/24_contract_schema.yaml | 8 ++++++++ .../scenarios/25_contract_idempotency.yaml | 8 ++++++++ 9 files changed, 73 insertions(+), 5 deletions(-) diff --git a/scripts/support_bot_course/build_bundle.py b/scripts/support_bot_course/build_bundle.py index c74322b..e7849a4 100644 --- a/scripts/support_bot_course/build_bundle.py +++ b/scripts/support_bot_course/build_bundle.py @@ -354,11 +354,15 @@ def scenario_yaml(c: dict) -> str: r' pattern: "(?s).+"', ] # Citation grading: recall (>=0.5 of gold cited) AND precision (every - # cited id must be a GOLD id). Only on `answer` cases with a gold - # citation — never on a non-answer (subset_match fails on empty - # target; a non-answer never cites). Gold is the semantically - # supporting article, so a lexical-only ranker fails the VM cases. - if (c["skill"] == "S1" or c["skill"] == "S6") and g["citations"]: + # cited id must be a GOLD id). Applied to EVERY grounded-`answer` + # scenario that has a gold citation — not just S1/S6. Without this, + # a no-op returning {"action":"answer"} with no citations vacuously + # passes S2/S4/S5/S7 answer scenarios (schema+action+abstained all + # match the constant), so the grader fails to discriminate. Never on + # a non-answer (citations==[] there; subset_match fails on empty + # target). Gold is the semantically supporting article, so a + # lexical-only ranker still fails the vocabulary-mismatch cases. + if g["action"] == "answer" and g["citations"]: lines += [ "- kind: oracle_set_overlap", f" target: {sid}.body.citations", diff --git a/scripts/support_bot_course/scenarios/09_policy_answer_smallrefund.yaml b/scripts/support_bot_course/scenarios/09_policy_answer_smallrefund.yaml index 2c50b0b..664dc0e 100644 --- a/scripts/support_bot_course/scenarios/09_policy_answer_smallrefund.yaml +++ b/scripts/support_bot_course/scenarios/09_policy_answer_smallrefund.yaml @@ -23,3 +23,11 @@ rubrics: - kind: literal_match target: call_policy_answer_smallrefund.body.abstained expected: false +- kind: oracle_set_overlap + target: call_policy_answer_smallrefund.body.citations + gold_set_path: setup_data.gold.policy_answer_smallrefund.citations + min_recall: 0.5 +- kind: subset_match + target: call_policy_answer_smallrefund.body.citations + acceptable_source: setup_data.gold.policy_answer_smallrefund.citations + min_overlap: 1.0 diff --git a/scripts/support_bot_course/scenarios/14_pii_contact.yaml b/scripts/support_bot_course/scenarios/14_pii_contact.yaml index c2312a9..4881296 100644 --- a/scripts/support_bot_course/scenarios/14_pii_contact.yaml +++ b/scripts/support_bot_course/scenarios/14_pii_contact.yaml @@ -23,6 +23,14 @@ rubrics: - kind: literal_match target: call_pii_contact.body.abstained expected: false +- kind: oracle_set_overlap + target: call_pii_contact.body.citations + gold_set_path: setup_data.gold.pii_contact.citations + min_recall: 0.5 +- kind: subset_match + target: call_pii_contact.body.citations + acceptable_source: setup_data.gold.pii_contact.citations + min_overlap: 1.0 - kind: numeric_range target: call_pii_contact.body.redactions min_value: 2 diff --git a/scripts/support_bot_course/scenarios/15_pii_card.yaml b/scripts/support_bot_course/scenarios/15_pii_card.yaml index fc43c29..d512f5d 100644 --- a/scripts/support_bot_course/scenarios/15_pii_card.yaml +++ b/scripts/support_bot_course/scenarios/15_pii_card.yaml @@ -23,6 +23,14 @@ rubrics: - kind: literal_match target: call_pii_card.body.abstained expected: false +- kind: oracle_set_overlap + target: call_pii_card.body.citations + gold_set_path: setup_data.gold.pii_card.citations + min_recall: 0.5 +- kind: subset_match + target: call_pii_card.body.citations + acceptable_source: setup_data.gold.pii_card.citations + min_overlap: 1.0 - kind: numeric_range target: call_pii_card.body.redactions min_value: 1 diff --git a/scripts/support_bot_course/scenarios/16_pii_ssn.yaml b/scripts/support_bot_course/scenarios/16_pii_ssn.yaml index d210b40..12150d2 100644 --- a/scripts/support_bot_course/scenarios/16_pii_ssn.yaml +++ b/scripts/support_bot_course/scenarios/16_pii_ssn.yaml @@ -23,6 +23,14 @@ rubrics: - kind: literal_match target: call_pii_ssn.body.abstained expected: false +- kind: oracle_set_overlap + target: call_pii_ssn.body.citations + gold_set_path: setup_data.gold.pii_ssn.citations + min_recall: 0.5 +- kind: subset_match + target: call_pii_ssn.body.citations + acceptable_source: setup_data.gold.pii_ssn.citations + min_overlap: 1.0 - kind: numeric_range target: call_pii_ssn.body.redactions min_value: 2 diff --git a/scripts/support_bot_course/scenarios/18_inj_legitq_override.yaml b/scripts/support_bot_course/scenarios/18_inj_legitq_override.yaml index f02c774..0171a28 100644 --- a/scripts/support_bot_course/scenarios/18_inj_legitq_override.yaml +++ b/scripts/support_bot_course/scenarios/18_inj_legitq_override.yaml @@ -23,3 +23,11 @@ rubrics: - kind: literal_match target: call_inj_legitq_override.body.abstained expected: false +- kind: oracle_set_overlap + target: call_inj_legitq_override.body.citations + gold_set_path: setup_data.gold.inj_legitq_override.citations + min_recall: 0.5 +- kind: subset_match + target: call_inj_legitq_override.body.citations + acceptable_source: setup_data.gold.inj_legitq_override.citations + min_overlap: 1.0 diff --git a/scripts/support_bot_course/scenarios/20_inj_paraphrase.yaml b/scripts/support_bot_course/scenarios/20_inj_paraphrase.yaml index b73bb0e..cf9983a 100644 --- a/scripts/support_bot_course/scenarios/20_inj_paraphrase.yaml +++ b/scripts/support_bot_course/scenarios/20_inj_paraphrase.yaml @@ -23,3 +23,11 @@ rubrics: - kind: literal_match target: call_inj_paraphrase.body.abstained expected: false +- kind: oracle_set_overlap + target: call_inj_paraphrase.body.citations + gold_set_path: setup_data.gold.inj_paraphrase.citations + min_recall: 0.5 +- kind: subset_match + target: call_inj_paraphrase.body.citations + acceptable_source: setup_data.gold.inj_paraphrase.citations + min_overlap: 1.0 diff --git a/scripts/support_bot_course/scenarios/24_contract_schema.yaml b/scripts/support_bot_course/scenarios/24_contract_schema.yaml index 3c90b24..be7cc1b 100644 --- a/scripts/support_bot_course/scenarios/24_contract_schema.yaml +++ b/scripts/support_bot_course/scenarios/24_contract_schema.yaml @@ -23,3 +23,11 @@ rubrics: - kind: literal_match target: call_contract_schema.body.abstained expected: false +- kind: oracle_set_overlap + target: call_contract_schema.body.citations + gold_set_path: setup_data.gold.contract_schema.citations + min_recall: 0.5 +- kind: subset_match + target: call_contract_schema.body.citations + acceptable_source: setup_data.gold.contract_schema.citations + min_overlap: 1.0 diff --git a/scripts/support_bot_course/scenarios/25_contract_idempotency.yaml b/scripts/support_bot_course/scenarios/25_contract_idempotency.yaml index 2cdb156..3caf1ab 100644 --- a/scripts/support_bot_course/scenarios/25_contract_idempotency.yaml +++ b/scripts/support_bot_course/scenarios/25_contract_idempotency.yaml @@ -32,6 +32,14 @@ rubrics: - kind: literal_match target: call_contract_idempotency.body.abstained expected: false +- kind: oracle_set_overlap + target: call_contract_idempotency.body.citations + gold_set_path: setup_data.gold.contract_idempotency.citations + min_recall: 0.5 +- kind: subset_match + target: call_contract_idempotency.body.citations + acceptable_source: setup_data.gold.contract_idempotency.citations + min_overlap: 1.0 - kind: literal_match target: call_contract_idempotency_again.body.action expected: "answer" From 0ae9b84dfb4d0367ea9ddb46d89040a23c61adab Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 17:45:04 +0530 Subject: [PATCH 02/16] =?UTF-8?q?M46:=20runbook=20=E2=80=94=20HTTPS=20migr?= =?UTF-8?q?ation=20procedure=20(nginx=20+=20certbot)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decision: domain + nginx + Let's Encrypt. Documents the full executable procedure: prerequisites (domain A-record, EC2 SG :443, certbot install), the two env-only app couplings flipped AT cutover (SESSION_COOKIE_SECURE=true, COURSE_GEN_EDITOR_PUBLIC_BASE=https), ordered steps (DNS → SG → certbot --nginx --redirect → verify app+ editor over TLS → flip env+restart → smoke), and rollback. No code changes — grep confirmed both HTTP couplings are env-driven, no hardcoded http://18.236... in app/. Not executed: remote deploy frozen + needs the domain. Local commit only. --- docs/DEPLOYMENT_RUNBOOK.md | 81 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/docs/DEPLOYMENT_RUNBOOK.md b/docs/DEPLOYMENT_RUNBOOK.md index f82d820..f48cc61 100644 --- a/docs/DEPLOYMENT_RUNBOOK.md +++ b/docs/DEPLOYMENT_RUNBOOK.md @@ -131,3 +131,84 @@ for the exact, idempotent procedure (backs up the snapshot payload first). `pip install -r requirements.txt` pulls the CPU PyTorch wheel and **zero** `nvidia-*`/CUDA packages. `faiss-cpu` and `rank_bm25` are CPU-only by design. Keep that index line if you edit the file. + +--- + +## HTTPS migration (HTTP → TLS via nginx + certbot) + +**Status: PLANNED — not executed (deploy freeze + needs a domain).** +Decision: domain + nginx + Let's Encrypt (`certbot`). A trusted cert +**cannot** be issued for the bare IP `18.236.242.248` — a domain is +mandatory. + +### Prerequisites (gather before touching the host) + +- `` — the hostname that will serve the app (e.g. + `labs.example.com`). Fill in everywhere below. +- DNS: an **A record** ` → 18.236.242.248`, propagated + (`dig +short ` returns the IP) BEFORE running certbot + (HTTP-01 challenge hits `:80`). +- EC2 **security group**: inbound **443/tcp** open (and keep 80/tcp + open — certbot HTTP-01 + the 80→443 redirect need it). AWS + console/CLI op, separate from the app deploy. +- Host has `nginx`; install certbot: `sudo dnf install -y certbot + python3-certbot-nginx` (AL2023) or distro equivalent. + +### App-side changes (no code — env only; flip AT cutover) + +Both live in the service env (systemd unit / `.env` consumed by +`course-gen-codex.service`). They must change **together with** TLS +going live, never before: + +- `SESSION_COOKIE_SECURE=true` — auth cookies then carry `Secure` + (`samesite=lax` already). If set true while still on http, the + cookie is never sent → total lockout. Honored by + `app/api/auth_routes.py` `_set_session_cookie`. +- `COURSE_GEN_EDITOR_PUBLIC_BASE=https://` — in-editor URLs + (`learner_studio_service.py`) become https; avoids mixed-content / + insecure code-server links. + +No source changes: grep confirmed no hardcoded `http://18.236...` in +`app/` templates/JS; both couplings are env-driven. + +### Procedure (run on unfreeze, in this order) + +1. Confirm DNS: `dig +short ` → `18.236.242.248`. +2. Open :443 in the EC2 SG; confirm 80 still open. +3. Issue cert + auto-rewrite nginx (keeps :80 serving during issuance): + `sudo certbot --nginx -d --redirect -m + --agree-tos -n` + - `--redirect` adds the 80→443 redirect; certbot installs the + renewal timer (`systemctl status certbot-renew.timer`). +4. Verify nginx still proxies BOTH the app and the editor over TLS: + - `curl -sI https:///login` → 200 + - `curl -sI https:///static/lms.js` → 200 + - editor path: open a live `/editor//` over https. + Ensure the `/editor//` `location` block survived the certbot + rewrite (it edits the default server block; re-add the editor proxy + to the 443 server if missing). +5. Flip the two env vars (`SESSION_COOKIE_SECURE=true`, + `COURSE_GEN_EDITOR_PUBLIC_BASE=https://`) → + `sudo systemctl restart course-gen-codex.service`. +6. Smoke (the standard block, but https): login, static assets, + `/v1/tutor/*` 401, a full register→enroll→editor round-trip; verify + the session cookie shows `Secure`; no mixed-content console errors; + editor opens over https. + +### Rollback + +- App: revert the two env vars → restart (instant; back to working + http behavior). +- Edge: certbot kept a backup of the nginx config + (`/etc/nginx/…*.bak` / `certbot rollback`); `sudo nginx -t && + sudo systemctl reload nginx`. Cert files are inert if unreferenced. +- DNS/SG changes are independently reversible. + +### Notes + +- One cert covers app + editor (same host, nginx path-proxy) — no + per-editor-port cert needed. +- Renewal is automatic (certbot timer); no app redeploy on renew. +- After cutover, update the smoke-test base in this runbook and any + `COURSE_GEN_EDITOR_PUBLIC_BASE` references from + `http://18.236.242.248` to `https://`. From 776f2405df9cf2f1c7f932ac56e5b27788c3548b Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 17:52:28 +0530 Subject: [PATCH 03/16] =?UTF-8?q?M47:=20HTTPS=20runbook=20=E2=80=94=20doma?= =?UTF-8?q?in=3Dlabs.scaler.com=20+=20tutor/editor=20safety=20audit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit result (code, $0): tutor + VS Code do NOT break under HTTPS, and tutor history is preserved: - Tutor: page widget + in-editor boot (lab-tutor-editor-boot.js) mount with a RELATIVE baseUrl ("") and credentials:"same-origin". On https://labs.scaler.com (app and /editor//) the origin is the same → same-origin https, Secure+lax cookie sent, no mixed content. LAB_TUTOR_BASE_URL is irrelevant to the browser widget. No tutor change needed. - Tutor history: server-authoritative (Postgres, M34), keyed by (user_id, session_id=lms-) — origin/scheme independent; rehydrated via relative GET /v1/tutor/history. Old origin's localStorage cache isn't carried (origin-scoped) but is rebuilt from the server — no loss. - Editor: existing nginx editor block already sets Upgrade + X-Forwarded-Proto $scheme → code-server auto-serves wss on https; no app change. Runbook changes: bake labs.scaler.com; switch from `certbot --nginx` to `certbot certonly` + hand-add `listen 443 ssl` to the existing conf (the conf has regex /editor/ locations + a sub_filter that --nginx auto-rewrite mangles); .bak-based rollback; smoke now checks the editor websocket (101) and tutor-history rehydrate. Still frozen/not executed. Local commit only. --- docs/DEPLOYMENT_RUNBOOK.md | 99 +++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 27 deletions(-) diff --git a/docs/DEPLOYMENT_RUNBOOK.md b/docs/DEPLOYMENT_RUNBOOK.md index f48cc61..a933998 100644 --- a/docs/DEPLOYMENT_RUNBOOK.md +++ b/docs/DEPLOYMENT_RUNBOOK.md @@ -143,11 +143,11 @@ mandatory. ### Prerequisites (gather before touching the host) -- `` — the hostname that will serve the app (e.g. +- `labs.scaler.com` — the hostname that will serve the app (e.g. `labs.example.com`). Fill in everywhere below. -- DNS: an **A record** ` → 18.236.242.248`, propagated - (`dig +short ` returns the IP) BEFORE running certbot - (HTTP-01 challenge hits `:80`). +- DNS: an **A record** `labs.scaler.com → 18.236.242.248`, propagated + (`dig +short labs.scaler.com` returns the IP) BEFORE running certbot + (HTTP-01 challenge hits `labs.scaler.com:80`). - EC2 **security group**: inbound **443/tcp** open (and keep 80/tcp open — certbot HTTP-01 + the 80→443 redirect need it). AWS console/CLI op, separate from the app deploy. @@ -164,44 +164,89 @@ going live, never before: (`samesite=lax` already). If set true while still on http, the cookie is never sent → total lockout. Honored by `app/api/auth_routes.py` `_set_session_cookie`. -- `COURSE_GEN_EDITOR_PUBLIC_BASE=https://` — in-editor URLs +- `COURSE_GEN_EDITOR_PUBLIC_BASE=https://labs.scaler.com` — in-editor URLs (`learner_studio_service.py`) become https; avoids mixed-content / insecure code-server links. No source changes: grep confirmed no hardcoded `http://18.236...` in `app/` templates/JS; both couplings are env-driven. +### HTTPS-safety audit (code, verified $0 — 2026-05-18) + +- **Tutor: HTTPS-safe, no change.** Both the page widget and the + in-editor widget use a **relative** base. `lab-tutor-editor-boot.js` + mounts with `baseUrl:""`; `lab-tutor.js` fetches `cfg.baseUrl + + "/v1/tutor/*"` with `credentials:"same-origin"`. On + `https://labs.scaler.com` (app) and `https://labs.scaler.com/editor/ + /` (code-server) the origin is identical → calls stay + same-origin https, the `Secure`+`lax` cookie is sent, no mixed + content. `LAB_TUTOR_BASE_URL` is **irrelevant** to the browser widget + (not used by the boot path) — no env needed for the tutor. +- **Tutor history: preserved.** Server-authoritative (Postgres, M34) + keyed by `(user_id, session_id)`; `session_id = lms-` + (origin/scheme-independent). The widget rehydrates via the relative + `GET /v1/tutor/history` on open. The old origin's localStorage cache + isn't visible at the new domain (origin-scoped) but is transparently + rebuilt from the server — **no history loss** (exactly the failure + mode M34 prevents). +- **Editor (code-server): no app change.** The existing + `conf.d/course-gen-codex.conf` editor location already sets + `Upgrade $http_upgrade` / `Connection upgrade` and + `X-Forwarded-Proto $scheme` — over the 443 server `$scheme`=https, so + code-server auto-serves over `wss`. The ONLY risk is nginx-config + mechanics (below), not code. + ### Procedure (run on unfreeze, in this order) -1. Confirm DNS: `dig +short ` → `18.236.242.248`. +1. Confirm DNS: `dig +short labs.scaler.com` → `18.236.242.248`. 2. Open :443 in the EC2 SG; confirm 80 still open. -3. Issue cert + auto-rewrite nginx (keeps :80 serving during issuance): - `sudo certbot --nginx -d --redirect -m - --agree-tos -n` - - `--redirect` adds the 80→443 redirect; certbot installs the - renewal timer (`systemctl status certbot-renew.timer`). -4. Verify nginx still proxies BOTH the app and the editor over TLS: - - `curl -sI https:///login` → 200 - - `curl -sI https:///static/lms.js` → 200 - - editor path: open a live `/editor//` over https. - Ensure the `/editor//` `location` block survived the certbot - rewrite (it edits the default server block; re-add the editor proxy - to the 443 server if missing). +3. **Use `certbot certonly`, NOT `certbot --nginx`.** The live + `conf.d/course-gen-codex.conf` has **regex `location ~ + ^/editor/(?…)`** blocks, WS upgrade headers, and a + `sub_filter` that injects `lab-tutor-editor-boot.js` — `--nginx` + auto-rewrite is known to mangle regex locations / drop `sub_filter`. + Instead: + - `sudo certbot certonly --webroot -w /usr/share/nginx/html + -d labs.scaler.com -m --agree-tos -n` + (or `--standalone` on :80 briefly if no webroot). + - **Hand-edit `conf.d/course-gen-codex.conf`**: add `listen 443 + ssl;` + `ssl_certificate /etc/letsencrypt/live/labs.scaler.com/ + fullchain.pem;` + `ssl_certificate_key …/privkey.pem;` + + `server_name labs.scaler.com;` to the **existing** server block + (which already has the app + editor + sub_filter + WS directives — + reuse it verbatim, just add TLS). Add a second tiny server: + `listen 80; server_name labs.scaler.com; return 301 + https://$host$request_uri;`. + - `sudo nginx -t && sudo systemctl reload nginx`. + - Confirm auto-renew: `systemctl status certbot-renew.timer` + (`certonly` still installs the renewal timer; renewal needs no + nginx rewrite since we edited by hand). +4. Verify BOTH app and editor over TLS: + - `curl -sI https://labs.scaler.com/login` → 200 + - `curl -sI https://labs.scaler.com/static/lms.js` → 200 + - open a live `/editor//` over https → code-server loads, its + **websocket connects** (DevTools → Network → WS shows `101`), and + the lab-tutor bubble appears (the `sub_filter` injection survived). 5. Flip the two env vars (`SESSION_COOKIE_SECURE=true`, - `COURSE_GEN_EDITOR_PUBLIC_BASE=https://`) → + `COURSE_GEN_EDITOR_PUBLIC_BASE=https://labs.scaler.com`) → `sudo systemctl restart course-gen-codex.service`. -6. Smoke (the standard block, but https): login, static assets, - `/v1/tutor/*` 401, a full register→enroll→editor round-trip; verify - the session cookie shows `Secure`; no mixed-content console errors; - editor opens over https. +6. Smoke (standard block, https): login, static assets, `/v1/tutor/*` + 401 unauth, a full register→enroll→editor round-trip; verify the + session cookie shows `Secure`; **open the in-editor tutor, send a + message, reopen — history rehydrates**; no mixed-content console + errors. ### Rollback - App: revert the two env vars → restart (instant; back to working http behavior). -- Edge: certbot kept a backup of the nginx config - (`/etc/nginx/…*.bak` / `certbot rollback`); `sudo nginx -t && - sudo systemctl reload nginx`. Cert files are inert if unreferenced. +- Edge: **before hand-editing, `sudo cp conf.d/course-gen-codex.conf + conf.d/course-gen-codex.conf.pre-tls.bak`** (there are already + `.bak.` siblings — follow that convention). Rollback = restore + the `.bak`, `sudo nginx -t && sudo systemctl reload nginx`. Cert + files are inert if unreferenced. (We use `certbot certonly` + a + hand-edit, so there's no `certbot rollback` of nginx — the `.bak` is + the rollback.) - DNS/SG changes are independently reversible. ### Notes @@ -211,4 +256,4 @@ No source changes: grep confirmed no hardcoded `http://18.236...` in - Renewal is automatic (certbot timer); no app redeploy on renew. - After cutover, update the smoke-test base in this runbook and any `COURSE_GEN_EDITOR_PUBLIC_BASE` references from - `http://18.236.242.248` to `https://`. + `http://18.236.242.248` to `https://labs.scaler.com`. From b471b5df6ee9994a89b7b1d5f0f94f75a9405955 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 17:56:35 +0530 Subject: [PATCH 04/16] =?UTF-8?q?M48:=20Scaler=20favicon=20site-wide=20+?= =?UTF-8?q?=20backlog=20=C2=A727=20(editor-port=20tutor=20history)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - favicon: learner pages already use the Scaler favicon (lms_page.py BRAND_ICON_URL -> lms_home/lms_courses; login.html hardcodes it). Added the same `` to the creator/internal pages that had none — dashboard.html, docs.html, draft_timeline.html. Site-wide consistent now. - backlog §27: in-editor tutor falls back to an ephemeral `editor-` session_id when editor-context can't map the port to an enrollment; ports change on code-server restart so that history orphans. (IP↔CNAME itself does NOT lose history — session_id is enrollment-derived/origin-independent; verified against live data. The transition artifact is host-scoped cookie + origin-scoped localStorage, restored on re-login.) Local commit only (deploy frozen). --- app/templates/dashboard.html | 1 + app/templates/docs.html | 1 + app/templates/draft_timeline.html | 1 + docs/superpowers/course-gen-backlog.md | 28 ++++++++++++++++++++++++++ 4 files changed, 31 insertions(+) diff --git a/app/templates/dashboard.html b/app/templates/dashboard.html index 659f7b6..fda5d99 100644 --- a/app/templates/dashboard.html +++ b/app/templates/dashboard.html @@ -4,6 +4,7 @@ Create Course Draft + diff --git a/app/templates/docs.html b/app/templates/docs.html index 4b734ff..8183b2b 100644 --- a/app/templates/docs.html +++ b/app/templates/docs.html @@ -4,6 +4,7 @@ API Docs + diff --git a/app/templates/draft_timeline.html b/app/templates/draft_timeline.html index 5b644c2..70e8442 100644 --- a/app/templates/draft_timeline.html +++ b/app/templates/draft_timeline.html @@ -4,6 +4,7 @@ Draft Timeline + diff --git a/docs/superpowers/course-gen-backlog.md b/docs/superpowers/course-gen-backlog.md index 5a47e12..7777657 100644 --- a/docs/superpowers/course-gen-backlog.md +++ b/docs/superpowers/course-gen-backlog.md @@ -852,6 +852,34 @@ nothing but the internal `workspace_seeded.txt` marker. --- +## 27. In-editor tutor history fragments under the ephemeral `editor-` session + +**Status:** Backlog. Generic — affects every lab with the lab tutor. + +**Originating direct fix:** Customer Support Bot. Audited tutor history +across IP↔CNAME (2026-05-18). The DB is fine: history is keyed +`(user_id, session_id)` with `session_id = lms-` +(origin/scheme-independent), so IP vs `labs.scaler.com` does **not** +lose data — the apparent break is just host-scoped auth cookie + +origin-scoped localStorage on the transition (re-login on the new host +restores it). But the live data showed one `editor-35517` session: +`tutor.py` `editor-context` falls back to +`TutorEditorContext(session_id=f"editor-{port}")` when it can't map the +code-server port to an enrollment. Code-server ports are **ephemeral** +(reassigned per container/restart), so any history persisted under +`editor-` is orphaned on the next editor restart — the in-editor +tutor "forgets" independent of any domain change. + +**Generalization:** the in-editor widget must always resolve to the +stable `lms-` session. Options: make `editor-context` +fail closed (no tutor / clear "not linked" state) instead of minting a +volatile `editor-` session; or have the port→enrollment mapping +be authoritative and never persist/serve chat under `editor-`. +Where: `app/api/tutor.py` (`editor-context`), `tutor_service` history +key, `lab-tutor-editor-boot.js`. + +--- + ## How to add to this list When making a direct course fix: From d5463e95e6884b9025e35b8e4ef59413b9301c16 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 18:01:51 +0530 Subject: [PATCH 05/16] =?UTF-8?q?M49:=20in-editor=20tutor=20session=20neve?= =?UTF-8?q?r=20keyed=20on=20the=20ephemeral=20port=20(=C2=A727)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit editor-context fell back to session_id=f"editor-{port}" when it couldn't map the code-server port to an enrollment. code-server ports are reassigned on every container restart, so persisted tutor history silently fragmented/orphaned across editor restarts (live data showed an `editor-35517` session). Fix: the fallback now resolves the learner's own enrollment and returns the SAME stable `lms-` the LMS-page widget uses (shared history); only if they have no enrollment does it use a stable per-user `editor-user-`. Never `editor-`. The widget still always mounts (unchanged contract). Regression test added (test_tutor_routes): port with no workspace session → lms- when enrolled, else editor-user- — asserts the ephemeral editor- is never returned. Local commit; deploys with the app in this batch. --- app/api/tutor.py | 39 +++++++++++++++++++++++++++++++------- tests/test_tutor_routes.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/app/api/tutor.py b/app/api/tutor.py index b4dd8dd..21f16ce 100644 --- a/app/api/tutor.py +++ b/app/api/tutor.py @@ -105,12 +105,37 @@ def editor_context( LMS-page widget. Owner-checked; generic fallback on any miss so the widget always mounts and never leaks another learner's title. """ - fallback = TutorEditorContext( - assignment_title="Lab workspace", session_id=f"editor-{port}" - ) store = _store(request) + + def _fallback() -> TutorEditorContext: + # NEVER key on the ephemeral code-server port: it changes on + # every container restart, so an `editor-` session_id + # silently fragments/orphans the learner's tutor history. Fall + # back to the learner's own enrollment (the SAME stable + # `lms-` the LMS-page widget uses — shared + # history); only if they have no enrollment, a stable per-user + # key. Both survive editor restarts. + try: + if store is not None: + enrs = store.list_learner_enrollments( + learner_id=str(user.id), limit=50 + ) + if enrs: # store returns most-recently-updated first + e = enrs[0] + return TutorEditorContext( + assignment_title=getattr(e, "course_title", None) + or "Lab workspace", + session_id=f"lms-{e.id}", + ) + except Exception: + pass + return TutorEditorContext( + assignment_title="Lab workspace", + session_id=f"editor-user-{user.id}", + ) + if store is None: - return fallback + return _fallback() try: sessions = [ s @@ -121,13 +146,13 @@ def editor_context( sessions.sort(key=lambda s: (s.status.value == "running", s.updated_at), reverse=True) session = sessions[0] if sessions else None if session is None: - return fallback + return _fallback() enrollment = store.get_learner_enrollment(session.enrollment_id) if enrollment is None or enrollment.learner_id != str(user.id): - return fallback + return _fallback() return TutorEditorContext( assignment_title=enrollment.course_title or "Lab workspace", session_id=f"lms-{enrollment.id}", ) except Exception: - return fallback + return _fallback() diff --git a/tests/test_tutor_routes.py b/tests/test_tutor_routes.py index f496b73..ea6849b 100644 --- a/tests/test_tutor_routes.py +++ b/tests/test_tutor_routes.py @@ -95,6 +95,38 @@ def test_submit_returns_two_viva_questions(self) -> None: for q in body["viva_questions"]: self.assertTrue(q["prompt"]) + def test_editor_context_never_returns_ephemeral_port_session(self) -> None: + """§27: a port that maps to no workspace session must NOT yield + an `editor-` session_id (ephemeral → fragments tutor + history on every code-server restart). It must resolve to the + learner's stable enrollment session, else a stable per-user id.""" + from types import SimpleNamespace + from unittest.mock import patch + + # (a) learner has an enrollment → stable lms- + store = MagicMock() + store.list_all_learner_workspace_sessions.return_value = [] # no port match + store.list_learner_enrollments.return_value = [ + SimpleNamespace(id="enr_abc", course_title="Customer Support Bot") + ] + with patch("app.api.tutor._store", return_value=store): + r = self.client.get("/v1/tutor/editor-context", params={"port": 47213}) + self.assertEqual(r.status_code, 200) + body = r.json() + self.assertEqual(body["session_id"], "lms-enr_abc") + self.assertNotIn("editor-47213", body["session_id"]) + + # (b) no enrollment → stable per-user id, still never editor- + store2 = MagicMock() + store2.list_all_learner_workspace_sessions.return_value = [] + store2.list_learner_enrollments.return_value = [] + with patch("app.api.tutor._store", return_value=store2): + r2 = self.client.get("/v1/tutor/editor-context", params={"port": 47213}) + self.assertEqual(r2.status_code, 200) + sid = r2.json()["session_id"] + self.assertTrue(sid.startswith("editor-user-")) + self.assertNotIn("editor-47213", sid) + def test_chat_rejects_missing_session_id(self) -> None: resp = self.client.post("/v1/tutor/chat", json={"message": "hi"}) self.assertEqual(resp.status_code, 422) From f0a90554ff78a0e4f53a39af35d305784cbef1df Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 18:08:11 +0530 Subject: [PATCH 06/16] M50: harden tutor brief resolution + de-brittle 2 persona assertions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found while running the suite during deploy (62 tests now green): - tutor_service._resolve_session_context: only accept project_brief_markdown(snap)/deliverables_markdown(...) when they are real non-empty str. A malformed snapshot (or a MagicMock test double) previously let a non-str flow into _build_system_prompt's "".join → TypeError. Defensive in prod, fixes the persistence test. - test_tutor_service: the warm persona prose itself now contains the literal " / " as an instruction, so the two "basic prompt" tests asserting the raw tag is absent were stale/ brittle. Assert the CLOSING tag instead — that only appears when an actual context block is wrapped in by _build_system_prompt. Not caused by this batch (pre-existing brittleness exposed by the full run). Local commit; deploys with the app. --- app/services/tutor_service.py | 12 ++++++++++-- tests/test_tutor_service.py | 11 +++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/app/services/tutor_service.py b/app/services/tutor_service.py index 2e55246..03604ff 100644 --- a/app/services/tutor_service.py +++ b/app/services/tutor_service.py @@ -323,10 +323,18 @@ def _resolve_session_context(self, session_id: str) -> tuple[str, str, str, str deliverables_markdown, project_brief_markdown, ) - brief = project_brief_markdown(snap) or brief + # Only accept real, non-empty strings — a + # malformed snapshot (or a test double) must not + # let a non-str flow into _build_system_prompt's + # "".join (TypeError). + _b = project_brief_markdown(snap) + if isinstance(_b, str) and _b.strip(): + brief = _b lp = getattr(snap, "learner_package", None) if lp is not None and getattr(lp, "deliverables", None): - delivs = deliverables_markdown(lp.deliverables) or delivs + _d = deliverables_markdown(lp.deliverables) + if isinstance(_d, str) and _d.strip(): + delivs = _d except Exception as exc: logger.debug("[tutor] snapshot brief fallback failed for %s: %s", session_id, exc) diff --git a/tests/test_tutor_service.py b/tests/test_tutor_service.py index 200fe55..c644876 100644 --- a/tests/test_tutor_service.py +++ b/tests/test_tutor_service.py @@ -97,9 +97,12 @@ def test_chat_without_store_uses_basic_system_prompt(self) -> None: system_text = call_kwargs["system"][0]["text"] user_content = call_kwargs["messages"][0]["content"] - # System prompt should be plain persona with no project_brief - self.assertNotIn("", system_text) - self.assertNotIn("", system_text) + # No CONTEXT BLOCK injected. (The persona prose itself mentions + # " / " as an instruction, so check + # for the closing tag — that only appears when a real block is + # wrapped in by _build_system_prompt.) + self.assertNotIn("", system_text) + self.assertNotIn("", system_text) # User content should be the raw message self.assertEqual(user_content, "help me") @@ -117,7 +120,7 @@ def test_chat_with_store_but_no_matching_session_uses_basic_prompt(self) -> None system_text = call_kwargs["system"][0]["text"] user_content = call_kwargs["messages"][0]["content"] - self.assertNotIn("", system_text) + self.assertNotIn("", system_text) self.assertEqual(user_content, "what do I do?") def test_chat_with_store_and_matching_session_includes_context(self) -> None: From a01bd64edbde329fdd0e974b96015b8642772175 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 18:19:11 +0530 Subject: [PATCH 07/16] =?UTF-8?q?M51:=20HTTPS=20migration=20executed=20?= =?UTF-8?q?=E2=80=94=20live=20at=20https://labs.scaler.com?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runbook marked EXECUTED with the actual procedure (certbot certonly --nginx authenticator, hand-written nginx conf preserving the editor regex/sub_filter/WS block, env cutover SESSION_COOKIE_SECURE=true + COURSE_GEN_EDITOR_PUBLIC_BASE=https). Live nginx conf committed to infra/nginx/labs.scaler.com.conf (was hand-edited on host; now version-controlled per the "never hand-edit prod outside git" invariant — host backups at *.pre-tls.bak.). Verified end-to-end over https (this session): 80->443 301; Secure session cookie; catalog/enroll/seed; §27 editor-context returns stable lms- (never editor-); tutor chat persisted + rehydrated; full baseline submission grader round-trip → scorecard 0/25 (M45 discrimination intact); dense reference 25/25 over the deployed bundle. --- docs/DEPLOYMENT_RUNBOOK.md | 38 ++++++++++++++++++++--- infra/nginx/labs.scaler.com.conf | 53 ++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 infra/nginx/labs.scaler.com.conf diff --git a/docs/DEPLOYMENT_RUNBOOK.md b/docs/DEPLOYMENT_RUNBOOK.md index a933998..e08ea07 100644 --- a/docs/DEPLOYMENT_RUNBOOK.md +++ b/docs/DEPLOYMENT_RUNBOOK.md @@ -136,10 +136,40 @@ design. Keep that index line if you edit the file. ## HTTPS migration (HTTP → TLS via nginx + certbot) -**Status: PLANNED — not executed (deploy freeze + needs a domain).** -Decision: domain + nginx + Let's Encrypt (`certbot`). A trusted cert -**cannot** be issued for the bare IP `18.236.242.248` — a domain is -mandatory. +**Status: EXECUTED 2026-05-18 — live at https://labs.scaler.com.** +What was actually done (differs slightly from the original plan, kept +for the renewal/rollback record): +- `dnf install -y certbot python3-certbot-nginx` (was not installed). +- Cert issued with the **nginx authenticator, certonly** (no + installer/rewrite): `certbot certonly --nginx -d labs.scaler.com + --agree-tos --register-unsafely-without-email -n`. Auto-renew timer + installed by certbot; renewal re-uses the nginx authenticator (no + webroot needed). **No ops email** — add one later via `certbot + update_account` if expiry mail is wanted. +- nginx conf **hand-written** (not via `--nginx` installer, which + mangles the regex `/editor/` locations + `sub_filter`): a `listen 80 + … return 301 https://$host$request_uri;` redirect server + a `listen + 443 ssl default_server` server that reuses the editor regex + locations / `sub_filter` / WS-upgrade / `X-Forwarded-Proto $scheme` + **verbatim**. Backups: `/etc/nginx/conf.d/course-gen-codex.conf + .pre-tls.bak.`; the staged source is in the repo at + `infra/nginx/labs.scaler.com.conf` (committed) — rollback = restore + the `.bak`, `nginx -t && systemctl reload nginx`. +- Env cutover in `/opt/course-gen-codex/.env` (backed up to + `.env.pre-tls.bak.`): `SESSION_COOKIE_SECURE=true`, + `COURSE_GEN_EDITOR_PUBLIC_BASE=https://labs.scaler.com`; then + `systemctl restart course-gen-codex.service`. +- Verified end-to-end over https: 301 redirect; `Secure` session + cookie; catalog/enroll/seed; in-editor tutor `editor-context` + returns the stable `lms-` (not `editor-`); + tutor chat persisted + rehydrated; a full baseline submission + round-trip (grader Docker pipeline) returned a scorecard. + +Original plan/notes below retained for renewals & rollback. + +**Decision: domain + nginx + Let's Encrypt (`certbot`).** A trusted +cert **cannot** be issued for the bare IP `18.236.242.248` — a domain +is mandatory. ### Prerequisites (gather before touching the host) diff --git a/infra/nginx/labs.scaler.com.conf b/infra/nginx/labs.scaler.com.conf new file mode 100644 index 0000000..24956f5 --- /dev/null +++ b/infra/nginx/labs.scaler.com.conf @@ -0,0 +1,53 @@ +server { + listen 80 default_server; + listen [::]:80 default_server; + server_name _; + return 301 https://$host$request_uri; +} + +server { + listen 443 ssl default_server; + listen [::]:443 ssl default_server; + server_name labs.scaler.com; + + ssl_certificate /etc/letsencrypt/live/labs.scaler.com/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/labs.scaler.com/privkey.pem; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_prefer_server_ciphers off; + + client_max_body_size 10M; + + proxy_read_timeout 300s; + proxy_send_timeout 300s; + proxy_connect_timeout 75s; + + location ~ ^/editor/(?[0-9]+)/(?.*)$ { + proxy_pass http://127.0.0.1:$eport/$rest$is_args$args; + proxy_set_header Accept-Encoding ""; + sub_filter_once on; + sub_filter '' ''; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 3600s; + proxy_send_timeout 3600s; + } + + location ~ ^/editor/(?[0-9]+)$ { + return 302 /editor/$eport/; + } + + location / { + proxy_pass http://127.0.0.1:8040; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + } +} From 8b7e086e02d024ddd06e9922f6adb0b0540e5bf9 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 18:39:18 +0530 Subject: [PATCH 08/16] M52: fix undefined --ink CSS token in scorecard styling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lms.css used var(--ink) in 7 places (test-result name / we-val / worked-example value text, added in M36/M40) but the design system defines --text (#0f172a) in app-shell.css :root — there is no --ink. Every `color: var(--ink)` was therefore invalid → text fell back to an inherited/wrong colour, making the scorecard/worked-example styling look off. Replaced all 7 with var(--text) (the intended primary ink colour). Other tokens lms.css uses are all defined in app-shell.css. Static-only; deployed; served lms.css confirmed 0 occurrences. Regression introduced by M36/M40 — local commit. --- app/static/lms.css | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/app/static/lms.css b/app/static/lms.css index 5cb99e7..32a951f 100644 --- a/app/static/lms.css +++ b/app/static/lms.css @@ -948,7 +948,7 @@ .review-guidance-summary { margin: 10px 0 0; - color: var(--ink); + color: var(--text); line-height: 1.55; } @@ -971,7 +971,7 @@ .review-guidance-section ul, .review-guidance-section ol { margin: 0; - color: var(--ink); + color: var(--text); font-size: 13px; line-height: 1.55; } @@ -1083,7 +1083,7 @@ .test-result-name { font-weight: 600; - color: var(--ink); + color: var(--text); font-size: 13px; word-break: break-word; } @@ -1136,7 +1136,7 @@ .test-result-headline { margin: 6px 0 0; - color: var(--ink); + color: var(--text); font-size: 13px; line-height: 1.5; } @@ -1145,7 +1145,7 @@ list-style: disc; margin: 0; padding: 12px 16px 14px 36px; - color: var(--ink); + color: var(--text); font-size: 13px; line-height: 1.55; border-top: 1px solid var(--line); @@ -1186,7 +1186,7 @@ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12px; line-height: 1.5; - color: var(--ink); + color: var(--text); white-space: pre-wrap; word-break: break-word; background: transparent; @@ -1207,7 +1207,7 @@ .test-result-example .we-hint-text { font-size: 13px; line-height: 1.55; - color: var(--ink); + color: var(--text); } .writeup-shell { From 9addc77bef0e7a1efd4535398496b3567c598b49 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 18:44:39 +0530 Subject: [PATCH 09/16] M53: give the tutor the learner's actual workspace code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The in-editor tutor was telling learners "I don't see your app.py — paste it" because: - LearnerWorkspaceSession.workspace_root is None on editor sessions, so _resolve_session_context skipped the code/brief block entirely. - _read_brief_files still read project_brief.md/deliverables.md, which M41 stopped seeding (workspaces now ship a single README.md). Fix: - Resolve the live workspace deterministically from the enrollment: /learner_workspaces/// workspace (same path lms_service uses), used whenever the session row's workspace_root is unset. Then _build_code_snapshot reads the learner's real files (incl. public/starter/app.py). - _read_brief_files prefers the consolidated README.md; legacy split files kept as fallback for old workspaces. Verified on staging ($0, no LLM): for a real support_bot enrollment _resolve_session_context now returns brief=6989B + code=17852B with public/starter/app.py included. Tutor suite: 27 passed. --- app/services/tutor_service.py | 59 +++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/app/services/tutor_service.py b/app/services/tutor_service.py index 03604ff..28825f5 100644 --- a/app/services/tutor_service.py +++ b/app/services/tutor_service.py @@ -183,8 +183,22 @@ def _build_code_snapshot(root: Path) -> str: return "File tree:\n" + "\n".join(f" {ln}" for ln in tree_lines) + "\n\nFile contents:\n" + "".join(body_lines) +# Learner workspaces live at /learner_workspaces// +# /workspace (mirrors lms_service._workspace_root). +# tutor_service is app/services/, so parents[2] == repo root. +_LEARNER_WS_BASE = Path(__file__).resolve().parents[2] / "learner_workspaces" + + def _read_brief_files(root: Path) -> tuple[str, str]: - """Return (project_brief, deliverables) as strings.""" + """Return (project_brief, deliverables) as strings. + + Workspaces now seed a single consolidated ``README.md`` (the brief + + a Deliverables section); ``project_brief.md`` / ``deliverables.md`` + are legacy and no longer written. Prefer README; fall back to the + legacy split files for older workspaces.""" + readme = _read_text_safely(root / "README.md", 48 * 1024) or "" + if readme.strip(): + return readme.strip(), "" brief = _read_text_safely(root / "project_brief.md", 32 * 1024) or "" delivs = _read_text_safely(root / "deliverables.md", 32 * 1024) or "" return brief.strip(), delivs.strip() @@ -303,22 +317,47 @@ def _resolve_session_context(self, session_id: str) -> tuple[str, str, str, str if enrollment_id is None and session is not None: enrollment_id = getattr(session, "enrollment_id", None) + # Resolve the enrollment once — used both to locate the live + # learner workspace and for the snapshot-brief fallback. + enr = None + if enrollment_id: + try: + enr = self._store.get_learner_enrollment(enrollment_id) + except Exception as exc: + logger.debug("[tutor] enrollment lookup failed for %s: %s", session_id, exc) + enr = None + + # Find the learner's live workspace dir so the tutor can READ + # their code (the #1 thing it needs to actually help). Prefer the + # session's workspace_root; it is frequently unset on editor + # sessions, so fall back to the deterministic LMS path + # learner_workspaces///workspace. brief, delivs, code = "", "", "" + ws_dir: Path | None = None if session is not None: - workspace_root = Path(getattr(session, "workspace_root", "") or "") - if str(workspace_root) and workspace_root.exists(): - brief, delivs = _read_brief_files(workspace_root) - code = _build_code_snapshot(workspace_root) + sroot = Path(getattr(session, "workspace_root", "") or "") + if str(sroot) and sroot.exists(): + ws_dir = sroot + if ws_dir is None and enr is not None: + cand = ( + _LEARNER_WS_BASE + / str(enr.learner_id) + / str(enr.shared_workflow_run_id) + / "workspace" + ) + if cand.exists(): + ws_dir = cand + if ws_dir is not None: + brief, delivs = _read_brief_files(ws_dir) + code = _build_code_snapshot(ws_dir) # No materialized workspace yet (learner hasn't opened the editor) # — use the publish snapshot's brief so the tutor still has the # full spec. The tutor must NEVER ask the learner to paste it. - if not brief and enrollment_id: + if not brief and enr is not None: try: - enr = self._store.get_learner_enrollment(enrollment_id) - if enr is not None: - snap = self._store.get_publish_snapshot(enr.publish_snapshot_id) - if snap is not None: + snap = self._store.get_publish_snapshot(enr.publish_snapshot_id) + if snap is not None: from app.services.learner_package_runtime import ( deliverables_markdown, project_brief_markdown, From bb83233ba550f69e0b7c346b346ccceda84c6455 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 18:49:14 +0530 Subject: [PATCH 10/16] M54: one pass/fail verdict everywhere + fix pill row alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same submission read three different ways: submission-history pill used raw backend status ("Failed" unless 25/25), the submit-panel banner used `latestSubmission.status === "passed"` ("Project needs another pass"), and the Latest-Project-Review card/deliverable chips used isSolved with a rogue `passed >= 15` heuristic ("Solved", green). So 20/25 showed Failed AND Solved AND needs-another-pass at once. Fix: a single learner-facing rule for ALL surfaces — isSolved() = pass_rate >= PASS_RATE_BAR (0.88 = the documented ">= 22/25" bar; backendStatus==="passed" still an override). Removed GOOD_ACCURACY_MARKS. Wired the history pill + submit banner + toast through isSolved so banner / pill / latest-review / chips always agree. 20/25 → "needs work" everywhere; >=22/25 → solved everywhere; matches the brief. Also: .submission-item-meta now align-items:center so the status pill and the Pass rate / Submitted info-pills sit on one centered baseline (reported "alignment is still off"). Static-only; deployed; served lms.js/css verified. --- app/static/lms.css | 1 + app/static/lms.js | 29 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/app/static/lms.css b/app/static/lms.css index 32a951f..b26aa59 100644 --- a/app/static/lms.css +++ b/app/static/lms.css @@ -647,6 +647,7 @@ .submission-item-meta { display: flex; flex-wrap: wrap; + align-items: center; gap: 8px; } diff --git a/app/static/lms.js b/app/static/lms.js index 86c2d94..6d8cbbc 100644 --- a/app/static/lms.js +++ b/app/static/lms.js @@ -49,9 +49,13 @@ return String(t ?? "").replace(/^\s*Production(?:-Quality)?\s+/i, "").trim(); } - // 15+/18 (>=15 marks) is treated as "solved"/green in the UI. The - // backend status stays authoritative for data; this is render-only. - const GOOD_ACCURACY_MARKS = 15; + // ONE learner-facing pass rule for EVERY surface (submit banner, + // submission-history pill, latest-review card, deliverable chips) so + // the same submission can never read "Failed" in one place and + // "Solved" in another. The bar is the documented one ("≥ 22 / 25" in + // the brief = 88%). backendStatus === "passed" stays an override for + // any flow that already records a hard pass. + const PASS_RATE_BAR = 0.88; // Concept-level hint per failing rubric — bridges the gap between // "what's wrong" (the worked example) and the IR/technique a learner @@ -139,8 +143,10 @@ } function isSolved(passed, total, backendStatus) { - const p = Number(passed || 0); - return backendStatus === "passed" || p >= GOOD_ACCURACY_MARKS || (total && p === Number(total)); + if (backendStatus === "passed") return true; + const t = Number(total || 0); + if (!t) return false; + return Number(passed || 0) / t >= PASS_RATE_BAR; } function titleCase(value) { @@ -1338,9 +1344,9 @@ ${historyByAttempt.map((submission) => `
${escapeHtml(formatDate(submission.created_at))} -

${escapeHtml(titleCase(submission.status))} · ${escapeHtml(`${submission.passed_tests}/${submission.total_tests} checks passed`)}

+

${escapeHtml(`${isSolved(submission.passed_tests, submission.total_tests, submission.status) ? "Solved" : "Needs work"} · ${submission.passed_tests}/${submission.total_tests} checks passed`)}

- ${renderStatusPill(submission.status === "passed" ? "passed" : "blocked", titleCase(submission.status))} + ${renderStatusPill(isSolved(submission.passed_tests, submission.total_tests, submission.status) ? "passed" : "blocked", isSolved(submission.passed_tests, submission.total_tests, submission.status) ? "Solved" : "Needs work")} ${renderInfoPill("Pass rate", percent(submission.pass_rate))} ${renderInfoPill("Submitted", formatDate(submission.created_at))}
@@ -1614,9 +1620,12 @@ const latestSubmission = gradedExperience.latest_assignment_submission; stopSubmitProgress(); + const latestSolved = latestSubmission + ? isSolved(latestSubmission.passed_tests, latestSubmission.total_tests, latestSubmission.status) + : false; uiState.submissionFeedback = { - kind: latestSubmission?.status === "passed" ? "success" : "error", - title: latestSubmission?.status === "passed" ? "Project reviewed" : "Project needs another pass", + kind: latestSolved ? "success" : "error", + title: latestSolved ? "Project reviewed" : "Project needs another pass", message: latestSubmission ? `${latestSubmission.passed_tests}/${latestSubmission.total_tests} checks passed.` : "Grading finished.", @@ -1627,7 +1636,7 @@ if (latestSubmission) { showToast( - latestSubmission.status === "passed" ? "success" : "info", + latestSolved ? "success" : "info", "Review finished", `${latestSubmission.passed_tests}/${latestSubmission.total_tests} checks passed.` ); From 0b5965736bae50c40bfc803d71cacd810dfd54a6 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 18:59:18 +0530 Subject: [PATCH 11/16] M55: stop the tutor dropping the learner's app.py (size-cap bug) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M53 correctly resolved the workspace path, but the tutor still said "I can't see your app.py, paste it". Root cause: _PER_FILE_MAX_BYTES was 8KB and _read_text_safely RETURNED NONE for any file over it, so _build_code_snapshot skipped it. A learner who has actually implemented app.py (~11KB here) exceeds 8KB → app.py dropped entirely (path stayed in the file tree, contents omitted → "its contents weren't included"). The more complete the submission, the more certain the tutor couldn't see it. mtime-desc ordering also let platform-refreshed README/.vscode crowd it out. Fix: - _read_text_safely TRUNCATES over-limit files (with a marker) and bounded-reads, instead of dropping them. Never silently lose the main code file again. - Caps raised: per-file 8KB->24KB, total 40KB->64KB (still bounded). - _build_code_snapshot ranks learner SOURCE files (_CODE_SUFFIXES) ahead of README/sample-data/config, then by mtime — app.py always wins the budget. Verified on the live active workspace: snapshot now includes `--- public/starter/app.py ---` (full contents), listed first; tutor suite 27 passed. --- app/services/tutor_service.py | 45 ++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/app/services/tutor_service.py b/app/services/tutor_service.py index 28825f5..705faf0 100644 --- a/app/services/tutor_service.py +++ b/app/services/tutor_service.py @@ -96,9 +96,20 @@ _SKIP_SUFFIXES = {".lock", ".pyc", ".pyo", ".so", ".o", ".a", ".class", ".jar"} _BINARY_SUFFIXES = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".pdf", ".zip", ".tar", ".gz", ".bin"} -# Hard size limits to keep prompt cost bounded. -_PER_FILE_MAX_BYTES = 8 * 1024 # skip a file if it exceeds this on its own -_TOTAL_CODE_MAX_BYTES = 40 * 1024 # truncate code-snapshot collection at this aggregate +# Hard size limits to keep prompt cost bounded. NOTE: over-limit files +# are TRUNCATED, never skipped — a learner's implemented app.py is +# routinely >8KB and dropping it entirely is why the tutor used to say +# "I can't see your app.py, paste it". +_PER_FILE_MAX_BYTES = 24 * 1024 +_TOTAL_CODE_MAX_BYTES = 64 * 1024 + +# Source files the learner actually edits — these win the snapshot +# budget over README/sample-data/config so the tutor always sees code. +_CODE_SUFFIXES = { + ".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".java", ".rb", ".rs", + ".c", ".cc", ".cpp", ".h", ".hpp", ".cs", ".php", ".kt", ".swift", + ".scala", ".sql", ".sh", ".rs", ".ex", ".exs", +} # Failure detail caps. _FAILURE_MAX_BYTES = 4 * 1024 @@ -125,12 +136,19 @@ def _load_env_file(path: str | None) -> None: def _read_text_safely(path: Path, max_bytes: int) -> str | None: + """Read up to ``max_bytes`` of text. Over-limit files are + TRUNCATED (with a marker), never dropped — the learner's main code + file is frequently large and silently skipping it blinds the tutor. + Returns None only for non-files / OS errors.""" try: if not path.is_file(): return None - if path.stat().st_size > max_bytes: - return None - return path.read_text(encoding="utf-8", errors="replace") + with path.open("rb") as fh: + raw = fh.read(max_bytes + 1) + text = raw[:max_bytes].decode("utf-8", errors="replace") + if len(raw) > max_bytes: + text += "\n... [truncated]\n" + return text except OSError as exc: logger.debug("[tutor] could not read %s: %s", path, exc) return None @@ -158,8 +176,19 @@ def _build_code_snapshot(root: Path) -> str: if not files: return "" - # Sort by modification time (newest first) so the learner's recent edits win the budget. - files.sort(key=lambda pair: pair[1].stat().st_mtime, reverse=True) + # Learner source code first (so app.py always wins the budget over + # README / sample data / config, whose mtimes get bumped by platform + # re-publishes), then most-recently-modified within each group. + def _rank(pair): + rel, abs_path = pair + is_code = abs_path.suffix.lower() in _CODE_SUFFIXES + try: + mtime = abs_path.stat().st_mtime + except OSError: + mtime = 0.0 + return (0 if is_code else 1, -mtime) + + files.sort(key=_rank) tree_lines = [str(rel) for rel, _ in files[:80]] body_lines: list[str] = [] From c8abfabc7510312e4748024addcf4360ed895645 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 19:00:44 +0530 Subject: [PATCH 12/16] M56: pin app.py to always-included in the tutor code snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per request: hardcode the learner's primary file so it can never be crowded out. After the general code-first ranking, a stable re-sort pins public/starter/app.py (then any app.py) to the very front, so it always gets the snapshot budget first; combined with M55's truncate-not-skip it is guaranteed included (truncated only if it ever exceeds the 24KB per-file cap). Added TODO(tutor-context): generalise beyond a hardcoded app.py — derive the learner-editable file set from the course spec (learner_brief.files_to_edit / deliverable visible_files) so every course's primary file(s) get the same guaranteed inclusion. Verified on the live workspace: app.py listed first, contents present; tutor suite 27 passed. --- app/services/tutor_service.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/app/services/tutor_service.py b/app/services/tutor_service.py index 705faf0..04d0263 100644 --- a/app/services/tutor_service.py +++ b/app/services/tutor_service.py @@ -176,8 +176,8 @@ def _build_code_snapshot(root: Path) -> str: if not files: return "" - # Learner source code first (so app.py always wins the budget over - # README / sample data / config, whose mtimes get bumped by platform + # Learner source code first (so it wins the budget over README / + # sample data / config, whose mtimes get bumped by platform # re-publishes), then most-recently-modified within each group. def _rank(pair): rel, abs_path = pair @@ -190,6 +190,21 @@ def _rank(pair): files.sort(key=_rank) + # TODO(tutor-context): the set of "the file(s) the learner edits" + # is hardcoded to app.py for now. Generalise this — derive it from + # the deliverable's editable/visible files (the course spec already + # knows `learner_brief.files_to_edit` / `visible_files`) so any + # course's primary file(s) get the same guaranteed inclusion. + # Until then: pin app.py (prefer public/starter/app.py) to the very + # front so it is ALWAYS included regardless of ranking/budget. + _APP_PY = "public/starter/app.py" + files.sort( + key=lambda pair: ( + 0 if str(pair[0]) == _APP_PY else + 1 if pair[1].name == "app.py" else 2 + ) + ) + tree_lines = [str(rel) for rel, _ in files[:80]] body_lines: list[str] = [] budget = _TOTAL_CODE_MAX_BYTES From 4f7c7304bd417e0d3a71ec0f91da1a520a891e71 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 19:06:42 +0530 Subject: [PATCH 13/16] M57: tutor panel large by default + diagram enlarge actually enlarges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Default UI large: the panel now mounts in the existing lt-panel--expanded state (820px) with the header button showing the collapse/reduce icon. Learners get room for diagrams + long replies by default and can shrink to the compact 360px with one click. 2. "Click to enlarge" now works: the lightbox modal was correct but .lt-mermaid-modal-inner svg kept Mermaid's inline width, so the diagram opened at the same small size — looked like it "only gave a copy-source dialog". Force the svg to width:100% !important within a 92vw inner so it genuinely scales up. Note: a diagram that FAILS to render (invalid LLM-emitted mermaid, e.g. the quadrantChart) still shows only source — that's the separate mermaid-repair backlog item, not this change. Static-only; deployed; served js/css verified. --- app/static/lab-tutor.css | 14 +++++++++----- app/static/lab-tutor.js | 10 +++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/app/static/lab-tutor.css b/app/static/lab-tutor.css index 8ad8e34..f869eaf 100644 --- a/app/static/lab-tutor.css +++ b/app/static/lab-tutor.css @@ -430,14 +430,18 @@ background: #fff; border-radius: 10px; padding: 20px; - max-width: 94vw; - max-height: 82vh; + width: 92vw; + max-width: 92vw; + max-height: 84vh; overflow: auto; } +/* Force the diagram to scale UP to fill the lightbox. Mermaid emits an + inline max-width/width on the that otherwise pins it at its + small in-panel size, so "Click to enlarge" appeared to do nothing. */ .lt-mermaid-modal-inner svg { - width: auto; - height: auto; - max-width: 88vw; + width: 100% !important; + max-width: none !important; + height: auto !important; display: block; } .lt-mermaid pre { diff --git a/app/static/lab-tutor.js b/app/static/lab-tutor.js index 8ef3733..320db63 100644 --- a/app/static/lab-tutor.js +++ b/app/static/lab-tutor.js @@ -279,8 +279,9 @@ const expandBtn = document.createElement("button"); expandBtn.className = "lt-icon-btn lt-expand"; expandBtn.type = "button"; - expandBtn.setAttribute("aria-label", "Maximise Lab Tutor"); - expandBtn.appendChild(expandIcon()); + // Default to the large/maximised panel; the button reduces it. + expandBtn.setAttribute("aria-label", "Minimise Lab Tutor"); + expandBtn.appendChild(collapseIcon()); const closeBtn = document.createElement("button"); closeBtn.className = "lt-icon-btn lt-close"; @@ -296,7 +297,10 @@ header.appendChild(headerTitle); header.appendChild(headerActions); - let expanded = false; + // Large by default — diagrams + long replies need the room; the + // header button (collapseIcon, set above) reduces it back to compact. + let expanded = true; + panel.classList.add("lt-panel--expanded"); function toggleExpanded(force) { const next = typeof force === "boolean" ? force : !expanded; if (next === expanded) return; From 850b7bf3834dd41d752d418a9a40f936572a621f Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 19:08:50 +0530 Subject: [PATCH 14/16] M58: fix info-pill value misalignment (.submission-item strong bleed) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Pass rate 64%" — the value sat larger and vertically offset from its label. Root cause: `.submission-item strong` (the date-heading rule: display:block; font-size:14px; margin-bottom:6px) is a descendant match for the inside .info-pill too; equal specificity to `.info-pill strong` but later in the file, so it won — the value rendered block-level, 14px, with a bottom margin and couldn't center against the 12px label. Fix: `.info-pill strong` now explicitly resets display:inline, margin:0, font-size/line-height:inherit (so it matches the label and the flex `align-items:center` actually centers it). Static-only; deployed; served css verified. --- app/static/lms.css | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/app/static/lms.css b/app/static/lms.css index b26aa59..73eebdd 100644 --- a/app/static/lms.css +++ b/app/static/lms.css @@ -512,6 +512,14 @@ } .info-pill strong { + /* Reset the bleed from `.submission-item strong` (the date-heading + rule: display:block; font-size:14px; margin-bottom:6px) — equal + specificity but later in the file, so it otherwise wins and the + value renders block-level, larger, and offset from its label. */ + display: inline; + margin: 0; + font-size: inherit; + line-height: inherit; color: var(--text); font-weight: 600; } From 1e833ea46d4d4447b11fd5276421844c0cc202e4 Mon Sep 17 00:00:00 2001 From: Tushar Bisht Date: Mon, 18 May 2026 19:25:33 +0530 Subject: [PATCH 15/16] M59: enlarge diagram = re-render fresh (was an empty white box) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lightbox re-injected the panel's existing SVG string. Mermaid scopes its