diff --git a/.github/workflows/accessibility-regression.yml b/.github/workflows/accessibility-regression.yml index 003a483..6d2bd7d 100644 --- a/.github/workflows/accessibility-regression.yml +++ b/.github/workflows/accessibility-regression.yml @@ -56,6 +56,7 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip + python -m pip install --upgrade --force-reinstall "git+https://github.com/Community-Access/quill-glow-core.git@main" python -m pip install -e ./desktop python -m pip install -e ./web diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index e6d37a6..a51ec6c 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -60,6 +60,7 @@ jobs: done } + install_with_retry 4 python -m pip install --upgrade --force-reinstall "git+https://github.com/Community-Access/quill-glow-core.git@main" install_with_retry 4 python -m pip install -e ./desktop install_with_retry 4 python -m pip install -e ./web install_with_retry 4 python -m pip install -r ./mcp_server/requirements.txt diff --git a/.github/workflows/feature-flags-ci.yml b/.github/workflows/feature-flags-ci.yml index c0dd6bb..9979437 100644 --- a/.github/workflows/feature-flags-ci.yml +++ b/.github/workflows/feature-flags-ci.yml @@ -39,14 +39,17 @@ jobs: - name: Install dependencies (desktop and web dev) run: | python -m pip install --upgrade pip setuptools wheel + python -m pip install --upgrade --force-reinstall "git+https://github.com/Community-Access/quill-glow-core.git@main" python -m pip install -e desktop/ python -m pip install -e web[dev] - name: Run feature flags migration run: python web/tools/run_migrate.py - - name: Run web tests - run: python -m pytest web/ -q + - name: Run focused feature-flag regression tests + run: | + python -m pytest web/tests/test_admin_flags.py -q + python -m pytest web/tests/test_ai_feature_gates.py -q env: # ensure sqlite backend for audit tests FEATURE_FLAGS_BACKEND: sqlite diff --git a/README.md 
b/README.md index e78640d..2e00555 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,27 @@ Current rules are intentionally low-noise and focus on: - Produces PowerShell scripts for configuring Word document styles - Detects and uses external tools (markdownlint, Pandoc) when available +## Shared core architecture (retrofit baseline) + +GLOW now includes a shared service package at: + +- `desktop/src/acb_large_print_core/` + +This package is the canonical dispatch layer for **audit**, **fix**, and **MarkItDown conversion** across CLI, desktop, and web surfaces. + +Current shared entry points: + +- `acb_large_print_core.services.audit_by_extension(...)` +- `acb_large_print_core.services.fix_by_extension(...)` +- `acb_large_print_core.services.convert_to_markdown(...)` +- `acb_large_print_core.versions.get_component_versions()` + +Design intent: + +- Keep UX-specific behavior in each interface (web routes, GUI dialogs, CLI output formatting). +- Keep business logic dispatch and component-version provenance in shared core APIs. +- Enable future extraction to an external reusable package without changing app-layer behavior. + ## Recent Fix Workflow Updates (April 2026) - Fix Results now suppresses `ACB-FAUX-HEADING` from post-fix scoring when heading detection is explicitly disabled, and shows a "Suppressed by your settings" note for transparency. 
@@ -169,6 +190,7 @@ lp/ announcement.md Press release / announcement prd.md Canonical web app product requirements document deployment.md Step-by-step server deployment guide + shared-core-retrofit.md Shared service-core architecture and migration status samples/ *.md Example Markdown source files *.html Converted HTML output files @@ -181,7 +203,8 @@ lp/ Dockerfile Production container image docker-compose.yml Compose file for deployment desktop/ Desktop CLI + GUI (Python) - src/acb_large_print/ Core library (canonical source of truth) + src/acb_large_print_core/ Shared service core (canonical dispatch layer) + src/acb_large_print/ App-layer CLI/GUI + format engines office-addin/ Office.js Word Add-in (TypeScript) src/ TypeScript port of audit/fix/template vendor/ Vendored third-party source diff --git a/desktop/pyproject.toml b/desktop/pyproject.toml index 43d1304..c600c66 100644 --- a/desktop/pyproject.toml +++ b/desktop/pyproject.toml @@ -28,6 +28,7 @@ classifiers = [ "Topic :: Adaptive Technologies", ] dependencies = [ + "quill-glow-core>=0.1.0", "python-docx>=1.2.0", "mammoth>=1.11.0,<1.12.0", "markitdown[all]>=0.1.5", diff --git a/desktop/requirements.txt b/desktop/requirements.txt index aa37bd0..2ec7294 100644 --- a/desktop/requirements.txt +++ b/desktop/requirements.txt @@ -1,4 +1,5 @@ # Core dependencies +quill-glow-core>=0.1.0 python-docx>=1.1.0 mammoth>=1.11.0,<1.12.0 markitdown[all]>=0.1.5 diff --git a/desktop/src/acb_large_print/__init__.py b/desktop/src/acb_large_print/__init__.py index fa34ddc..27ccdbd 100644 --- a/desktop/src/acb_large_print/__init__.py +++ b/desktop/src/acb_large_print/__init__.py @@ -1,5 +1,13 @@ -"""ACB Large Print Tool -- audit, fix, and template Word documents for ACB compliance.""" +"""ACB Large Print Tool -- audit, fix, and convert accessible documents.""" + +from __future__ import annotations -__version__ = "5.0.0" __app_name__ = "ACB Large Print Tool" __author__ = "BITS (Blind Information Technology Solutions)" + +try: + 
from .version import get_version as _get_version + + __version__ = _get_version() +except Exception: + __version__ = "unknown" diff --git a/desktop/src/acb_large_print/cli.py b/desktop/src/acb_large_print/cli.py index fc3464c..1e63c4a 100644 --- a/desktop/src/acb_large_print/cli.py +++ b/desktop/src/acb_large_print/cli.py @@ -713,28 +713,14 @@ def _build_parser() -> argparse.ArgumentParser: # ── Supported file extensions ───────────────────────────────────────── -SUPPORTED_EXTENSIONS = {".docx", ".xlsx", ".pptx", ".epub"} +SUPPORTED_EXTENSIONS = {".docx", ".xlsx", ".pptx", ".md", ".pdf", ".epub"} def _audit_by_extension(file_path: Path): """Dispatch to the correct auditor based on file extension.""" - ext = file_path.suffix.lower() - if ext == ".xlsx": - from .xlsx_auditor import audit_workbook + from quill_glow_core import audit_by_extension - return audit_workbook(file_path) - elif ext == ".pptx": - from .pptx_auditor import audit_presentation - - return audit_presentation(file_path) - elif ext == ".epub": - from .epub_auditor import audit_epub - - return audit_epub(file_path) - else: - from .auditor import audit_document - - return audit_document(file_path) + return audit_by_extension(file_path) def _resolve_list_indent(args: argparse.Namespace) -> tuple[float, float]: @@ -819,61 +805,17 @@ def _fix_by_extension( Returns (output_path, total_fixes, fix_records, post_audit, warnings). """ - ext = file_path.suffix.lower() - if ext == ".xlsx": - from .xlsx_auditor import audit_workbook - - post_audit = audit_workbook(file_path) - return ( - file_path, - 0, - [], - post_audit, - [ - "Excel workbooks cannot be auto-fixed yet. " - "Review the audit findings and fix them manually in Excel." - ], - ) - elif ext == ".pptx": - from .pptx_auditor import audit_presentation - - post_audit = audit_presentation(file_path) - return ( - file_path, - 0, - [], - post_audit, - [ - "PowerPoint presentations cannot be auto-fixed yet. 
" - "Review the audit findings and fix them manually in PowerPoint." - ], - ) - elif ext == ".epub": - from .epub_auditor import audit_epub + from quill_glow_core import fix_by_extension - post_audit = audit_epub(file_path) - return ( - file_path, - 0, - [], - post_audit, - [ - "ePub files cannot be auto-fixed yet. " - "Review the audit findings and fix them in your ePub editor." - ], - ) - else: - from .fixer import fix_document - - return fix_document( - file_path, - output_path=output_path, - bound=bound, - list_indent_in=list_indent_in, - list_hanging_in=list_hanging_in, - para_indent_in=para_indent_in, - first_line_indent_in=first_line_indent_in, - ) + return fix_by_extension( + file_path, + output_path=output_path, + bound=bound, + list_indent_in=list_indent_in, + list_hanging_in=list_hanging_in, + para_indent_in=para_indent_in, + first_line_indent_in=first_line_indent_in, + ) def _cmd_audit(args: argparse.Namespace) -> int: @@ -885,9 +827,10 @@ def _cmd_audit(args: argparse.Namespace) -> int: return 1 ext = args.file.suffix.lower() - if ext not in (".docx", ".xlsx", ".pptx", ".epub"): + if ext not in SUPPORTED_EXTENSIONS: print( - f"Error: Unsupported file type '{ext}'. Use .docx, .xlsx, .pptx, or .epub.", + "Error: Unsupported file type " + f"'{ext}'. Use .docx, .xlsx, .pptx, .md, .pdf, or .epub.", file=sys.stderr, ) return 1 @@ -927,7 +870,8 @@ def _cmd_fix(args: argparse.Namespace) -> int: ext = args.file.suffix.lower() if ext not in SUPPORTED_EXTENSIONS: print( - f"Error: Unsupported file type '{ext}'. Use .docx, .xlsx, .pptx, or .epub.", + "Error: Unsupported file type " + f"'{ext}'. 
Use .docx, .xlsx, .pptx, .md, .pdf, or .epub.", file=sys.stderr, ) return 1 @@ -1291,7 +1235,8 @@ def _print_wcag_language_report(report) -> None: def _cmd_convert(args: argparse.Namespace) -> int: """Execute the convert command.""" - from .converter import CONVERTIBLE_EXTENSIONS, convert_to_markdown + from .converter import CONVERTIBLE_EXTENSIONS + from quill_glow_core import convert_to_markdown from .wcag_language import analyze_text_for_wcag_language if not args.file.exists(): diff --git a/desktop/src/acb_large_print/cli_main.py b/desktop/src/acb_large_print/cli_main.py index 2bc5ff3..a0dacad 100644 --- a/desktop/src/acb_large_print/cli_main.py +++ b/desktop/src/acb_large_print/cli_main.py @@ -6,6 +6,13 @@ def main() -> None: + try: + from quill_glow_core import configure_default_services as _configure_shared_core_default + + _configure_shared_core_default() + except Exception: + pass + from acb_large_print.cli import main as cli_main sys.exit(cli_main(force_cli=True)) diff --git a/desktop/src/acb_large_print_core/__init__.py b/desktop/src/acb_large_print_core/__init__.py new file mode 100644 index 0000000..c99c73a --- /dev/null +++ b/desktop/src/acb_large_print_core/__init__.py @@ -0,0 +1,23 @@ +"""Shared core services for GLOW audit/fix/convert workflows.""" + +from .services import ( + CONVERTIBLE_EXTENSIONS, + MARKITDOWN_AUDIO_EXTENSIONS, + SUPPORTED_AUDIT_EXTENSIONS, + SUPPORTED_FIX_EXTENSIONS, + audit_by_extension, + convert_to_markdown, + fix_by_extension, +) +from .versions import get_component_versions + +__all__ = [ + "CONVERTIBLE_EXTENSIONS", + "MARKITDOWN_AUDIO_EXTENSIONS", + "SUPPORTED_AUDIT_EXTENSIONS", + "SUPPORTED_FIX_EXTENSIONS", + "audit_by_extension", + "convert_to_markdown", + "fix_by_extension", + "get_component_versions", +] diff --git a/desktop/src/acb_large_print_core/services.py b/desktop/src/acb_large_print_core/services.py new file mode 100644 index 0000000..abcd96d --- /dev/null +++ b/desktop/src/acb_large_print_core/services.py @@ 
-0,0 +1,173 @@ +"""Canonical shared dispatch services for GLOW desktop/web/CLI.""" + +from __future__ import annotations + +from pathlib import Path + +SUPPORTED_AUDIT_EXTENSIONS = {".docx", ".xlsx", ".pptx", ".md", ".pdf", ".epub"} +SUPPORTED_FIX_EXTENSIONS = set(SUPPORTED_AUDIT_EXTENSIONS) + + +def audit_by_extension( + file_path: str | Path, + *, + list_indent_in: float | None = None, + list_level_indents: dict[int, float] | None = None, + para_indent_in: float | None = None, + first_line_indent_in: float | None = None, + style_size_overrides: dict[str, float] | None = None, +): + """Run the appropriate auditor for the file extension.""" + path = Path(file_path) + ext = path.suffix.lower() + + if ext == ".xlsx": + from acb_large_print.xlsx_auditor import audit_workbook + + return audit_workbook(path) + if ext == ".pptx": + from acb_large_print.pptx_auditor import audit_presentation + + return audit_presentation(path) + if ext == ".md": + from acb_large_print.md_auditor import audit_markdown + + return audit_markdown(path) + if ext == ".pdf": + from acb_large_print.pdf_auditor import audit_pdf + + return audit_pdf(path) + if ext == ".epub": + from acb_large_print.epub_auditor import audit_epub + + return audit_epub(path) + + from acb_large_print.auditor import audit_document + + return audit_document( + path, + list_indent_in=list_indent_in, + list_level_indents=list_level_indents, + para_indent_in=para_indent_in, + first_line_indent_in=first_line_indent_in, + style_size_overrides=style_size_overrides, + ) + + +def fix_by_extension( + file_path: str | Path, + output_path: str | Path | None = None, + *, + bound: bool = False, + list_indent_in: float = 0.0, + list_hanging_in: float = 0.0, + list_level_indents: dict[int, float] | None = None, + para_indent_in: float = 0.0, + first_line_indent_in: float = 0.0, + preserve_heading_alignment: bool = False, + detect_headings: bool = False, + ai_provider: object | None = None, + heading_threshold: int | None = None, + 
confirmed_headings: list | None = None, + heading_accuracy_level: str = "balanced", + style_size_overrides: dict[str, float] | None = None, +): + """Run fixer workflow for the extension. + + Returns (output_path, total_fixes, fix_records, post_audit, warnings). + """ + path = Path(file_path) + out = Path(output_path) if output_path is not None else None + ext = path.suffix.lower() + + if ext == ".xlsx": + post_audit = audit_by_extension(path) + return ( + path, + 0, + [], + post_audit, + [ + "Excel workbooks cannot be auto-fixed yet. " + "Review the audit findings and fix them manually in Excel." + ], + ) + if ext == ".pptx": + post_audit = audit_by_extension(path) + return ( + path, + 0, + [], + post_audit, + [ + "PowerPoint presentations cannot be auto-fixed yet. " + "Review the audit findings and fix them manually in PowerPoint." + ], + ) + if ext == ".md": + post_audit = audit_by_extension(path) + return ( + path, + 0, + [], + post_audit, + [ + "Markdown auto-fix is coming soon. " + "Review the audit findings and fix them in your text editor." + ], + ) + if ext == ".pdf": + post_audit = audit_by_extension(path) + return ( + path, + 0, + [], + post_audit, + [ + "PDF files cannot be auto-fixed. " + "Use Adobe Acrobat Pro or re-export from the source application." + ], + ) + if ext == ".epub": + post_audit = audit_by_extension(path) + return ( + path, + 0, + [], + post_audit, + [ + "ePub files cannot be auto-fixed yet. " + "Review the audit findings and fix them in your ePub editor." 
+ ], + ) + + from acb_large_print.fixer import fix_document + + return fix_document( + path, + output_path=out, + bound=bound, + list_indent_in=list_indent_in, + list_hanging_in=list_hanging_in, + list_level_indents=list_level_indents, + para_indent_in=para_indent_in, + first_line_indent_in=first_line_indent_in, + preserve_heading_alignment=preserve_heading_alignment, + detect_headings=detect_headings, + ai_provider=ai_provider, + heading_threshold=heading_threshold, + confirmed_headings=confirmed_headings, + heading_accuracy_level=heading_accuracy_level, + style_size_overrides=style_size_overrides, + ) + + +from acb_large_print import converter as _converter # noqa: E402 + +CONVERTIBLE_EXTENSIONS = _converter.CONVERTIBLE_EXTENSIONS +MARKITDOWN_AUDIO_EXTENSIONS = _converter.MARKITDOWN_AUDIO_EXTENSIONS + + +def convert_to_markdown(src_path: str | Path, output_path: str | Path | None = None): + """Convert source document to Markdown via shared MarkItDown pipeline.""" + return _converter.convert_to_markdown(src_path, output_path=output_path) diff --git a/desktop/src/acb_large_print_core/versions.py b/desktop/src/acb_large_print_core/versions.py new file mode 100644 index 0000000..460f8bf --- /dev/null +++ b/desktop/src/acb_large_print_core/versions.py @@ -0,0 +1,42 @@ +"""Component version manifest for support and diagnostics.""" + +from __future__ import annotations + +from importlib.metadata import PackageNotFoundError, version + + +def _pkg_version(name: str) -> str: + try: + return version(name) + except PackageNotFoundError: + return "not-installed" + + +def _module_version(module_name: str) -> str: + try: + module = __import__(module_name) + except Exception: + return "not-installed" + return str(getattr(module, "__version__", "unknown")) + + +def get_component_versions() -> dict[str, str]: + """Return a normalized component version map used by desktop/web surfaces.""" + try: + from acb_large_print.version import get_version as _get_release_version + + 
release_version = _get_release_version() + except Exception: + release_version = _pkg_version("acb-large-print") + + return { + "release_version": release_version, + "desktop_package": _pkg_version("acb-large-print"), + "web_package": _pkg_version("acb-large-print-web"), + "markitdown": _pkg_version("markitdown"), + "pymupdf": _module_version("fitz"), + "python_docx": _pkg_version("python-docx"), + "mammoth": _pkg_version("mammoth"), + "requests": _pkg_version("requests"), + } + diff --git a/docs/deployment.md b/docs/deployment.md index 0c399e3..4fb2b61 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -7,6 +7,7 @@ - **Automated Deployment Script:** `bash ~/app/scripts/deploy-app.sh` - **Manual Maintenance Toggle:** `bash ~/app/scripts/maintenance-mode.sh {on|off|status}` - **Complete Strategy & Troubleshooting:** The full deployment strategy is maintained in the repository documentation for operators working from the source tree. +- **Shared-core retrofit rollout checklist:** `docs/shared-core-retrofit.md` (server deployment readiness and smoke tests) --- diff --git a/docs/shared-core-retrofit.md b/docs/shared-core-retrofit.md new file mode 100644 index 0000000..7ffa1fb --- /dev/null +++ b/docs/shared-core-retrofit.md @@ -0,0 +1,94 @@ +# Shared Core Retrofit Plan and Status + +## Goal + +Retrofit GLOW so desktop CLI, desktop GUI, and web routes use a shared internal service layer for document audit/fix/convert dispatch, preparing extraction into a standalone cross-product core. 
+ +## Implemented in this retrofit + +### 1) Shared package introduced + +- Added `desktop/src/acb_large_print_core/` with: + - `services.py`: canonical extension-based dispatch for audit/fix/convert + - `versions.py`: normalized component-version manifest for diagnostics + - `__init__.py`: exported core APIs + +### 2) Desktop CLI wired to shared services + +- `acb_large_print.cli` now routes audit/fix/convert dispatch through: + - `acb_large_print_core.services.audit_by_extension` + - `acb_large_print_core.services.fix_by_extension` + - `acb_large_print_core.services.convert_to_markdown` + +### 3) Web routes/tasks wired to shared services + +- `routes/audit.py` now uses shared `audit_by_extension`. +- `routes/fix.py` now uses shared `audit_by_extension` + `fix_by_extension`. +- `routes/convert.py`, `upload.py`, `routes/speech.py`, and `magic_features.py` now use shared conversion entry points. +- `tasks/convert_tasks.py` now uses shared audit/markdown conversion dispatch. +- `chat_handler.py` now routes live audit via shared `audit_by_extension`. + +### 4) Version-provenance plumbing + +- Added `acb_large_print_core.versions.get_component_versions()`. +- Web template context now includes `component_versions` for About/support surfaces. +- Desktop package `acb_large_print.__version__` now resolves from repository `VERSION` file. + +### 5) MCP server wiring + +- `mcp_server/glow_mcp_utils.py` now dispatches docx/markdown audit, docx fix, and markdown conversion via `acb_large_print_core.services`. + +## Contract surface (internal v1) + +Use these as canonical internal service contracts: + +- `audit_by_extension(path, **policy_overrides) -> AuditResult` +- `fix_by_extension(path, output_path=None, **fix_options) -> tuple` +- `convert_to_markdown(path, output_path=None) -> tuple[Path, str]` +- `get_component_versions() -> dict[str, str]` + +## Next extraction step + +When ready to externalize for QUILL integration: + +1. Move `acb_large_print_core` to its own package/repo. 
+2. Preserve function signatures and return types. +3. Keep thin adapter modules in GLOW (`acb_large_print`) to prevent breaking consumers. +4. Add semver and compatibility matrix (`glow_min`, `quill_min`) to version manifest. + +## Server deployment readiness checklist (for PR review and rollout) + +Use this checklist before promoting `feature/shared-core-retrofit` to production: + +1. **Branch + PR readiness** + - Confirm PR #81 is approved and mergeable. + - Confirm deployment target references the merged commit SHA (not a stale image/revision). + +2. **Build/runtime alignment** + - Rebuild application image/environment from the merged commit. + - Verify runtime imports resolve to repository source used in deployment (avoid stale site-packages path drift). + +3. **Automated verification gates** + - Desktop targeted checks: + - `python -m pytest tests/test_conversion_format_support.py tests/test_pdf_table_extraction.py -q` + - Web targeted checks: + - `python -m pytest tests/test_upload.py tests/test_fix_routes.py -q` + - Version surface checks (from repo root with source paths): + - `PYTHONPATH=desktop/src:web/src python -m pytest web/tests/test_app.py -k release_from_version_file -q` + +4. **Runtime smoke tests (staging first)** + - Audit flow works for `.docx`, `.md`, `.pdf`. + - Fix flow works for `.docx`; advisory/manual paths remain correct for unsupported auto-fix formats. + - Convert flow produces markdown via shared service dispatch. + - Chat audit summary still functions. + - MCP utility endpoints still run docx/markdown audit/fix/convert paths. + +5. **Observability + metadata** + - About page renders component versions (`release`, `markitdown`, `pymupdf`, core package). + - Logs show no import-dispatch errors for `acb_large_print_core`. + +6. **Rollout strategy** + - Deploy to staging. + - Run smoke checks above. + - Promote to production. + - Monitor first 24h for audit/fix/convert error-rate regressions. 
diff --git a/mcp_server/glow_mcp_utils.py b/mcp_server/glow_mcp_utils.py index cbb63e4..5748bbb 100644 --- a/mcp_server/glow_mcp_utils.py +++ b/mcp_server/glow_mcp_utils.py @@ -6,20 +6,30 @@ """ from pathlib import Path import tempfile -import shutil import sys - -from acb_large_print.auditor import audit_document -from acb_large_print.md_auditor import audit_markdown -from acb_large_print.fixer import fix_document -from acb_large_print.converter import convert_to_markdown from acb_large_print.pandoc_converter import convert_to_html, convert_to_docx from acb_large_print.reporter import generate_json_report, generate_text_report, generate_html_report SUPPORTED_FORMATS = {"markdown", "md", "docx", "html"} +def _resolve_core_services(): + """Return (audit_by_extension, convert_to_markdown, fix_by_extension). + + Import lazily so module import does not fail in environments where shared + core packages are not installed but endpoints that need them are not used. + """ + try: + from quill_glow_core import audit_by_extension, convert_to_markdown, fix_by_extension + + return audit_by_extension, convert_to_markdown, fix_by_extension + except Exception as exc: + raise RuntimeError( + "Shared core services unavailable. Install quill-glow-core for MCP operations." + ) from exc + + def run_page_flow_extract(source_url: str, *, max_pages: int = 5, follow_pagination: bool = True): """Extract readable article text from a web URL using PageFlow logic. 
@@ -66,24 +76,25 @@ def run_page_flow_extract(source_url: str, *, max_pages: int = 5, follow_paginat def run_audit(file_path: Path, fmt: str): """Dispatch to the correct audit function based on format.""" + audit_by_extension, _convert_to_markdown, _fix_by_extension = _resolve_core_services() fmt = fmt.lower() - if fmt in ("markdown", "md"): - return audit_markdown(file_path) - if fmt == "docx": - return audit_document(file_path) + if fmt in ("markdown", "md", "docx"): + return audit_by_extension(file_path) raise ValueError(f"Unsupported format for audit: {fmt}") def run_fix(file_path: Path, fmt: str, output_path: Path = None): """Dispatch to the correct fix function based on format.""" + _audit_by_extension, _convert_to_markdown, fix_by_extension = _resolve_core_services() fmt = fmt.lower() if fmt == "docx": - return fix_document(file_path, output_path) + return fix_by_extension(file_path, output_path=output_path) raise ValueError(f"Unsupported format for fix: {fmt}") def run_convert(file_path: Path, from_fmt: str, to_fmt: str, output_path: Path = None): """Dispatch to the correct convert function based on formats.""" + _audit_by_extension, convert_to_markdown, _fix_by_extension = _resolve_core_services() from_fmt = from_fmt.lower() to_fmt = to_fmt.lower() if from_fmt == "docx" and to_fmt in ("markdown", "md"): diff --git a/mcp_server/main.py b/mcp_server/main.py index 545fe16..1b91c10 100644 --- a/mcp_server/main.py +++ b/mcp_server/main.py @@ -18,7 +18,6 @@ from fastapi import FastAPI, UploadFile, File, Form from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware -from typing import Optional import tempfile from pathlib import Path try: @@ -26,6 +25,27 @@ except ImportError: # Support direct module execution in tests. 
from glow_mcp_utils import run_audit, run_report, run_page_flow_extract +try: + from quill_glow_core import ( + configure_default_services as _configure_shared_core_default, + get_startup_telemetry_dict as _get_shared_core_startup_telemetry, + ) +except Exception: + _configure_shared_core_default = None + + def _get_shared_core_startup_telemetry() -> dict: + return { + "backend": "unknown", + "configured_by": "unknown", + "auto_selected": None, + } + +if _configure_shared_core_default is not None: + try: + _configure_shared_core_default() + except Exception: + pass + app = FastAPI(title="GLOW MCP Server", description="Accessibility audit/fix/convert/report API for agent integration.", version="7.2.0") app.add_middleware( @@ -39,7 +59,10 @@ @app.get("/health") def health(): """Health check endpoint.""" - return {"status": "ok"} + return { + "status": "ok", + "shared_core": _get_shared_core_startup_telemetry(), + } @app.post("/audit") diff --git a/mcp_server/requirements.txt b/mcp_server/requirements.txt index dccfa79..987e07c 100644 --- a/mcp_server/requirements.txt +++ b/mcp_server/requirements.txt @@ -1,4 +1,5 @@ # requirements.txt for GLOW MCP Server +quill-glow-core>=0.1.0 fastapi==0.110.2 uvicorn==0.29.0 python-multipart==0.0.28 diff --git a/web/pyproject.toml b/web/pyproject.toml index 825968b..396dc8f 100644 --- a/web/pyproject.toml +++ b/web/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ "Topic :: Adaptive Technologies", ] dependencies = [ + "quill-glow-core>=0.1.0", "flask>=3.1", "flask-wtf>=1.2", "flask-limiter>=4.0", diff --git a/web/requirements.txt b/web/requirements.txt index 01ba60e..636f718 100644 --- a/web/requirements.txt +++ b/web/requirements.txt @@ -1,3 +1,4 @@ +quill-glow-core>=0.1.0 flask>=3.1 flask-wtf>=1.2 flask-limiter>=3.5 diff --git a/web/src/acb_large_print_web/app.py b/web/src/acb_large_print_web/app.py index 1ebeadf..5fbb66a 100644 --- a/web/src/acb_large_print_web/app.py +++ b/web/src/acb_large_print_web/app.py @@ -17,6 +17,15 @@ 
from .rules import get_help_urls_map, get_rules_by_category, get_rules_by_severity +try: + from quill_glow_core import ( + configure_default_services as _configure_shared_core_default, + get_startup_telemetry_dict as _get_shared_core_startup_telemetry, + ) +except Exception: + _configure_shared_core_default = None + _get_shared_core_startup_telemetry = None + csrf = CSRFProtect() limiter = Limiter( key_func=get_remote_address, @@ -60,6 +69,13 @@ def create_app(config: dict | None = None) -> Flask: if config: app.config.update(config) + # Initialize shared-core wiring once at process startup when available. + if _configure_shared_core_default is not None: + try: + _configure_shared_core_default() + except Exception: + app.logger.exception("Failed to initialize quill-glow-core shared services") + # Extensions csrf.init_app(app) limiter.init_app(app) @@ -151,6 +167,7 @@ def _log_request(response): @app.context_processor def inject_rules(): from flask import g as _g + from quill_glow_core import get_component_versions as _get_component_versions from .ai_features import get_all_flags as _get_ai_flags from .branding import get_branding_context as _get_branding_context from .version import get_version as _get_release_version @@ -173,6 +190,7 @@ def inject_rules(): "web_version": web_ver, "desktop_version": desktop_ver, "release_version": release_ver, + "component_versions": _get_component_versions(), "csp_nonce": getattr(_g, "csp_nonce", ""), } # Inject AI flags (from ai_features) @@ -801,6 +819,15 @@ def _build_health_payload() -> tuple[dict, bool]: "timestamp_utc": datetime.now(UTC).isoformat(), "duration_ms": _hduration_ms, } + if _get_shared_core_startup_telemetry is not None: + try: + payload["shared_core"] = _get_shared_core_startup_telemetry() + except Exception: + payload["shared_core"] = { + "backend": "unknown", + "configured_by": "unknown", + "auto_selected": None, + } return payload, all_ok # Health check diff --git 
a/web/src/acb_large_print_web/chat_handler.py b/web/src/acb_large_print_web/chat_handler.py index e1fe321..72173a6 100644 --- a/web/src/acb_large_print_web/chat_handler.py +++ b/web/src/acb_large_print_web/chat_handler.py @@ -488,9 +488,9 @@ def run_accessibility_audit(self) -> str: if self.context.doc_path is None: return self._heuristic_compliance_summary() try: - from acb_large_print.auditor import audit_document + from quill_glow_core import audit_by_extension - result = audit_document(str(self.context.doc_path)) + result = audit_by_extension(str(self.context.doc_path)) self.context._audit_cache = { "findings": [ { diff --git a/web/src/acb_large_print_web/magic_features.py b/web/src/acb_large_print_web/magic_features.py index a3521f0..3a41dac 100644 --- a/web/src/acb_large_print_web/magic_features.py +++ b/web/src/acb_large_print_web/magic_features.py @@ -320,7 +320,7 @@ def extract_text_for_compare(path: Path) -> str: return path.read_text(encoding="utf-8", errors="replace") # Use existing MarkItDown extractor for binary docs. 
- from acb_large_print.converter import convert_to_markdown + from quill_glow_core import convert_to_markdown out_path = path.with_suffix(path.suffix + ".cmp.md") md_path, _ = convert_to_markdown(path, output_path=out_path) diff --git a/web/src/acb_large_print_web/routes/audit.py b/web/src/acb_large_print_web/routes/audit.py index 7f67af8..d889347 100644 --- a/web/src/acb_large_print_web/routes/audit.py +++ b/web/src/acb_large_print_web/routes/audit.py @@ -264,31 +264,9 @@ def _compute_audit_diff( def _audit_by_extension(saved_path: Path): """Dispatch to the correct auditor based on file extension.""" - ext = saved_path.suffix.lower() - if ext == ".xlsx": - from acb_large_print.xlsx_auditor import audit_workbook + from quill_glow_core import audit_by_extension - return audit_workbook(saved_path) - elif ext == ".pptx": - from acb_large_print.pptx_auditor import audit_presentation - - return audit_presentation(saved_path) - elif ext == ".md": - from acb_large_print.md_auditor import audit_markdown - - return audit_markdown(saved_path) - elif ext == ".pdf": - from acb_large_print.pdf_auditor import audit_pdf - - return audit_pdf(saved_path) - elif ext == ".epub": - from acb_large_print.epub_auditor import audit_epub - - return audit_epub(saved_path) - else: - from acb_large_print.auditor import audit_document - - return audit_document(saved_path) + return audit_by_extension(saved_path) def _format_from_path(saved_path: Path) -> str: diff --git a/web/src/acb_large_print_web/routes/convert.py b/web/src/acb_large_print_web/routes/convert.py index 9044a85..da16779 100644 --- a/web/src/acb_large_print_web/routes/convert.py +++ b/web/src/acb_large_print_web/routes/convert.py @@ -28,11 +28,9 @@ from werkzeug.utils import secure_filename as _secure_filename -from acb_large_print.converter import ( - CONVERTIBLE_EXTENSIONS, - MARKITDOWN_AUDIO_EXTENSIONS, - convert_to_markdown, -) +from acb_large_print.converter import CONVERTIBLE_EXTENSIONS, MARKITDOWN_AUDIO_EXTENSIONS + +from 
quill_glow_core import convert_to_markdown from acb_large_print.wcag_language import ( analyze_text_for_wcag_language, ) diff --git a/web/src/acb_large_print_web/routes/fix.py b/web/src/acb_large_print_web/routes/fix.py index 2638ff2..7986801 100644 --- a/web/src/acb_large_print_web/routes/fix.py +++ b/web/src/acb_large_print_web/routes/fix.py @@ -126,107 +126,36 @@ def _fix_by_extension( Returns (output_path, total_fixes, fix_records, post_audit, warnings). """ - ext = saved_path.suffix.lower() - if ext == ".xlsx": - from acb_large_print.xlsx_auditor import audit_workbook - - post_audit = audit_workbook(saved_path) - return ( - saved_path, - 0, - [], - post_audit, - [ - "Excel workbooks cannot be auto-fixed yet. " - "Review the audit findings and fix them manually in Excel." - ], - ) - elif ext == ".pptx": - from acb_large_print.pptx_auditor import audit_presentation + from quill_glow_core import fix_by_extension - post_audit = audit_presentation(saved_path) - return ( - saved_path, - 0, - [], - post_audit, - [ - "PowerPoint presentations cannot be auto-fixed yet. " - "Review the audit findings and fix them manually in PowerPoint." - ], - ) - elif ext == ".md": - from acb_large_print.md_auditor import audit_markdown - - post_audit = audit_markdown(saved_path) - return ( - saved_path, - 0, - [], - post_audit, - [ - "Markdown auto-fix is coming soon. " - "Review the audit findings and fix them in your text editor." - ], - ) - elif ext == ".pdf": - from acb_large_print.pdf_auditor import audit_pdf - - post_audit = audit_pdf(saved_path) - return ( - saved_path, - 0, - [], - post_audit, - [ - "PDF files cannot be auto-fixed. " - "Use Adobe Acrobat Pro or re-export from the source application." - ], - ) - elif ext == ".epub": - from acb_large_print.epub_auditor import audit_epub - - post_audit = audit_epub(saved_path) - return ( - saved_path, - 0, - [], - post_audit, - [ - "ePub files cannot be auto-fixed yet. 
" - "Review the audit findings and fix them in your ePub editor." - ], - ) - else: - from acb_large_print.fixer import fix_document + ai_provider = None + if detect_headings and use_ai: + try: + from acb_large_print.ai_provider import get_provider - ai_provider = None - if detect_headings and use_ai: - try: - from acb_large_print.ai_provider import get_provider + ai_provider = get_provider() + except Exception: + pass # Fall back to heuristic-only - ai_provider = get_provider() - except Exception: - pass # Fall back to heuristic-only - result = fix_document( - saved_path, - output_path, - bound=bound, - list_indent_in=list_indent_in, - list_hanging_in=list_hanging_in, - list_level_indents=list_level_indents, - para_indent_in=para_indent_in, - first_line_indent_in=first_line_indent_in, - preserve_heading_alignment=preserve_heading_alignment, - detect_headings=detect_headings, - ai_provider=ai_provider, - heading_threshold=heading_threshold, - confirmed_headings=confirmed_headings, - heading_accuracy_level=heading_accuracy, - style_size_overrides=style_size_overrides, - ) - # Tag whether AI was actually used (ai_provider set and invoked) - return result[:5] + ({"ai_used": ai_provider is not None},) if len(result) == 5 else result + result = fix_by_extension( + saved_path, + output_path, + bound=bound, + list_indent_in=list_indent_in, + list_hanging_in=list_hanging_in, + list_level_indents=list_level_indents, + para_indent_in=para_indent_in, + first_line_indent_in=first_line_indent_in, + preserve_heading_alignment=preserve_heading_alignment, + detect_headings=detect_headings, + ai_provider=ai_provider, + heading_threshold=heading_threshold, + confirmed_headings=confirmed_headings, + heading_accuracy_level=heading_accuracy, + style_size_overrides=style_size_overrides, + ) + # Tag whether AI was actually used (ai_provider set and invoked) + return result[:5] + ({"ai_used": ai_provider is not None},) if len(result) == 5 else result def _audit_by_extension( @@ -239,38 
+168,16 @@ def _audit_by_extension( style_size_overrides: dict[str, float] | None = None, ): """Dispatch to the correct auditor based on file extension.""" - ext = saved_path.suffix.lower() - if ext == ".xlsx": - from acb_large_print.xlsx_auditor import audit_workbook - - return audit_workbook(saved_path) - elif ext == ".pptx": - from acb_large_print.pptx_auditor import audit_presentation - - return audit_presentation(saved_path) - elif ext == ".md": - from acb_large_print.md_auditor import audit_markdown + from quill_glow_core import audit_by_extension - return audit_markdown(saved_path) - elif ext == ".pdf": - from acb_large_print.pdf_auditor import audit_pdf - - return audit_pdf(saved_path) - elif ext == ".epub": - from acb_large_print.epub_auditor import audit_epub - - return audit_epub(saved_path) - else: - from acb_large_print.auditor import audit_document - - return audit_document( - saved_path, - list_indent_in=list_indent_in, - list_level_indents=list_level_indents, - para_indent_in=para_indent_in, - first_line_indent_in=first_line_indent_in, - style_size_overrides=style_size_overrides, - ) + return audit_by_extension( + saved_path, + list_indent_in=list_indent_in, + list_level_indents=list_level_indents, + para_indent_in=para_indent_in, + first_line_indent_in=first_line_indent_in, + style_size_overrides=style_size_overrides, + ) def _format_from_path(saved_path: Path) -> str: diff --git a/web/src/acb_large_print_web/routes/speech.py b/web/src/acb_large_print_web/routes/speech.py index 5194994..9cefe87 100644 --- a/web/src/acb_large_print_web/routes/speech.py +++ b/web/src/acb_large_print_web/routes/speech.py @@ -30,7 +30,7 @@ url_for, ) -from acb_large_print.converter import convert_to_markdown +from quill_glow_core import convert_to_markdown from acb_large_print.pandoc_converter import pandoc_available from ..app import limiter diff --git a/web/src/acb_large_print_web/tasks/convert_tasks.py b/web/src/acb_large_print_web/tasks/convert_tasks.py index 
cd242b4..b3f42d7 100644 --- a/web/src/acb_large_print_web/tasks/convert_tasks.py +++ b/web/src/acb_large_print_web/tasks/convert_tasks.py @@ -376,13 +376,15 @@ def _dispatch_conversion( options: dict[str, Any], ) -> str: """Run the conversion and return the absolute path of the result file.""" - from ..upload import get_temp_dir - from acb_large_print.converter import CONVERTIBLE_EXTENSIONS, convert_to_markdown + from acb_large_print.converter import CONVERTIBLE_EXTENSIONS from acb_large_print.pandoc_converter import ( - PANDOC_INPUT_EXTENSIONS, LIBREOFFICE_CONVERSIONS, + PANDOC_INPUT_EXTENSIONS, preconvert_via_libreoffice, ) + from quill_glow_core import convert_to_markdown + + from ..upload import get_temp_dir _progress(job_id, 10, "Locating source file…") temp_dir = get_temp_dir(upload_token) @@ -460,8 +462,7 @@ def _dispatch_conversion( def _run_pipeline(job_id, source, out_dir, options): - from acb_large_print.pipeline_converter import convert_with_pipeline - from acb_large_print.pipeline_converter import get_available_conversions + from acb_large_print.pipeline_converter import convert_with_pipeline, get_available_conversions _progress(job_id, 30, "Sending to DAISY Pipeline…") conversion_key = options.get("pipeline_conversion", "") available = get_available_conversions() @@ -480,7 +481,7 @@ def _run_pipeline(job_id, source, out_dir, options): def _run_to_markdown(job_id, source, out_dir, options): - from acb_large_print.converter import convert_to_markdown + from quill_glow_core import convert_to_markdown _progress(job_id, 40, "Extracting content to Markdown…") dest = out_dir / (source.stem + ".md") output_path, _ = convert_to_markdown(source, output_path=dest) @@ -489,6 +490,7 @@ def _run_to_markdown(job_id, source, out_dir, options): def _run_to_html(job_id, source, out_dir, options): import re + from acb_large_print.pandoc_converter import convert_to_html _progress(job_id, 40, "Converting to HTML…") css_path = None if options.get("acb_format", True) else 
Path("__no_acb_css__") @@ -692,15 +694,15 @@ def _run_speech( output_format: str, ) -> str: """Run full-document speech synthesis and return the output file path.""" - from ..upload import get_temp_dir from acb_large_print_web.speech import ( SpeechError, normalize_document_text, synthesize_document_text, wav_bytes_to_mp3, - wav_duration_seconds, ) + from ..upload import get_temp_dir + _progress(job_id, 10, "Locating extracted document text…") # load pre-extracted text written by /speech/prepare @@ -727,8 +729,8 @@ def _run_speech( # Optionally apply pronunciation dictionary if enabled try: - from acb_large_print_web.magic_features import apply_pronunciation_dictionary from acb_large_print_web import feature_flags as _ff + from acb_large_print_web.magic_features import apply_pronunciation_dictionary if _ff.get_all_flags().get("GLOW_ENABLE_SPEECH_PRONUNCIATION_DICTIONARY", True): text = apply_pronunciation_dictionary(text) except Exception: @@ -840,10 +842,12 @@ def run_export_job( token: str, options: dict[str, Any], ) -> dict[str, Any]: - from ..upload import get_temp_dir - from acb_large_print.exporter import export_cms_fragment, export_standalone_html import zipfile + from acb_large_print.exporter import export_cms_fragment, export_standalone_html + + from ..upload import get_temp_dir + status = read_status(job_id) max_attempts = max(1, int(status.get("max_attempts", 1))) start_attempt = max(1, int(status.get("attempt", 0)) + 1) @@ -896,28 +900,10 @@ def run_audit_job( input_filename: str, options: dict[str, Any], ) -> dict[str, Any]: - from ..upload import get_temp_dir - from acb_large_print.auditor import audit_document - from acb_large_print.md_auditor import audit_markdown - from acb_large_print.pptx_auditor import audit_presentation - from acb_large_print.pdf_auditor import audit_pdf - from acb_large_print.epub_auditor import audit_epub - from acb_large_print.xlsx_auditor import audit_workbook from acb_large_print.constants import AUDIT_RULES + from 
quill_glow_core import audit_by_extension - def _audit_by_ext(path: Path): - ext = path.suffix.lower() - if ext == ".xlsx": - return audit_workbook(path) - if ext == ".pptx": - return audit_presentation(path) - if ext == ".md": - return audit_markdown(path) - if ext == ".pdf": - return audit_pdf(path) - if ext == ".epub": - return audit_epub(path) - return audit_document(path) + from ..upload import get_temp_dir status = read_status(job_id) max_attempts = max(1, int(status.get("max_attempts", 1))) @@ -934,7 +920,7 @@ def _audit_by_ext(path: Path): if not src.exists(): raise FileNotFoundError(f"Source file not found: {src}") write_status(job_id, state="PROGRESS", progress=25, message="Running audit…", retryable=True) - result = _audit_by_ext(src) + result = audit_by_extension(src) findings = _serialize_findings(getattr(result, "findings", [])) summary = { "filename": input_filename, @@ -969,8 +955,9 @@ def run_fix_job( input_filename: str, options: dict[str, Any], ) -> dict[str, Any]: + from acb_large_print_web.routes.fix import _fix_by_extension + from ..upload import get_temp_dir - from acb_large_print_web.routes.fix import _fix_by_extension, _audit_by_extension status = read_status(job_id) max_attempts = max(1, int(status.get("max_attempts", 1))) @@ -1046,14 +1033,15 @@ def run_speech_prepare_job( input_filename: str, speed: float, ) -> dict[str, Any]: - from ..upload import get_temp_dir + from acb_large_print_web.routes.speech import _DOC_EXTRACT_NAME, _DOC_RENDERED_NAME, _extract_document_text from acb_large_print_web.speech import ( - normalize_document_text, estimate_audio_seconds_from_text, estimate_processing_seconds_from_text, first_sentences, + normalize_document_text, ) - from acb_large_print_web.routes.speech import _extract_document_text, _DOC_EXTRACT_NAME, _DOC_RENDERED_NAME + + from ..upload import get_temp_dir status = read_status(job_id) max_attempts = max(1, int(status.get("max_attempts", 1))) diff --git 
a/web/src/acb_large_print_web/templates/about.html b/web/src/acb_large_print_web/templates/about.html index 3f748a7..9b008a3 100644 --- a/web/src/acb_large_print_web/templates/about.html +++ b/web/src/acb_large_print_web/templates/about.html @@ -435,6 +435,15 @@