diff --git a/agtech-ops/.dockerignore b/agtech-ops/.dockerignore
new file mode 100644
index 0000000000..b55811d1b8
--- /dev/null
+++ b/agtech-ops/.dockerignore
@@ -0,0 +1,8 @@
+.venv/
+__pycache__/
+*.pyc
+*.db
+*.db-journal
+*.egg-info/
+.pytest_cache/
+tests/
diff --git a/agtech-ops/.gitignore b/agtech-ops/.gitignore
new file mode 100644
index 0000000000..7e2898127b
--- /dev/null
+++ b/agtech-ops/.gitignore
@@ -0,0 +1,6 @@
+.venv/
+__pycache__/
+*.pyc
+*.db
+*.egg-info/
+.pytest_cache/
diff --git a/agtech-ops/Dockerfile b/agtech-ops/Dockerfile
new file mode 100644
index 0000000000..8faf72d94d
--- /dev/null
+++ b/agtech-ops/Dockerfile
@@ -0,0 +1,18 @@
+FROM python:3.12-slim
+WORKDIR /app
+
+# Install the package with file-processing + dashboard extras.
+COPY pyproject.toml README.md ./
+COPY agtech_ops ./agtech_ops
+RUN pip install --no-cache-dir -e ".[files,dashboard]"
+
+ENV AGTECH_FORCE_RULE_BASED=1 \
+    AGTECH_DATABASE_URL=sqlite:////data/agtech_ops.db
+
+# SQLite lives on a volume so data survives restarts; /data is chowned first so non-root "app" can write.
+RUN useradd --uid 10001 --create-home app && mkdir -p /data && chown app:app /data
+VOLUME ["/data"]
+USER app
+EXPOSE 8501
+# $PORT is honored when present (e.g. on Render), else 8501; exec makes Streamlit PID 1 so it gets SIGTERM.
+CMD ["sh", "-c", "exec streamlit run agtech_ops/dashboard.py --server.port ${PORT:-8501} --server.address 0.0.0.0 --server.headless true --browser.gatherUsageStats false"]
diff --git a/agtech-ops/README.md b/agtech-ops/README.md
new file mode 100644
index 0000000000..fc4262e5d8
--- /dev/null
+++ b/agtech-ops/README.md
@@ -0,0 +1,140 @@
+# AgTech Ops Hub
+
+A contextualized operations hub for farming businesses. It ingests data from
+several messy real-world sources, resolves them onto shared farm entities, and
+turns the combined picture into **summaries and action items** for ops teams.
+
+```
+INGEST  →  NORMALIZE + STORE  →  CONTEXTUALIZE + SUMMARIZE  →  DELIVER to ops
+```
+
+This is **Sprint 1**: a working vertical slice that proves the full loop
+end-to-end with zero external services or API keys required.
+
+## What works today (Sprint 1)
+
+- **Ingest a range of files** — one intake that dispatches on file type:
+  - **Tabular** (carry their own farm/asset columns): `.csv`, `.tsv`,
+    `.xlsx`/`.xls`, `.json`. Flexible/aliased column names, per-row error
+    reporting (one bad row never aborts the file), extra columns preserved.
+  - **Free text** (converted into data): `.txt`, `.md`, `.log`, `.pdf`,
+    `.docx`. Each paragraph becomes an event; an optional leading date is used
+    as its timestamp; the asset is inferred from known asset names.
+  - **WhatsApp** chat exports are auto-detected (both `[date, time] Name:` and
+    `date, time - Name:` formats, multi-line messages supported).
+- **Video/clip metadata & tags** — clip metadata (from an upstream vision model,
+  e.g. YOLO on a Jetson) with `tags`, `duration`, `camera` columns is
+  auto-detected and ingested as `media` events. Tags become **workflow signals**
+  and feed action generation (a clip tagged `lame` raises a vet task).
+- **Compile & aggregate** — a cross-source roll-up: totals, per-source and
+  per-asset counts, **top tags**, clip counts, and numeric metric **time series**
+  assembled from every file (e.g. milk yield from a CSV + a JSON partner feed).
+- **AI action-item log agent** — a "Haiku agent" turns incoming bridge data
+  (messages, files, clip tags) into an append-only, prioritized action-item log,
+  each entry carrying a **rationale** and the agent that produced it. Uses Claude
+  Haiku via LiteLLM when a key is set, with a deterministic offline fallback.
+- **Contextualize** — every record is resolved onto a canonical model so data
+  from different sources lines up on the same thing:
+
+  ```
+  Farm --< Asset (herd | crop | field) --< Event >-- ActionItem
+  ```
+
+- **Summarize** — a batch of events becomes a `{summary, points, action_items}`
+  result. Two interchangeable backends:
+  - `rule_based` (default): deterministic, offline, keyword-triggered action
+    items with priority, owner (from the message author) and a suggested due
+    date. No keys needed.
+  - `llm` (optional): LiteLLM + instructor for structured output from any model,
+    used automatically when the `ai` extra is installed **and** an API key is set.
+- **Deliver**
+  - **FastAPI** JSON API (`/ingest/files`, `/ingest/csv`, `/ingest/whatsapp`,
+    `/report`, `/summarize`, `/action-items`, `/agent`, `/agent/run`, `/agent/log`, `/health`).
+  - **Streamlit** dashboard for ingesting data and reviewing open action items.
+
+## Quick start
+
+```bash
+cd agtech-ops
+python3 -m venv .venv && . .venv/bin/activate   # or use your environment
+pip install -e ".[dev,files]"        # core + tests + Excel/PDF/Word support
+# optional: pip install -e ".[ai,dashboard]"
+
+# Run the API
+uvicorn agtech_ops.api:app --reload
+
+# Ingest a range of files at once (incl. video/clip tag metadata)
+curl -F "files=@sample_data/herd.csv" \
+     -F "files=@sample_data/partner_feed.json" \
+     -F "files=@sample_data/field_notes.txt" \
+     -F "files=@sample_data/clips.json" \
+     -F "farm=Green Acres" \
+     http://localhost:8000/ingest/files
+
+# Compile / aggregate everything ingested (incl. top tags + clip counts)
+curl http://localhost:8000/report
+
+# Run the action-item-log agent, then read the log
+curl -X POST "http://localhost:8000/agent/run"
+curl http://localhost:8000/agent/log
+
+# Or the dashboard (needs the 'dashboard' extra)
+streamlit run agtech_ops/dashboard.py
+```
+
+## Tests
+
+```bash
+pytest        # 15 tests, fully offline (forces the rule-based summarizer)
+```
+
+## Configuration
+
+All optional; sensible defaults mean it runs with nothing set.
+
+| Env var | Default | Purpose |
+|---|---|---|
+| `AGTECH_DATABASE_URL` | `sqlite:///agtech_ops.db` | Any SQLAlchemy URL (e.g. Postgres). |
+| `AGTECH_LLM_MODEL` | `anthropic/claude-3-5-haiku-latest` | LiteLLM model id for the agent. |
+| `AGTECH_FORCE_RULE_BASED` | `false` | Force the offline agent. |
+| `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` / … | – | Enables the Haiku/LLM agent. |
+
+## Architecture (mapped to the awesome-python catalog)
+
+| Layer | Library (from this repo's list) |
+|---|---|
+| API / webhooks | FastAPI, uvicorn |
+| Tabular parsing | pandas |
+| Validation | pydantic |
+| Storage / entity resolution | SQLAlchemy |
+| Summaries (AI) | LiteLLM, instructor |
+| Dashboard | Streamlit, Plotly |
+| Scheduling (future) | APScheduler / Prefect / Dagster |
+
+## Roadmap
+
+- **Sprint 1 (done):** Multi-format file, clip-metadata + WhatsApp ingest → contextualized
+  action items via API + dashboard, with offline + LLM summarizers.
+- **Sprint 2:** Live **Dropbox** sync and **WhatsApp Business API** webhook;
+  scheduled pulls (APScheduler); push action items back to chat/email.
+- **Sprint 3:** Smarter entity resolution (fuzzy asset matching, aliases),
+  per-farm dashboards, trend charts, action-item status workflow.
+
+## Open questions — where I need more detail
+
+These are the decisions that will most shape Sprints 2–3:
+
+1. **Partner CSV schemas.** What columns do your real partners send? Are they
+   stable, or do we need per-partner mappings? Right now I infer common aliases.
+2. **Dropbox layout.** Folder structure and file naming for herd/crop/operations
+   data, and whether to use a service account or per-user OAuth.
+3. **WhatsApp source.** Live (WhatsApp Business API / Twilio / Meta Cloud API)
+   or periodic chat-export uploads? Live changes the auth + webhook design.
+4. **Entity naming.** How are farms/herds/fields named across sources so we can
+   match them reliably? Is there a master list/IDs we should sync from?
+5. **Action item destination.** Where do ops want items delivered — dashboard
+   only, back into WhatsApp, email, or an existing task tool?
+6. **AI provider + data policy.** Which model/provider is acceptable, and any
+   constraints on sending farm/staff messages to a third-party LLM.
+7. **Deployment + scale.** Single farm vs. multi-tenant ("agrefine network"?),
+   expected data volume, and where this should run.
diff --git a/agtech-ops/agtech_ops/__init__.py b/agtech-ops/agtech_ops/__init__.py
new file mode 100644
index 0000000000..b20bc19f3d
--- /dev/null
+++ b/agtech-ops/agtech_ops/__init__.py
@@ -0,0 +1,3 @@
+"""AgTech Ops Hub: contextualize multi-source farm data into action items."""
+
+__version__ = "0.1.0"
diff --git a/agtech-ops/agtech_ops/agent.py b/agtech-ops/agtech_ops/agent.py
new file mode 100644
index 0000000000..b091360912
--- /dev/null
+++ b/agtech-ops/agtech_ops/agent.py
@@ -0,0 +1,46 @@
+"""Action-item-log agent.
+
+This is the "Haiku agent" entry point: as data flows in from the bridge
+(WhatsApp, Dropbox, partner files, video/clip tags), it distills an append-only
+**action-item log** with provenance — what to do, who owns it, why it was
+raised, and which agent produced it.
+
+It delegates to the configured summarizer backend:
+  - ``llm``  — Claude Haiku (or any LiteLLM model) when the AI extra + an API
+    key are present. Cheap, fast, good at turning chatter + tags into tasks.
+  - ``rule_based`` — deterministic offline fallback, so the log always builds.
+
+Each run appends to the ``action_items`` table (the log); existing entries are
+preserved so the log is a running history, not a snapshot.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+
+from .schemas import SummaryResult
+from .service import list_action_items, summarize_and_store
+from .summarize import get_summarizer
+
+
+def agent_name() -> str:
+    """Return the ``name`` attribute of the summarizer chosen by ``get_summarizer()``."""
+    return get_summarizer().name
+
+
+def build_action_log(
+    farm: str | None = None,
+    since_days: int | None = None,
+) -> SummaryResult:
+    """Process incoming data and append to the action-item log."""
+    since = None
+    if since_days is not None:
+        since = dt.datetime.now() - dt.timedelta(days=since_days)
+    return summarize_and_store(farm=farm, since=since, persist=True)
+
+
+def action_log(limit: int | None = None) -> list[dict]:
+    """Return the current open action-item log (newest first)."""
+    items = list_action_items()
+    items.sort(key=lambda a: (a.get("logged_at") or ""), reverse=True)
+    return items[:limit] if limit else items
diff --git a/agtech-ops/agtech_ops/api.py b/agtech-ops/agtech_ops/api.py
new file mode 100644
index 0000000000..a602f25adb
--- /dev/null
+++ b/agtech-ops/agtech_ops/api.py
@@ -0,0 +1,137 @@
+"""FastAPI surface for the AgTech Ops Hub."""
+
+from __future__ import annotations
+
+import datetime as dt
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
+
+from .agent import action_log, agent_name, build_action_log
+from .db import init_db
+from .ingest import SUPPORTED_EXTENSIONS, parse_partner_csv, parse_whatsapp_export
+from .models import ActionStatus
+from .schemas import AggregateReport, FileIngestResult, IngestResult, SummaryResult
+from .service import (
+    aggregate,
+    ingest_files,
+    known_asset_names,
+    list_action_items,
+    store_events,
+    summarize_and_store,
+)
+from .summarize import get_summarizer
+
+@asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncIterator[None]:
+    init_db()  # startup step: create any missing tables before requests are served
+    yield  # no teardown work after the yield
+
+
+app = FastAPI(
+    title="AgTech Ops Hub",
+    version="0.1.0",
+    description=(
+        "Ingest partner CSVs, Dropbox exports and WhatsApp chatter, contextualize "
+        "them onto shared farm assets, and turn them into action items for ops teams."
+    ),
+    lifespan=lifespan,
+)
+
+
+@app.get("/health")
+def health() -> dict:
+    return {
+        "status": "ok",
+        "summarizer": get_summarizer().name,
+        "supported_files": sorted(SUPPORTED_EXTENSIONS),
+    }
+
+
+@app.post("/ingest/files", response_model=FileIngestResult)
+async def ingest_files_endpoint(
+    files: list[UploadFile] = File(...),
+    farm: str | None = Form(None),
+    default_asset: str = Form("General"),
+) -> FileIngestResult:
+    """Ingest a range of files (CSV/TSV/Excel/JSON/TXT/MD/LOG/PDF/DOCX).
+
+    Tabular files carry their own farm/asset columns; free-text files use the
+    optional ``farm`` for context resolution.
+    """
+    payload = [(f.filename or "upload", await f.read()) for f in files]
+    result = ingest_files(payload, farm=farm, default_asset=default_asset)
+    if result.events_ingested == 0 and result.errors:
+        raise HTTPException(status_code=422, detail=result.errors)
+    return result
+
+
+@app.get("/report", response_model=AggregateReport)
+def report(farm: str | None = Query(None)) -> AggregateReport:
+    """Compiled, aggregated view across all ingested sources."""
+    return aggregate(farm=farm)  # NOTE(review): farm=None presumably means "all farms" — confirm in service.aggregate
+
+
+@app.post("/ingest/csv", response_model=IngestResult)
+async def ingest_csv(file: UploadFile = File(...)) -> IngestResult:
+    raw = await file.read()
+    events, errors = parse_partner_csv(raw)
+    if not events and errors:
+        raise HTTPException(status_code=422, detail=errors)
+    return store_events(events, errors)
+
+
+@app.post("/ingest/whatsapp", response_model=IngestResult)
+async def ingest_whatsapp(
+    text: str = Form(...),
+    farm: str = Form(...),
+    default_asset: str = Form("General"),
+) -> IngestResult:
+    events, errors = parse_whatsapp_export(
+        text,
+        farm=farm,
+        known_assets=known_asset_names(farm),
+        default_asset=default_asset,
+    )
+    if not events and errors:
+        raise HTTPException(status_code=422, detail=errors)
+    return store_events(events, errors)
+
+
+@app.post("/summarize", response_model=SummaryResult)
+def summarize(
+    farm: str | None = Query(None),
+    since_days: int | None = Query(None, ge=0),
+    persist: bool = Query(True),
+) -> SummaryResult:
+    since = None
+    if since_days is not None:
+        since = dt.datetime.now() - dt.timedelta(days=since_days)
+    return summarize_and_store(farm=farm, since=since, persist=persist)
+
+
+@app.get("/action-items")
+def action_items(status: ActionStatus | None = Query(ActionStatus.open)) -> list[dict]:
+    return list_action_items(status=status)  # query default is ActionStatus.open
+
+
+@app.get("/agent")
+def agent_info() -> dict:
+    """Which action-item-log agent is active (haiku-agent or rule_based)."""
+    return {"agent": agent_name()}  # agent_name() reports the active summarizer's name
+
+
+@app.post("/agent/run", response_model=SummaryResult)
+def agent_run(
+    farm: str | None = Query(None),
+    since_days: int | None = Query(None, ge=0),  # negative windows are rejected by validation
+) -> SummaryResult:
+    """Run the agent over incoming data and append to the action-item log."""
+    return build_action_log(farm=farm, since_days=since_days)
+
+
+@app.get("/agent/log")
+def agent_log(limit: int | None = Query(None, ge=1)) -> list[dict]:
+    """Read the current action-item log (newest first)."""
+    return action_log(limit=limit)  # limit=None returns the whole log
diff --git a/agtech-ops/agtech_ops/config.py b/agtech-ops/agtech_ops/config.py
new file mode 100644
index 0000000000..c2494c03ce
--- /dev/null
+++ b/agtech-ops/agtech_ops/config.py
@@ -0,0 +1,38 @@
+"""Runtime configuration, read from environment variables.
+
+Everything has a sensible default so the app runs end-to-end with zero setup.
+Secrets (LLM keys, Dropbox/WhatsApp tokens) are optional and only needed for
+the live integrations.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+
+
+def _env_bool(name: str, default: bool) -> bool:
+    raw = os.getenv(name)
+    if raw is None:
+        return default
+    return raw.strip().lower() in {"1", "true", "yes", "on"}
+
+
+@dataclass(frozen=True)
+class Settings:
+    # SQLAlchemy URL. Defaults to a local SQLite file so there is nothing to
+    # provision for a first run.
+    database_url: str = os.getenv("AGTECH_DATABASE_URL", "sqlite:///agtech_ops.db")
+
+    # LLM model string understood by LiteLLM. Defaults to a small, cheap model
+    # (Claude Haiku) for the action-item-log agent. Only used when AI extras +
+    # a key exist; otherwise the deterministic rule-based agent runs.
+    llm_model: str = os.getenv("AGTECH_LLM_MODEL", "anthropic/claude-3-5-haiku-latest")
+
+    # Force the deterministic summarizer even if AI deps/keys are available.
+    # Handy for tests and offline demos.
+    force_rule_based: bool = _env_bool("AGTECH_FORCE_RULE_BASED", False)
+
+
+def get_settings() -> Settings:
+    return Settings()  # a new instance on every call; nothing is cached here
diff --git a/agtech-ops/agtech_ops/dashboard.py b/agtech-ops/agtech_ops/dashboard.py
new file mode 100644
index 0000000000..060d7824c4
--- /dev/null
+++ b/agtech-ops/agtech_ops/dashboard.py
@@ -0,0 +1,150 @@
+"""Operator-facing Streamlit dashboard.
+
+Run with:  streamlit run agtech_ops/dashboard.py
+Requires the 'dashboard' extra (streamlit, plotly).
+"""
+
+from __future__ import annotations
+
+import pandas as pd
+import streamlit as st
+
+# Absolute imports so the file works when launched directly via
+# `streamlit run agtech_ops/dashboard.py` (Streamlit runs it as a script, not
+# as part of the package, so relative imports would fail).
+from agtech_ops.agent import action_log, agent_name, build_action_log
+from agtech_ops.db import init_db
+from agtech_ops.ingest import SUPPORTED_EXTENSIONS, parse_whatsapp_export
+from agtech_ops.service import (
+    aggregate,
+    ingest_files,
+    known_asset_names,
+    store_events,
+)
+
+
+def main() -> None:
+    init_db()  # make sure the tables exist before any query below
+    st.set_page_config(page_title="AgTech Ops Hub", layout="wide")
+    st.title("AgTech Ops Hub")
+    st.caption(
+        f"Compile & aggregate multi-source farm data · action-item agent: "
+        f"**{agent_name()}**"
+    )
+
+    farm_default = "Green Acres"
+    exts = ", ".join(sorted(e.lstrip(".") for e in SUPPORTED_EXTENSIONS))
+    # --- Sidebar intake: file uploads first, then pasted chat/notes text ---
+    with st.sidebar:
+        st.header("Intake")
+        farm = st.text_input("Farm (for text documents)", value=farm_default)
+        st.caption(f"Accepted: {exts}")
+        uploads = st.file_uploader(
+            "Drop a range of files",
+            type=[e.lstrip(".") for e in SUPPORTED_EXTENSIONS],
+            accept_multiple_files=True,
+        )
+        if uploads and st.button("Ingest files", type="primary"):
+            payload = [(u.name, u.getvalue()) for u in uploads]  # (filename, bytes) pairs
+            res = ingest_files(payload, farm=farm)
+            st.success(
+                f"Ingested {res.events_ingested} records from "
+                f"{res.files_processed} file(s)."
+            )
+            with st.expander("Per-file detail"):
+                st.dataframe(res.per_file, use_container_width=True)
+            if res.errors:
+                st.warning("\n".join(res.errors[:20]))  # show at most the first 20 errors
+
+        st.divider()
+        wa_text = st.text_area("Or paste WhatsApp / notes text")
+        if wa_text and st.button("Ingest pasted text"):
+            events, errors = parse_whatsapp_export(
+                wa_text, farm=farm, known_assets=known_asset_names(farm)
+            )
+            res = store_events(events, errors)
+            st.success(f"Ingested {res.events_ingested} messages.")
+            if res.errors:
+                st.warning("\n".join(res.errors))
+    # Aggregated after the intake code above, so data ingested in this run is included.
+    report = aggregate()
+
+    # --- Compiled overview ---
+    st.subheader("Compiled overview")
+    c1, c2, c3, c4, c5 = st.columns(5)
+    c1.metric("Events", report.total_events)
+    c2.metric("Assets", report.total_assets)
+    c3.metric("Clips", report.media_clips)
+    c4.metric("Farms", report.total_farms)
+    c5.metric("Open actions", report.open_action_items)
+
+    if report.by_source:
+        src_df = pd.DataFrame(
+            {"source": list(report.by_source), "events": list(report.by_source.values())}
+        )
+        st.bar_chart(src_df.set_index("source"))
+
+    # --- Media metadata & tags (workflow signals) ---
+    if report.top_tags:
+        st.subheader("Media metadata & tags (workflow signals)")
+        st.caption(
+            "Tags extracted from video/camera clips — the most frequent tags "
+            "indicate where the workflow needs attention."
+        )
+        tdf = pd.DataFrame(
+            [{"tag": t.tag, "count": t.count} for t in report.top_tags]
+        ).set_index("tag")
+        st.bar_chart(tdf)
+    # --- Action-item log: run the agent on demand, then list what it has logged ---
+    st.divider()
+    st.subheader(f"Action-item log · built by {agent_name()}")
+    st.caption(
+        "An AI agent (Claude Haiku when a key is set, deterministic rules "
+        "otherwise) turns incoming bridge data into a logged, prioritized "
+        "action list with a rationale for each item."
+    )
+    if st.button("Run agent on incoming data", type="primary"):
+        result = build_action_log()  # no farm / window filter
+        st.write(result.summary)
+        for p in result.points:
+            st.markdown(f"- {p}")
+
+    log = action_log()  # fetched after the optional run above, so new items appear
+    if log:
+        st.dataframe(
+            [
+                {
+                    "priority": a["priority"],
+                    "task": a["task"],
+                    "owner": a["owner"],
+                    "due": a["due"],
+                    "rationale": a["rationale"],
+                    "by": a["created_by"],
+                    "logged": a["logged_at"],
+                }
+                for a in log
+            ],
+            use_container_width=True,
+        )
+    else:
+        st.info("Log is empty. Ingest data, then run the agent.")
+
+    # --- Aggregated metric trends (native charts; one per metric) ---
+    if report.metric_series:
+        st.divider()
+        st.subheader("Aggregated metrics over time")
+        for metric in sorted(report.metric_series):
+            pts = report.metric_series[metric]
+            mdf = pd.DataFrame(
+                [{"date": p.occurred_at, "value": p.value, "asset": p.asset} for p in pts]
+            )
+            # Pivot so each asset is its own line; native chart avoids heavy deps.
+            wide = mdf.pivot_table(
+                index="date", columns="asset", values="value", aggfunc="mean"
+            )
+            st.caption(metric)
+            st.line_chart(wide)
+
+if __name__ == "__main__":
+    main()
diff --git a/agtech-ops/agtech_ops/db.py b/agtech-ops/agtech_ops/db.py
new file mode 100644
index 0000000000..030ea700a8
--- /dev/null
+++ b/agtech-ops/agtech_ops/db.py
@@ -0,0 +1,84 @@
+"""Database engine/session helpers and entity-resolution upserts."""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from contextlib import contextmanager
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+
+from .config import get_settings
+from .models import Asset, AssetType, Base, Farm
+
+_engine = None
+_SessionLocal: sessionmaker[Session] | None = None
+
+
+def get_engine():
+    global _engine
+    if _engine is None:
+        url = get_settings().database_url
+        connect_args = {"check_same_thread": False} if url.startswith("sqlite") else {}
+        _engine = create_engine(url, connect_args=connect_args, future=True)
+    return _engine
+
+
+def init_db() -> None:
+    Base.metadata.create_all(get_engine())  # emits CREATE TABLE for every model registered on Base
+
+
+def get_sessionmaker() -> sessionmaker[Session]:
+    global _SessionLocal
+    if _SessionLocal is None:
+        _SessionLocal = sessionmaker(
+            bind=get_engine(), expire_on_commit=False, future=True
+        )
+    return _SessionLocal
+
+
+@contextmanager
+def session_scope() -> Iterator[Session]:
+    session = get_sessionmaker()()  # new Session from the shared factory
+    try:
+        yield session
+        session.commit()  # reached only when the with-body raised nothing
+    except Exception:
+        session.rollback()  # also covers a failure inside commit() itself
+        raise
+    finally:
+        session.close()
+
+
+def get_or_create_farm(session: Session, name: str) -> Farm:
+    name = name.strip()  # farms are matched and stored by whitespace-trimmed name
+    farm = session.query(Farm).filter(Farm.name == name).one_or_none()
+    if farm is None:
+        farm = Farm(name=name)
+        session.add(farm)
+        session.flush()  # flush (no commit) so farm.id is available; get_or_create_asset reads it
+    return farm
+
+
+def get_or_create_asset(
+    session: Session,
+    farm: Farm,
+    name: str,
+    asset_type: AssetType = AssetType.other,
+) -> Asset:
+    """Resolve an asset by (farm, name), the core of cross-source context."""
+    name = name.strip()
+    asset = (
+        session.query(Asset)
+        .filter(Asset.farm_id == farm.id, Asset.name == name)
+        .one_or_none()
+    )
+    if asset is None:
+        asset = Asset(farm_id=farm.id, name=name, type=asset_type)
+        session.add(asset)
+        session.flush()
+    elif asset_type is not AssetType.other and asset.type is AssetType.other:
+        # Upgrade a placeholder type once we learn what the asset really is.
+        asset.type = asset_type
+        session.flush()
+    return asset
diff --git a/agtech-ops/agtech_ops/ingest/__init__.py b/agtech-ops/agtech_ops/ingest/__init__.py
new file mode 100644
index 0000000000..1ec7a62ff7
--- /dev/null
+++ b/agtech-ops/agtech_ops/ingest/__init__.py
@@ -0,0 +1,18 @@
+"""Ingestors that turn raw source data into validated ``EventIn`` records."""
+
+from .csv_ingest import dataframe_to_events, parse_partner_csv
+from .registry import SUPPORTED_EXTENSIONS, ingest_file
+from .tabular_ingest import parse_excel, parse_json_records
+from .text_ingest import parse_text_document
+from .whatsapp_ingest import parse_whatsapp_export
+
+__all__ = [
+    "parse_partner_csv",
+    "dataframe_to_events",
+    "parse_excel",
+    "parse_json_records",
+    "parse_whatsapp_export",
+    "parse_text_document",
+    "ingest_file",
+    "SUPPORTED_EXTENSIONS",
+]
diff --git a/agtech-ops/agtech_ops/ingest/csv_ingest.py b/agtech-ops/agtech_ops/ingest/csv_ingest.py
new file mode 100644
index 0000000000..b9dd546bae
--- /dev/null
+++ b/agtech-ops/agtech_ops/ingest/csv_ingest.py
@@ -0,0 +1,229 @@
+"""Parse partner / Dropbox CSV exports into normalized events.
+
+Expected (case-insensitive, flexible) columns:
+
+    farm, asset, date, [asset_type], [category], [metric], [value], [notes]
+
+Partners are messy, so column names are normalized and unknown extras are kept
+in ``raw``. Rows that cannot be salvaged are reported as errors rather than
+aborting the whole file.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import io
+import json
+import re
+
+import pandas as pd
+
+from ..models import AssetType, Source
+from ..schemas import EventIn
+
+# Map common partner header variants onto our canonical names.
+_COLUMN_ALIASES = {
+    "farm": "farm",
+    "farm_name": "farm",
+    "site": "farm",
+    "asset": "asset",
+    "herd": "asset",
+    "crop": "asset",
+    "field": "asset",
+    "asset_name": "asset",
+    "paddock": "asset",
+    "asset_type": "asset_type",
+    "type": "asset_type",
+    "date": "date",
+    "timestamp": "date",
+    "datetime": "date",
+    "recorded_at": "date",
+    "category": "category",
+    "event": "category",
+    "metric": "metric",
+    "measure": "metric",
+    "value": "value",
+    "reading": "value",
+    "amount": "value",
+    "notes": "notes",
+    "note": "notes",
+    "comment": "notes",
+    "comments": "notes",
+    "description": "notes",
+    # Video / clip metadata (tags drive workflow).
+    "tags": "tags",
+    "tag": "tags",
+    "labels": "tags",
+    "detections": "tags",
+    "duration": "duration_s",
+    "duration_s": "duration_s",
+    "length": "duration_s",
+    "camera": "camera",
+    "cam": "camera",
+    "clip": "clip",
+    "clip_id": "clip",
+    "video": "clip",
+    "filename": "clip",
+}
+
+_REQUIRED = {"farm", "asset", "date"}
+# Presence of any of these implies the rows are video/clip metadata.
+_MEDIA_COLUMNS = {"tags", "duration_s", "camera", "clip"}
+
+
+def _split_tags(raw: object) -> list[str]:
+    if raw is None or (isinstance(raw, float) and pd.isna(raw)):
+        return []
+    if isinstance(raw, (list, tuple)):
+        items = raw
+    else:
+        items = re.split(r"[;,|]", str(raw))
+    return [t.strip() for t in items if str(t).strip()]
+
+
+def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
+    renamed = {}
+    for col in df.columns:
+        key = str(col).strip().lower().replace(" ", "_")
+        renamed[col] = _COLUMN_ALIASES.get(key, key)
+    return df.rename(columns=renamed)
+
+
+def _coerce_asset_type(raw: object) -> AssetType:
+    if raw is None or (isinstance(raw, float) and pd.isna(raw)):
+        return AssetType.other
+    try:
+        return AssetType(str(raw).strip().lower())
+    except ValueError:
+        return AssetType.other
+
+
+def parse_partner_csv(
+    data: str | bytes,
+    *,
+    source: Source = Source.csv_partner,
+    sep: str | None = None,
+) -> tuple[list[EventIn], list[str]]:
+    """Return ``(events, errors)`` parsed from CSV/TSV ``data``."""
+
+    if isinstance(data, bytes):
+        buffer: io.StringIO | io.BytesIO = io.BytesIO(data)
+    else:
+        buffer = io.StringIO(data)
+
+    try:
+        read_kwargs = {"sep": sep} if sep is not None else {}
+        df = pd.read_csv(buffer, **read_kwargs)
+    except Exception as exc:  # noqa: BLE001 - surfaced to caller
+        return [], [f"could not read CSV: {exc}"]
+
+    return dataframe_to_events(df, source=source)
+
+
+def dataframe_to_events(
+    df: pd.DataFrame,
+    *,
+    source: Source = Source.csv_partner,
+) -> tuple[list[EventIn], list[str]]:
+    """Convert an already-loaded tabular frame into normalized events.
+
+    Shared by the CSV, Excel and JSON ingestors so column aliasing and row
+    coercion behave identically regardless of the original file format.
+    """
+
+    errors: list[str] = []
+    if df is None or df.empty:
+        return [], ["no rows found"]
+
+    df = _normalize_columns(df)
+
+    missing = _REQUIRED - set(df.columns)
+    if missing:
+        return [], [f"missing required column(s): {', '.join(sorted(missing))}"]
+
+    known = {
+        "farm", "asset", "asset_type", "date", "category", "metric", "value",
+        "notes", "tags", "duration_s", "camera", "clip",
+    }
+    extra_cols = [c for c in df.columns if c not in known]
+
+    # Clip-metadata tables (tags/duration/camera/clip) are auto-tagged as media
+    # unless the caller explicitly asked for a different source.
+    is_media = bool(_MEDIA_COLUMNS & set(df.columns))
+    if is_media and source is Source.csv_partner:
+        source = Source.media
+
+    events: list[EventIn] = []
+    for idx, row in df.iterrows():
+        rownum = int(idx) + 2  # +1 for header, +1 for 1-based humans
+        try:
+            occurred_at = pd.to_datetime(row["date"], errors="coerce")
+            if pd.isna(occurred_at):
+                errors.append(f"row {rownum}: unparseable date {row['date']!r}")
+                continue
+
+            value = row.get("value")
+            if value is not None and not pd.isna(value):
+                try:
+                    value = float(value)
+                except (TypeError, ValueError):
+                    value = None
+            else:
+                value = None
+
+            tags = _split_tags(row.get("tags"))
+
+            # Clip duration becomes a numeric metric so it aggregates like data.
+            metric = _clean(row.get("metric"))
+            duration = row.get("duration_s")
+            if metric is None and duration is not None and not pd.isna(duration):
+                try:
+                    value = float(duration)
+                    metric = "clip_duration_s"
+                except (TypeError, ValueError):
+                    pass
+
+            text = _clean(row.get("notes"))
+            if text is None and tags:
+                text = "Clip tagged: " + ", ".join(tags)
+
+            raw_extra = {c: _jsonable(row.get(c)) for c in extra_cols}
+
+            events.append(
+                EventIn(
+                    farm=str(row["farm"]),
+                    asset=str(row["asset"]),
+                    asset_type=_coerce_asset_type(row.get("asset_type")),
+                    source=source,
+                    occurred_at=occurred_at.to_pydatetime()
+                    if hasattr(occurred_at, "to_pydatetime")
+                    else dt.datetime.fromisoformat(str(occurred_at)),
+                    category=_clean(row.get("category"))
+                    or ("media" if is_media else None),
+                    metric=metric,
+                    value=value,
+                    author=_clean(row.get("camera")),
+                    text=text,
+                    tags=tags,
+                    raw=json.dumps(raw_extra) if raw_extra else None,
+                )
+            )
+        except Exception as exc:  # noqa: BLE001 - per-row resilience
+            errors.append(f"row {rownum}: {exc}")
+
+    return events, errors
+
+
+def _clean(v: object) -> str | None:
+    if v is None or (isinstance(v, float) and pd.isna(v)):
+        return None
+    s = str(v).strip()
+    return s or None
+
+
+def _jsonable(v: object):
+    if v is None or (isinstance(v, float) and pd.isna(v)):
+        return None
+    if isinstance(v, (int, float, str, bool)):
+        return v
+    return str(v)
diff --git a/agtech-ops/agtech_ops/ingest/registry.py b/agtech-ops/agtech_ops/ingest/registry.py
new file mode 100644
index 0000000000..549cff269f
--- /dev/null
+++ b/agtech-ops/agtech_ops/ingest/registry.py
@@ -0,0 +1,96 @@
+"""Single entry point that ingests *any* supported file by dispatching on type.
+
+Supported today:
+  - Tabular:   .csv, .tsv, .xlsx, .xls, .json
+  - Free text: .txt, .md, .log, .pdf, .docx
+WhatsApp exports (.txt) are auto-detected and routed to the chat parser.
+
+Tabular files carry their own farm/asset columns. Free-text files do not, so a
+``farm`` (and optional known asset list) is supplied for context resolution.
+"""
+
+from __future__ import annotations
+
+import os
+
+from ..models import Source
+from ..schemas import EventIn
+from .csv_ingest import parse_partner_csv
+from .tabular_ingest import parse_excel, parse_json_records
+from .text_ingest import (
+    extract_text_from_docx,
+    extract_text_from_pdf,
+    parse_text_document,
+)
+from .whatsapp_ingest import _LINE_RE, parse_whatsapp_export
+
+# File extensions (lower-case, with the leading dot) that ``ingest_file`` has a
+# parser branch for.
+SUPPORTED_EXTENSIONS = {
+    ".csv", ".tsv", ".xlsx", ".xls", ".json",
+    ".txt", ".md", ".log", ".pdf", ".docx",
+}
+
+
+def _looks_like_whatsapp(text: str) -> bool:
+    lines = [ln for ln in text.splitlines() if ln.strip()][:30]
+    if not lines:
+        return False
+    hits = sum(1 for ln in lines if _LINE_RE.match(ln.strip()))
+    return hits >= max(2, len(lines) // 3)
+
+
+def ingest_file(
+    filename: str,
+    data: bytes,
+    *,
+    farm: str | None = None,
+    known_assets: list[str] | None = None,
+    default_asset: str = "General",
+) -> tuple[list[EventIn], list[str]]:
+    """Return ``(events, errors)`` for a single uploaded file."""
+    ext = os.path.splitext(filename)[1].lower()
+    known_assets = known_assets or []
+
+    # --- Tabular (self-describing: farm/asset in columns) ---
+    if ext == ".csv":
+        return parse_partner_csv(data)
+    if ext == ".tsv":
+        return parse_partner_csv(data, sep="\t")
+    if ext in {".xlsx", ".xls"}:
+        return parse_excel(data)
+    if ext == ".json":
+        return parse_json_records(data)
+
+    # --- Free text (needs a farm for context) ---
+    if ext in {".txt", ".md", ".log", ".pdf", ".docx"}:
+        if not farm:
+            return [], [f"{filename}: a farm name is required for text documents"]
+
+        if ext == ".pdf":
+            text, errors = extract_text_from_pdf(data)
+        elif ext == ".docx":
+            text, errors = extract_text_from_docx(data)
+        else:
+            errors = []
+            try:
+                text = data.decode("utf-8", errors="replace")
+            except Exception as exc:  # noqa: BLE001
+                return [], [f"{filename}: could not decode text ({exc})"]
+
+        if errors:
+            return [], [f"{filename}: {e}" for e in errors]
+
+        if ext in {".txt", ".log"} and _looks_like_whatsapp(text):
+            return parse_whatsapp_export(
+                text, farm=farm, known_assets=known_assets, default_asset=default_asset
+            )
+
+        return parse_text_document(
+            text,
+            farm=farm,
+            known_assets=known_assets,
+            default_asset=default_asset,
+            source=Source.manual,
+            doc_name=filename,
+        )
+
+    return [], [f"{filename}: unsupported file type '{ext}'"]
diff --git a/agtech-ops/agtech_ops/ingest/tabular_ingest.py b/agtech-ops/agtech_ops/ingest/tabular_ingest.py
new file mode 100644
index 0000000000..347edd2eee
--- /dev/null
+++ b/agtech-ops/agtech_ops/ingest/tabular_ingest.py
@@ -0,0 +1,68 @@
+"""Excel and JSON ingestion, reusing the shared tabular -> events core."""
+
+from __future__ import annotations
+
+import io
+import json as jsonlib
+
+import pandas as pd
+
+from ..models import Source
+from ..schemas import EventIn
+from .csv_ingest import dataframe_to_events
+
+
+def parse_excel(
+    data: bytes,
+    *,
+    source: Source = Source.csv_partner,
+) -> tuple[list[EventIn], list[str]]:
+    """Parse every sheet of an .xlsx/.xls workbook into events."""
+    try:
+        sheets = pd.read_excel(io.BytesIO(data), sheet_name=None)
+    except ImportError:
+        return [], [
+            "Excel support requires the 'files' extra (pip install -e '.[files]')."
+        ]
+    except Exception as exc:  # noqa: BLE001
+        return [], [f"could not read Excel file: {exc}"]
+
+    all_events: list[EventIn] = []
+    all_errors: list[str] = []
+    for sheet_name, df in sheets.items():
+        events, errors = dataframe_to_events(df, source=source)
+        all_events.extend(events)
+        all_errors.extend(f"[sheet {sheet_name}] {e}" for e in errors)
+    if not all_events and not all_errors:
+        all_errors.append("workbook contained no rows")
+    return all_events, all_errors
+
+
+def parse_json_records(
+    data: str | bytes,
+    *,
+    source: Source = Source.csv_partner,
+) -> tuple[list[EventIn], list[str]]:
+    """Parse a JSON array (or {records|data|items: [...]}) of record objects."""
+    try:
+        payload = jsonlib.loads(data)
+    except Exception as exc:  # noqa: BLE001
+        return [], [f"could not parse JSON: {exc}"]
+
+    if isinstance(payload, dict):
+        for key in ("records", "data", "items", "rows"):
+            if isinstance(payload.get(key), list):
+                payload = payload[key]
+                break
+        else:
+            payload = [payload]
+
+    if not isinstance(payload, list) or not payload:
+        return [], ["JSON did not contain a list of records"]
+
+    try:
+        df = pd.DataFrame(payload)
+    except Exception as exc:  # noqa: BLE001
+        return [], [f"could not tabulate JSON records: {exc}"]
+
+    return dataframe_to_events(df, source=source)
diff --git a/agtech-ops/agtech_ops/ingest/text_ingest.py b/agtech-ops/agtech_ops/ingest/text_ingest.py
new file mode 100644
index 0000000000..7b7f44f0e8
--- /dev/null
+++ b/agtech-ops/agtech_ops/ingest/text_ingest.py
@@ -0,0 +1,119 @@
+"""Free-text document ingestion: plain text, Markdown, PDF, and Word.
+
+These formats have no tabular structure, so each meaningful chunk (a paragraph)
+becomes a text ``Event`` that the summarizer can mine for action items. An
+optional leading date in a chunk is used as its timestamp; otherwise the
+ingestion time is used. The asset is inferred from known asset names, falling
+back to a default so nothing is dropped.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import re
+
+from ..models import Source
+from ..schemas import EventIn
+
+# Matches a date at the very start of a chunk, e.g. "2026-06-26", "26/06/2026".
+# Group 1 is the date itself; trailing separators (space, ':', ',', '-') are
+# consumed by the match so they can be cut off together with the date.
+_LEADING_DATE = re.compile(
+    r"^\s*(\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4})\b[\s:,-]*"
+)
+# Tried in order by ``_parse_leading_date``; the first format that parses wins,
+# so an ambiguous date such as "03/04/2026" is read day-first.
+_DATE_FORMATS = (
+    "%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y",
+    "%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y",
+    "%d.%m.%Y", "%Y.%m.%d",
+)
+
+
+def _parse_leading_date(chunk: str) -> tuple[dt.datetime | None, str]:
+    m = _LEADING_DATE.match(chunk)
+    if not m:
+        return None, chunk
+    raw = m.group(1)
+    for fmt in _DATE_FORMATS:
+        try:
+            return dt.datetime.strptime(raw, fmt), chunk[m.end():]
+        except ValueError:
+            continue
+    return None, chunk
+
+
+def _infer_asset(text: str, known_assets: list[str]) -> str | None:
+    low = text.lower()
+    for asset in sorted(known_assets, key=len, reverse=True):
+        if asset.lower() in low:
+            return asset
+    return None
+
+
+def _split_chunks(text: str) -> list[str]:
+    # Prefer blank-line-separated paragraphs; fall back to non-empty lines.
+    paras = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
+    if len(paras) <= 1:
+        paras = [ln.strip() for ln in text.splitlines() if ln.strip()]
+    return paras
+
+
+def parse_text_document(
+    text: str,
+    *,
+    farm: str,
+    known_assets: list[str] | None = None,
+    default_asset: str = "General",
+    source: Source = Source.manual,
+    doc_name: str | None = None,
+) -> tuple[list[EventIn], list[str]]:
+    known_assets = known_assets or []
+    now = dt.datetime.now()
+    events: list[EventIn] = []
+
+    for chunk in _split_chunks(text):
+        when, body = _parse_leading_date(chunk)
+        body = body.strip() or chunk
+        events.append(
+            EventIn(
+                farm=farm,
+                asset=_infer_asset(body, known_assets) or default_asset,
+                source=source,
+                occurred_at=when or now,
+                category="document",
+                author=doc_name,
+                text=body,
+                raw=doc_name,
+            )
+        )
+
+    if not events:
+        return [], ["document contained no readable text"]
+    return events, []
+
+
+def extract_text_from_pdf(data: bytes) -> tuple[str, list[str]]:
+    try:
+        from pypdf import PdfReader
+    except ImportError:
+        return "", ["PDF support requires the 'files' extra (pip install -e '.[files]')."]
+    import io
+
+    try:
+        reader = PdfReader(io.BytesIO(data))
+        pages = [page.extract_text() or "" for page in reader.pages]
+    except Exception as exc:  # noqa: BLE001
+        return "", [f"could not read PDF: {exc}"]
+    return "\n\n".join(pages), []
+
+
+def extract_text_from_docx(data: bytes) -> tuple[str, list[str]]:
+    try:
+        import docx  # python-docx
+    except ImportError:
+        return "", ["DOCX support requires the 'files' extra (pip install -e '.[files]')."]
+    import io
+
+    try:
+        document = docx.Document(io.BytesIO(data))
+        paras = [p.text for p in document.paragraphs if p.text.strip()]
+    except Exception as exc:  # noqa: BLE001
+        return "", [f"could not read DOCX: {exc}"]
+    return "\n\n".join(paras), []
diff --git a/agtech-ops/agtech_ops/ingest/whatsapp_ingest.py b/agtech-ops/agtech_ops/ingest/whatsapp_ingest.py
new file mode 100644
index 0000000000..8575e9699a
--- /dev/null
+++ b/agtech-ops/agtech_ops/ingest/whatsapp_ingest.py
@@ -0,0 +1,121 @@
+"""Parse a WhatsApp chat export into normalized events.
+
+WhatsApp "Export chat" produces lines like::
+
+    [2026-06-28, 8:56:01 PM] Alice: North Herd cow #42 looks lame, call vet
+    2026/06/28, 20:56 - Bob: Ordered more feed for Field 3
+
+Both bracketed and dash formats are supported. Multi-line messages (a line
+without a new timestamp header) are appended to the previous message.
+
+Because a chat does not name the farm/asset in a structured way, the caller
+supplies a default ``farm``; the asset is inferred from the message text when a
+known asset name is provided, otherwise it falls back to a "General" asset so
+nothing is dropped.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import re
+
+from ..models import Source
+from ..schemas import EventIn
+
+# [2026-06-28, 8:56:01 PM] Name: message    OR
+# 2026/06/28, 20:56 - Name: message
+# Named groups: ``date``, ``time`` (with optional seconds and AM/PM),
+# ``author`` (up to 80 characters without a colon) and ``text``.
+_LINE_RE = re.compile(
+    r"^\[?\s*"
+    r"(?P<date>\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4})"
+    r"[,]?\s+"
+    r"(?P<time>\d{1,2}:\d{2}(?::\d{2})?\s*(?:[AaPp][Mm])?)"
+    r"\s*\]?\s*[-]?\s*"
+    r"(?P<author>[^:]{1,80}?):\s"
+    r"(?P<text>.*)$"
+)
+
+# Date and time layouts tried by ``_parse_dt``, in order; the first combination
+# that parses wins, so ambiguous dates are read day-first. All date formats use
+# a four-digit year (%Y).
+_DATE_FORMATS = (
+    "%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y",
+    "%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y",
+    "%d.%m.%Y", "%Y.%m.%d",
+)
+_TIME_FORMATS = ("%I:%M:%S %p", "%I:%M %p", "%H:%M:%S", "%H:%M")
+
+
+def _parse_dt(date_s: str, time_s: str) -> dt.datetime | None:
+    time_s = time_s.strip().upper().replace("\u202f", " ")
+    for df in _DATE_FORMATS:
+        for tf in _TIME_FORMATS:
+            try:
+                return dt.datetime.strptime(f"{date_s} {time_s}", f"{df} {tf}")
+            except ValueError:
+                continue
+    return None
+
+
+def _infer_asset(text: str, known_assets: list[str]) -> str | None:
+    low = text.lower()
+    # Longest match first so "North Herd" beats "Herd".
+    for asset in sorted(known_assets, key=len, reverse=True):
+        if asset.lower() in low:
+            return asset
+    return None
+
+
+def parse_whatsapp_export(
+    data: str,
+    *,
+    farm: str,
+    known_assets: list[str] | None = None,
+    default_asset: str = "General",
+) -> tuple[list[EventIn], list[str]]:
+    """Return ``(events, errors)`` from a WhatsApp export string.
+
+    Args:
+        data: Full text of the exported chat.
+        farm: Farm every resulting event is attached to.
+        known_assets: Asset names used to infer each message's asset.
+        default_asset: Asset used when no known asset is mentioned.
+
+    Returns:
+        One event per message with non-empty text, plus an error entry for
+        every header line whose timestamp could not be parsed.
+    """
+
+    known_assets = known_assets or []
+    errors: list[str] = []
+    events: list[EventIn] = []
+
+    # Message currently being assembled (header fields + accumulated text).
+    current: dict | None = None
+
+    def flush(cur: dict) -> None:
+        # Emit the assembled message as an event; empty messages are dropped.
+        text = cur["text"].strip()
+        if not text:
+            return
+        asset = _infer_asset(text, known_assets) or default_asset
+        events.append(
+            EventIn(
+                farm=farm,
+                asset=asset,
+                source=Source.whatsapp,
+                occurred_at=cur["dt"],
+                author=cur["author"],
+                text=text,
+                raw=cur["raw"],
+            )
+        )
+
+    for raw_line in data.splitlines():
+        line = raw_line.rstrip("\n")
+        m = _LINE_RE.match(line.strip())
+        if m:
+            # A new header closes the previous message.
+            if current is not None:
+                flush(current)
+            parsed = _parse_dt(m.group("date"), m.group("time"))
+            if parsed is None:
+                errors.append(f"unparseable timestamp: {line[:60]!r}")
+                # Drop this message; with no current message, its continuation
+                # lines are skipped too.
+                current = None
+                continue
+            current = {
+                "dt": parsed,
+                "author": m.group("author").strip(),
+                "text": m.group("text"),
+                # Only the header line is kept as raw, not continuation lines.
+                "raw": line,
+            }
+        elif current is not None and line.strip():
+            # Continuation of a multi-line message.
+            current["text"] += "\n" + line.strip()
+        # blank lines / system messages without a header are ignored
+
+    # The last message has no following header to trigger its flush.
+    if current is not None:
+        flush(current)
+
+    return events, errors
diff --git a/agtech-ops/agtech_ops/models.py b/agtech-ops/agtech_ops/models.py
new file mode 100644
index 0000000000..44d938d690
--- /dev/null
+++ b/agtech-ops/agtech_ops/models.py
@@ -0,0 +1,141 @@
+"""Canonical data model.
+
+The whole point of the hub is that data from very different sources (a partner
+CSV, a Dropbox spreadsheet, a WhatsApp thread) gets resolved onto the *same*
+entities so it can be reasoned about together:
+
+    Farm --< Asset (a herd, crop block, or field) --< Event >-- ActionItem
+
+An ``Event`` is the normalized unit every ingestor produces. An ``ActionItem``
+is what the summarizer produces from a batch of events.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import enum
+
+from sqlalchemy import (
+    DateTime,
+    Enum,
+    Float,
+    ForeignKey,
+    String,
+    Text,
+    UniqueConstraint,
+)
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
+
+
+class Base(DeclarativeBase):
+    """Declarative base class that every ORM model in this module inherits."""
+
+    pass
+
+
+class AssetType(str, enum.Enum):
+    """Kind of asset an event is about; ``other`` is the model default."""
+
+    herd = "herd"
+    crop = "crop"
+    field = "field"
+    other = "other"
+
+
+class Source(str, enum.Enum):
+    """Origin of an ingested event (which channel or ingestor produced it)."""
+
+    csv_partner = "csv_partner"
+    dropbox = "dropbox"
+    whatsapp = "whatsapp"
+    media = "media"
+    manual = "manual"
+
+
+class ActionStatus(str, enum.Enum):
+    """Lifecycle state of an action item; new items default to ``open``."""
+
+    open = "open"
+    done = "done"
+    dismissed = "dismissed"
+
+
+class Priority(str, enum.Enum):
+    """Urgency of an action item; ``medium`` is the default."""
+
+    low = "low"
+    medium = "medium"
+    high = "high"
+
+
+class Farm(Base):
+    """A farming business; the top-level entity that owns assets."""
+
+    __tablename__ = "farms"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    # Farm names are unique and indexed.
+    name: Mapped[str] = mapped_column(String(200), unique=True, index=True)
+
+    # Assets belong to the farm: the cascade removes them with it.
+    assets: Mapped[list["Asset"]] = relationship(
+        back_populates="farm", cascade="all, delete-orphan"
+    )
+
+
+class Asset(Base):
+    """A herd, crop block, or field — the thing events are *about*."""
+
+    __tablename__ = "assets"
+    # Asset names only need to be unique within a farm, not globally.
+    __table_args__ = (UniqueConstraint("farm_id", "name", name="uq_asset_farm_name"),)
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    farm_id: Mapped[int] = mapped_column(ForeignKey("farms.id"), index=True)
+    name: Mapped[str] = mapped_column(String(200), index=True)
+    type: Mapped[AssetType] = mapped_column(
+        Enum(AssetType), default=AssetType.other
+    )
+
+    farm: Mapped[Farm] = relationship(back_populates="assets")
+    # Events belong to the asset: the cascade removes them with it.
+    events: Mapped[list["Event"]] = relationship(
+        back_populates="asset", cascade="all, delete-orphan"
+    )
+
+
+class Event(Base):
+    """A normalized record from any source.
+
+    ``metric``/``value`` carry structured measurements (e.g. from tabular
+    files); ``text`` carries free-form content such as chat messages or
+    document paragraphs. Either or both may be present.
+    """
+
+    __tablename__ = "events"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    asset_id: Mapped[int] = mapped_column(ForeignKey("assets.id"), index=True)
+    source: Mapped[Source] = mapped_column(Enum(Source), index=True)
+    # When the event happened, as supplied by the ingestor.
+    occurred_at: Mapped[dt.datetime] = mapped_column(DateTime, index=True)
+    category: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    metric: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    value: Mapped[float | None] = mapped_column(Float, nullable=True)
+    author: Mapped[str | None] = mapped_column(String(200), nullable=True)
+    text: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Comma-separated tags (e.g. from video/clip analysis) used to drive workflow.
+    tags: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Source-specific payload kept for provenance; its format varies by ingestor.
+    raw: Mapped[str | None] = mapped_column(Text, nullable=True)
+
+    asset: Mapped[Asset] = relationship(back_populates="events")
+
+
+class ActionItem(Base):
+    """An ops task distilled from one or more events."""
+
+    __tablename__ = "action_items"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    farm_id: Mapped[int | None] = mapped_column(
+        ForeignKey("farms.id"), index=True, nullable=True
+    )
+    asset_id: Mapped[int | None] = mapped_column(
+        ForeignKey("assets.id"), index=True, nullable=True
+    )
+    task: Mapped[str] = mapped_column(Text)
+    owner: Mapped[str | None] = mapped_column(String(200), nullable=True)
+    due: Mapped[dt.date | None] = mapped_column(DateTime, nullable=True)
+    priority: Mapped[Priority] = mapped_column(Enum(Priority), default=Priority.medium)
+    status: Mapped[ActionStatus] = mapped_column(
+        Enum(ActionStatus), default=ActionStatus.open, index=True
+    )
+    source_summary: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # Provenance for the action-item log: which agent produced it and why.
+    created_by: Mapped[str | None] = mapped_column(String(50), nullable=True)
+    rationale: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[dt.datetime] = mapped_column(
+        DateTime, default=lambda: dt.datetime.now(dt.timezone.utc)
+    )
diff --git a/agtech-ops/agtech_ops/schemas.py b/agtech-ops/agtech_ops/schemas.py
new file mode 100644
index 0000000000..67a0e27c13
--- /dev/null
+++ b/agtech-ops/agtech_ops/schemas.py
@@ -0,0 +1,106 @@
+"""Pydantic schemas: validate messy inbound data and shape AI output."""
+
+from __future__ import annotations
+
+import datetime as dt
+
+from pydantic import BaseModel, Field, field_validator
+
+from .models import AssetType, Priority, Source
+
+
+class EventIn(BaseModel):
+    """A validated, source-agnostic event ready to persist."""
+
+    # Farm and asset names; both are stripped and must be non-blank.
+    farm: str
+    asset: str
+    asset_type: AssetType = AssetType.other
+    source: Source
+    occurred_at: dt.datetime
+    category: str | None = None
+    # Optional structured measurement: metric name plus numeric value.
+    metric: str | None = None
+    value: float | None = None
+    author: str | None = None
+    text: str | None = None
+    # Fresh empty list per instance (default_factory), never a shared default.
+    tags: list[str] = Field(default_factory=list)
+    raw: str | None = None
+
+    @field_validator("farm", "asset")
+    @classmethod
+    def _not_blank(cls, v: str) -> str:
+        """Strip surrounding whitespace and reject empty names."""
+        v = (v or "").strip()
+        if not v:
+            raise ValueError("must not be blank")
+        return v
+
+
+class ActionItemOut(BaseModel):
+    """One action item as produced by a summarizer.
+
+    Only ``task`` is required; ``priority`` defaults to medium and every other
+    field defaults to ``None``.
+    """
+
+    task: str = Field(..., description="Concrete, actionable instruction.")
+    owner: str | None = Field(None, description="Person/team responsible.")
+    due: dt.date | None = Field(None, description="Suggested due date if implied.")
+    priority: Priority = Priority.medium
+    # Asset is referenced by name here, not by database id.
+    asset: str | None = Field(None, description="Asset name this relates to.")
+    rationale: str | None = Field(
+        None, description="Short reason/evidence this action was raised."
+    )
+
+
+class SummaryResult(BaseModel):
+    """The structured output the summarizer must produce.
+
+    ``summary`` is required; ``points`` and ``action_items`` default to empty
+    lists.
+    """
+
+    summary: str = Field(..., description="Short narrative of the situation.")
+    points: list[str] = Field(default_factory=list, description="Key bullet points.")
+    action_items: list[ActionItemOut] = Field(default_factory=list)
+
+
+class IngestResult(BaseModel):
+    """Outcome of persisting one batch of events."""
+
+    source: Source
+    # Number of events written in this batch.
+    events_ingested: int
+    # Names of the farms / assets the batch touched.
+    farms: list[str] = Field(default_factory=list)
+    assets: list[str] = Field(default_factory=list)
+    errors: list[str] = Field(default_factory=list)
+
+
+class FileIngestResult(BaseModel):
+    """Per-file outcome plus an aggregate roll-up for a multi-file upload."""
+
+    files_processed: int
+    # Total events stored across all files.
+    events_ingested: int
+    # One dict per file with the keys "file", "events" and "errors".
+    per_file: list[dict] = Field(default_factory=list)
+    farms: list[str] = Field(default_factory=list)
+    assets: list[str] = Field(default_factory=list)
+    # Errors from every file, collected into one flat list.
+    errors: list[str] = Field(default_factory=list)
+
+
+class AssetSummary(BaseModel):
+    """Per-asset roll-up row used in ``AggregateReport.by_asset``."""
+
+    farm: str
+    asset: str
+    # String value of the asset's type.
+    asset_type: str
+    # Number of events recorded for this asset.
+    events: int
+    # Timestamp of the most recent event, if any.
+    last_seen: dt.datetime | None = None
+
+
+class MetricPoint(BaseModel):
+    """A single observation in a metric time series."""
+
+    # Name of the asset the measurement belongs to.
+    asset: str
+    occurred_at: dt.datetime
+    value: float
+
+
+class TagCount(BaseModel):
+    """How many times a tag occurred."""
+
+    tag: str
+    count: int
+
+
+class AggregateReport(BaseModel):
+    """Compiled, cross-source view of everything ingested."""
+
+    total_events: int
+    total_farms: int
+    total_assets: int
+    open_action_items: int
+    # Count of events whose source is ``media``.
+    media_clips: int = 0
+    # Event count keyed by source value.
+    by_source: dict[str, int] = Field(default_factory=dict)
+    by_asset: list[AssetSummary] = Field(default_factory=list)
+    # Time series keyed by metric name.
+    metric_series: dict[str, list[MetricPoint]] = Field(default_factory=dict)
+    top_tags: list[TagCount] = Field(default_factory=list)
+    # [earliest, latest] event timestamp; both None when there are no events.
+    date_range: list[dt.datetime | None] = Field(default_factory=list)
diff --git a/agtech-ops/agtech_ops/service.py b/agtech-ops/agtech_ops/service.py
new file mode 100644
index 0000000000..7b3eb16402
--- /dev/null
+++ b/agtech-ops/agtech_ops/service.py
@@ -0,0 +1,299 @@
+"""Application service layer: persist events, run summaries, store action items.
+
+Keeps the FastAPI layer and the Streamlit layer thin by centralizing all
+database + summarizer orchestration here.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+
+from sqlalchemy import func
+from sqlalchemy.orm import Session, joinedload
+
+from .db import get_or_create_asset, get_or_create_farm, session_scope
+from .ingest import ingest_file
+from .models import ActionItem, ActionStatus, Asset, Event, Farm, Priority, Source
+from .schemas import (
+    ActionItemOut,
+    AggregateReport,
+    AssetSummary,
+    EventIn,
+    FileIngestResult,
+    IngestResult,
+    MetricPoint,
+    SummaryResult,
+    TagCount,
+)
+from .summarize import get_summarizer
+
+
+def store_events(events: list[EventIn], errors: list[str] | None = None) -> IngestResult:
+    """Resolve farms/assets and persist a batch of validated events."""
+    errors = list(errors or [])
+    farms: set[str] = set()
+    assets: set[str] = set()
+    count = 0
+
+    with session_scope() as session:
+        for ev in events:
+            farm = get_or_create_farm(session, ev.farm)
+            asset = get_or_create_asset(session, farm, ev.asset, ev.asset_type)
+            session.add(
+                Event(
+                    asset_id=asset.id,
+                    source=ev.source,
+                    occurred_at=ev.occurred_at,
+                    category=ev.category,
+                    metric=ev.metric,
+                    value=ev.value,
+                    author=ev.author,
+                    text=ev.text,
+                    tags=",".join(ev.tags) if ev.tags else None,
+                    raw=ev.raw,
+                )
+            )
+            farms.add(farm.name)
+            assets.add(asset.name)
+            count += 1
+
+    source = events[0].source if events else Source.manual
+    return IngestResult(
+        source=source,
+        events_ingested=count,
+        farms=sorted(farms),
+        assets=sorted(assets),
+        errors=errors,
+    )
+
+
+def ingest_files(
+    files: list[tuple[str, bytes]],
+    *,
+    farm: str | None = None,
+    default_asset: str = "General",
+) -> FileIngestResult:
+    """Ingest a batch of heterogeneous files and compile the results.
+
+    ``files`` is a list of ``(filename, bytes)``. Tabular files self-describe
+    their farm/asset; free-text files use the supplied ``farm`` for context.
+    """
+    per_file: list[dict] = []
+    all_errors: list[str] = []
+    farms: set[str] = set()
+    assets: set[str] = set()
+    total = 0
+
+    for filename, data in files:
+        known = known_asset_names(farm)
+        events, errors = ingest_file(
+            filename, data, farm=farm, known_assets=known, default_asset=default_asset
+        )
+        if events:
+            res = store_events(events)
+            farms.update(res.farms)
+            assets.update(res.assets)
+            total += res.events_ingested
+            per_file.append(
+                {"file": filename, "events": res.events_ingested, "errors": errors}
+            )
+        else:
+            per_file.append({"file": filename, "events": 0, "errors": errors})
+        all_errors.extend(f"{filename}: {e}" for e in errors)
+
+    return FileIngestResult(
+        files_processed=len(files),
+        events_ingested=total,
+        per_file=per_file,
+        farms=sorted(farms),
+        assets=sorted(assets),
+        errors=all_errors,
+    )
+
+
+def aggregate(farm: str | None = None) -> AggregateReport:
+    """Compile a cross-source roll-up of everything ingested.
+
+    Args:
+        farm: When given, only events of the farm with this exact name are
+            included.
+
+    Returns:
+        An ``AggregateReport`` with totals, per-source and per-asset counts,
+        per-metric time series sorted by time, tag counts (most frequent
+        first) and the overall date range.
+    """
+    with session_scope() as session:
+        base = (
+            session.query(Event)
+            .join(Asset)
+            .join(Farm)
+        )
+        if farm:
+            base = base.filter(Farm.name == farm)
+
+        # Eager-load asset and farm alongside each event.
+        events = base.options(
+            joinedload(Event.asset).joinedload(Asset.farm)
+        ).all()
+
+        total_events = len(events)
+        farms = {e.asset.farm.name for e in events if e.asset and e.asset.farm}
+        # Assets are keyed by (farm, name) because names repeat across farms.
+        assets = {(e.asset.farm.name, e.asset.name) for e in events if e.asset}
+
+        by_source: dict[str, int] = {}
+        per_asset: dict[tuple[str, str], dict] = {}
+        metric_series: dict[str, list[MetricPoint]] = {}
+        tag_counts: dict[str, int] = {}
+        media_clips = 0
+        min_dt = max_dt = None
+
+        # Single pass over the events feeding every accumulator.
+        for e in events:
+            by_source[e.source.value] = by_source.get(e.source.value, 0) + 1
+            min_dt = e.occurred_at if min_dt is None else min(min_dt, e.occurred_at)
+            max_dt = e.occurred_at if max_dt is None else max(max_dt, e.occurred_at)
+
+            if e.source is Source.media:
+                media_clips += 1
+            if e.tags:
+                # Tags are stored comma-joined; split and ignore empty pieces.
+                for tag in (t.strip() for t in e.tags.split(",") if t.strip()):
+                    tag_counts[tag] = tag_counts.get(tag, 0) + 1
+
+            if e.asset:
+                key = (e.asset.farm.name, e.asset.name)
+                slot = per_asset.setdefault(
+                    key,
+                    {
+                        "farm": e.asset.farm.name,
+                        "asset": e.asset.name,
+                        "asset_type": e.asset.type.value,
+                        "events": 0,
+                        "last_seen": None,
+                    },
+                )
+                slot["events"] += 1
+                if slot["last_seen"] is None or e.occurred_at > slot["last_seen"]:
+                    slot["last_seen"] = e.occurred_at
+
+            # Only events with both a metric name and a value form a series.
+            if e.metric and e.value is not None:
+                metric_series.setdefault(e.metric, []).append(
+                    MetricPoint(
+                        asset=e.asset.name if e.asset else "?",
+                        occurred_at=e.occurred_at,
+                        value=e.value,
+                    )
+                )
+
+        for series in metric_series.values():
+            series.sort(key=lambda p: p.occurred_at)
+
+        # NOTE(review): this count is not restricted by ``farm``, so a
+        # farm-scoped report still shows open items for all farms — confirm
+        # whether that is intended.
+        open_items = (
+            session.query(func.count(ActionItem.id))
+            .filter(ActionItem.status == ActionStatus.open)
+            .scalar()
+        )
+
+        # Busiest assets first.
+        by_asset = sorted(
+            (AssetSummary(**v) for v in per_asset.values()),
+            key=lambda a: a.events,
+            reverse=True,
+        )
+        top_tags = [
+            TagCount(tag=t, count=c)
+            for t, c in sorted(tag_counts.items(), key=lambda kv: kv[1], reverse=True)
+        ]
+
+        return AggregateReport(
+            total_events=total_events,
+            total_farms=len(farms),
+            total_assets=len(assets),
+            open_action_items=int(open_items or 0),
+            media_clips=media_clips,
+            by_source=by_source,
+            by_asset=by_asset,
+            metric_series=metric_series,
+            top_tags=top_tags,
+            date_range=[min_dt, max_dt],
+        )
+
+
+def known_asset_names(farm: str | None = None) -> list[str]:
+    with session_scope() as session:
+        q = session.query(Asset.name)
+        if farm:
+            q = q.join(Farm).filter(Farm.name == farm)
+        return [name for (name,) in q.distinct().all()]
+
+
+def _load_events(
+    session: Session,
+    farm: str | None,
+    since: dt.datetime | None,
+) -> list[Event]:
+    q = (
+        session.query(Event)
+        .options(joinedload(Event.asset).joinedload(Asset.farm))
+        .join(Asset)
+        .join(Farm)
+    )
+    if farm:
+        q = q.filter(Farm.name == farm)
+    if since:
+        q = q.filter(Event.occurred_at >= since)
+    return q.order_by(Event.occurred_at).all()
+
+
+def summarize_and_store(
+    farm: str | None = None,
+    since: dt.datetime | None = None,
+    persist: bool = True,
+) -> SummaryResult:
+    """Summarize stored events and (optionally) persist the action items."""
+    summarizer = get_summarizer()
+    with session_scope() as session:
+        events = _load_events(session, farm, since)
+        result = summarizer.summarize(events)
+
+        if persist and result.action_items:
+            farm_id = None
+            if farm:
+                f = session.query(Farm).filter(Farm.name == farm).one_or_none()
+                farm_id = f.id if f else None
+            for ai in result.action_items:
+                asset_id = None
+                if ai.asset:
+                    a = (
+                        session.query(Asset)
+                        .filter(Asset.name == ai.asset)
+                        .first()
+                    )
+                    asset_id = a.id if a else None
+                session.add(
+                    ActionItem(
+                        farm_id=farm_id,
+                        asset_id=asset_id,
+                        task=ai.task,
+                        owner=ai.owner,
+                        due=dt.datetime.combine(ai.due, dt.time()) if ai.due else None,
+                        priority=ai.priority,
+                        source_summary=result.summary,
+                        created_by=summarizer.name,
+                        rationale=ai.rationale,
+                    )
+                )
+    return result
+
+
+def list_action_items(
+    status: ActionStatus | None = ActionStatus.open,
+) -> list[dict]:
+    with session_scope() as session:
+        q = session.query(ActionItem)
+        if status is not None:
+            q = q.filter(ActionItem.status == status)
+        rank = {Priority.high: 0, Priority.medium: 1, Priority.low: 2}
+        items = q.all()
+        items.sort(key=lambda a: (rank[a.priority], a.due or dt.datetime.max))
+        return [
+            {
+                "id": a.id,
+                "task": a.task,
+                "owner": a.owner,
+                "due": a.due.date().isoformat() if a.due else None,
+                "priority": a.priority.value,
+                "status": a.status.value,
+                "created_by": a.created_by,
+                "rationale": a.rationale,
+                "logged_at": a.created_at.isoformat() if a.created_at else None,
+            }
+            for a in items
+        ]
diff --git a/agtech-ops/agtech_ops/summarize/__init__.py b/agtech-ops/agtech_ops/summarize/__init__.py
new file mode 100644
index 0000000000..6bc929ac34
--- /dev/null
+++ b/agtech-ops/agtech_ops/summarize/__init__.py
@@ -0,0 +1,37 @@
+"""Summarization: turn a batch of events into a structured ``SummaryResult``.
+
+``get_summarizer`` picks the best available backend: the LLM backend when the
+AI extras are installed and an API key is present, otherwise the deterministic
+rule-based backend. The rule-based backend guarantees the app is useful with
+zero configuration and makes tests fully offline.
+"""
+
+from __future__ import annotations
+
+import os
+
+from ..config import get_settings
+from .base import Summarizer
+from .rule_based import RuleBasedSummarizer
+
+__all__ = ["Summarizer", "RuleBasedSummarizer", "get_summarizer"]
+
+
+def _has_llm_key() -> bool:
+    return any(
+        os.getenv(k)
+        for k in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "AZURE_API_KEY", "GEMINI_API_KEY")
+    )
+
+
+def get_summarizer() -> Summarizer:
+    settings = get_settings()
+    if settings.force_rule_based or not _has_llm_key():
+        return RuleBasedSummarizer()
+    try:
+        from .llm import LLMSummarizer
+
+        return LLMSummarizer(model=settings.llm_model)
+    except Exception:
+        # Any import/setup failure (missing extras, bad config) degrades safely.
+        return RuleBasedSummarizer()
diff --git a/agtech-ops/agtech_ops/summarize/base.py b/agtech-ops/agtech_ops/summarize/base.py
new file mode 100644
index 0000000000..87ecb181d9
--- /dev/null
+++ b/agtech-ops/agtech_ops/summarize/base.py
@@ -0,0 +1,34 @@
+"""Summarizer interface and shared helpers."""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from ..models import Event
+from ..schemas import SummaryResult
+
+
+def render_events(events: list[Event]) -> str:
+    """Render events into a compact, context-rich text block for a summarizer."""
+    lines: list[str] = []
+    for e in sorted(events, key=lambda x: x.occurred_at):
+        when = e.occurred_at.strftime("%Y-%m-%d %H:%M")
+        asset = e.asset.name if e.asset else "?"
+        farm = e.asset.farm.name if e.asset and e.asset.farm else "?"
+        prefix = f"[{when}] {farm} / {asset} ({e.source.value})"
+        if e.tags:
+            lines.append(f"{prefix} tags: {e.tags}")
+        if e.text:
+            who = f" {e.author}:" if e.author else ""
+            lines.append(f"{prefix}{who} {e.text}")
+        if e.metric is not None:
+            val = "" if e.value is None else f"={e.value}"
+            cat = f" {e.category}" if e.category else ""
+            lines.append(f"{prefix}{cat} {e.metric}{val}")
+    return "\n".join(lines)
+
+
+class Summarizer(Protocol):  # structural interface the summarizer backends satisfy
+    name: str  # backend identifier, e.g. "rule_based"
+
+    def summarize(self, events: list[Event]) -> SummaryResult: ...
diff --git a/agtech-ops/agtech_ops/summarize/llm.py b/agtech-ops/agtech_ops/summarize/llm.py
new file mode 100644
index 0000000000..13f89cfaaa
--- /dev/null
+++ b/agtech-ops/agtech_ops/summarize/llm.py
@@ -0,0 +1,49 @@
+"""Optional LLM summarizer using LiteLLM + instructor for structured output.
+
+Only imported when AI extras are installed and an API key is configured (see
+``summarize.get_summarizer``). It returns the exact same ``SummaryResult``
+schema as the rule-based backend, so callers never branch on which one ran.
+"""
+
+from __future__ import annotations
+
+from ..models import Event
+from ..schemas import SummaryResult
+from .base import render_events
+
+_SYSTEM = (
+    "You are an operations analyst for a farming business. You receive a log of "
+    "events drawn from partner CSV uploads, shared Dropbox spreadsheets, WhatsApp "
+    "messages between farm staff, and tags from video/camera clips. Produce a "
+    "concise situational summary, key bullet points, and concrete action items "
+    "for the ops team. Each action item must be specific, name an owner when one "
+    "is implied by the message author, set a sensible priority, reference the "
+    "relevant asset, and include a short 'rationale' citing the evidence (e.g. a "
+    "clip tag or a message phrase). Only use information present in the events."
+)
+
+
+class LLMSummarizer:
+    name = "haiku-agent"
+
+    def __init__(self, model: str):
+        import instructor
+        import litellm
+
+        self.model = model
+        self._litellm = litellm
+        self._client = instructor.from_litellm(litellm.completion)
+
+    def summarize(self, events: list[Event]) -> SummaryResult:
+        if not events:
+            return SummaryResult(summary="No events to summarize.")
+
+        context = render_events(events)
+        return self._client.chat.completions.create(
+            model=self.model,
+            response_model=SummaryResult,
+            messages=[
+                {"role": "system", "content": _SYSTEM},
+                {"role": "user", "content": f"Events:\n{context}"},
+            ],
+        )
diff --git a/agtech-ops/agtech_ops/summarize/rule_based.py b/agtech-ops/agtech_ops/summarize/rule_based.py
new file mode 100644
index 0000000000..5e93756c31
--- /dev/null
+++ b/agtech-ops/agtech_ops/summarize/rule_based.py
@@ -0,0 +1,136 @@
+"""Deterministic, offline summarizer.
+
+It is intentionally simple but useful: it scans event text for ops-relevant
+triggers (health, equipment, supplies, etc.), assigns priority, infers an owner
+from the message author, and rolls everything up into points + action items.
+This is the default backend and the one the tests assert against.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import re
+from collections import Counter
+
+from ..models import Event, Priority
+from ..schemas import ActionItemOut, SummaryResult
+from .base import render_events
+
+# Each entry: (trigger keywords, action verb, priority, lead-time days); first match wins.
+_TRIGGERS: list[tuple[tuple[str, ...], str, Priority, int]] = [
+    (("sick", "lame", "ill", "injured", "limping", "down", "mastitis", "fever"),
+     "Have vet assess", Priority.high, 1),
+    (("dead", "death", "died", "mortality"),
+     "Investigate and log mortality", Priority.high, 1),
+    (("vet", "veterinarian"),
+     "Schedule vet visit", Priority.high, 2),
+    (("broken", "broke", "repair", "fix", "leak", "leaking", "fault", "down"),
+     "Arrange repair", Priority.high, 2),
+    (("low", "out of", "empty", "running low", "order", "reorder", "restock"),
+     "Replenish supplies", Priority.medium, 3),
+    (("fence", "gate", "escape", "loose", "out on the road"),
+     "Secure fencing/containment", Priority.high, 1),
+    (("water", "trough", "irrigation", "dry", "drought"),
+     "Check water/irrigation", Priority.medium, 2),
+    (("spray", "weed", "pest", "fungus", "disease", "blight"),
+     "Plan crop protection treatment", Priority.medium, 3),
+    (("harvest", "ready", "ripe"),
+     "Plan harvest", Priority.medium, 4),
+    (("calv", "lamb", "birth", "pregnan"),
+     "Monitor for births", Priority.medium, 3),
+]
+
+_PRIORITY_RANK = {Priority.high: 0, Priority.medium: 1, Priority.low: 2}
+
+# Compile each trigger group into one regex. A leading ``\b`` anchors the match
+# to a word start (so "ill" does NOT match inside "will") while still allowing
+# suffixes (so "calv" matches "calving").
+_COMPILED: list[tuple[re.Pattern[str], str, Priority, int]] = [
+    (
+        re.compile(r"\b(?:" + "|".join(re.escape(k) for k in keywords) + r")", re.I),
+        verb,
+        priority,
+        lead,
+    )
+    for keywords, verb, priority, lead in _TRIGGERS
+]
+
+
+class RuleBasedSummarizer:
+    name = "rule_based"
+
+    def summarize(self, events: list[Event]) -> SummaryResult:
+        if not events:
+            return SummaryResult(summary="No events to summarize.", points=[], action_items=[])
+
+        today = dt.date.today()
+        action_items: list[ActionItemOut] = []
+        by_source: Counter[str] = Counter()
+        by_asset: Counter[str] = Counter()
+        flagged = 0
+
+        for e in events:
+            by_source[e.source.value] += 1
+            if e.asset:
+                by_asset[e.asset.name] += 1
+
+            haystack = " ".join(
+                p for p in [e.text, e.category, e.metric, e.tags] if p
+            ).lower()
+            if not haystack:
+                continue
+
+            for pattern, verb, priority, lead in _COMPILED:
+                match = pattern.search(haystack)
+                if not match:
+                    continue
+                asset_name = e.asset.name if e.asset else None
+                detail = (e.text or e.category or e.metric or "").strip()
+                task = f"{verb} — {asset_name or 'farm'}"
+                if detail:
+                    task += f": {detail[:160]}"
+                trigger = match.group(0)
+                rationale = f"Triggered by '{trigger}' from {e.source.value}"
+                if e.tags:
+                    rationale += f" (tags: {e.tags})"
+                action_items.append(
+                    ActionItemOut(
+                        task=task,
+                        owner=e.author,
+                        due=today + dt.timedelta(days=lead),
+                        priority=priority,
+                        asset=asset_name,
+                        rationale=rationale,
+                    )
+                )
+                flagged += 1
+                break  # one action per event keeps the list focused
+
+        action_items.sort(key=lambda a: (_PRIORITY_RANK[a.priority], a.due or today))
+
+        points: list[str] = [
+            f"{len(events)} events across {len(by_asset)} asset(s) "
+            f"from {len(by_source)} source(s)."
+        ]
+        if by_asset:
+            top = ", ".join(f"{n} ({c})" for n, c in by_asset.most_common(5))
+            points.append(f"Most active assets: {top}.")
+        points.append(f"{flagged} event(s) triggered a suggested action.")
+        high = sum(1 for a in action_items if a.priority is Priority.high)
+        if high:
+            points.append(f"{high} high-priority item(s) need attention first.")
+
+        summary = (
+            f"Reviewed {len(events)} events and generated {len(action_items)} "
+            f"action item(s)"
+            + (f", {high} high priority." if high else ".")
+        )
+
+        # Keep a small rendered context excerpt for traceability.
+        excerpt = render_events(events)
+        if len(excerpt) > 2000:
+            excerpt = excerpt[:2000] + "…"
+
+        result = SummaryResult(summary=summary, points=points, action_items=action_items)
+        result.__dict__["_context_excerpt"] = excerpt  # not serialized; debugging aid
+        return result
diff --git a/agtech-ops/pyproject.toml b/agtech-ops/pyproject.toml
new file mode 100644
index 0000000000..1dc9220097
--- /dev/null
+++ b/agtech-ops/pyproject.toml
@@ -0,0 +1,50 @@
+[project]
+name = "agtech-ops-hub"
+version = "0.1.0"
+description = "Contextualized agriculture operations hub: ingest partner CSVs, Dropbox exports and WhatsApp chatter, then turn them into summaries and action items for ops teams."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "fastapi>=0.110",
+    "uvicorn>=0.29",
+    "python-multipart>=0.0.9",
+    "pydantic>=2.6",
+    "sqlalchemy>=2.0",
+    "pandas>=2.2",
+]
+
+[project.optional-dependencies]
+# Multi-format file ingestion (Excel, PDF, Word). Plain text/CSV/TSV/JSON work
+# without these; the ingestors degrade gracefully with a helpful message.
+files = [
+    "openpyxl>=3.1",
+    "pypdf>=4.0",
+    "python-docx>=1.1",
+]
+# AI summarization. Without these installed (or without an API key), the app
+# transparently falls back to the deterministic rule-based summarizer.
+ai = [
+    "litellm>=1.40",
+    "instructor>=1.3",
+]
+# Operator-facing dashboard (uses Streamlit's native charts).
+dashboard = [
+    "streamlit>=1.34",
+]
+dev = [
+    "pytest>=8.0",
+    "httpx>=0.27",
+    "openpyxl>=3.1",
+    "pypdf>=4.0",
+    "python-docx>=1.1",
+]
+
+[build-system]
+requires = ["setuptools>=68"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+include = ["agtech_ops*"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/agtech-ops/render.yaml b/agtech-ops/render.yaml
new file mode 100644
index 0000000000..15e355351f
--- /dev/null
+++ b/agtech-ops/render.yaml
@@ -0,0 +1,43 @@
+# Render blueprint for the AgTech Ops Hub dashboard.
+# Deploy: New > Blueprint, point at this repo, set Root Directory to "agtech-ops".
+#
+# This stands up the Streamlit dashboard as a web service. The FastAPI ingest
+# API can be added as a second service later (see commented block below).
+services:
+  - type: web
+    name: agtech-ops-hub
+    runtime: python
+    rootDir: agtech-ops
+    plan: free
+    buildCommand: pip install -e ".[files,dashboard]"
+    # Render injects $PORT; Streamlit must bind it on 0.0.0.0 and run headless.
+    startCommand: >-
+      streamlit run agtech_ops/dashboard.py
+      --server.port $PORT
+      --server.address 0.0.0.0
+      --server.headless true
+      --browser.gatherUsageStats false
+    healthCheckPath: /_stcore/health
+    envVars:
+      # Default summarizer is offline/rule-based so the app works with no keys.
+      - key: AGTECH_FORCE_RULE_BASED
+        value: "1"
+      # SQLite on the free plan is ephemeral (resets on redeploy). For
+      # persistence, attach a paid disk and point this at it, or use Postgres:
+      #   AGTECH_DATABASE_URL = postgresql+psycopg://USER:PASS@HOST/DB
+      - key: AGTECH_DATABASE_URL
+        value: sqlite:////tmp/agtech_ops.db
+      # To enable the LLM summarizer instead, set AGTECH_FORCE_RULE_BASED=0,
+      # add the 'ai' extra to buildCommand, and provide a key below (as a secret):
+      # - key: OPENAI_API_KEY
+      #   sync: false
+
+  # Optional: expose the ingest API as its own service.
+  # - type: web
+  #   name: agtech-ops-api
+  #   runtime: python
+  #   rootDir: agtech-ops
+  #   plan: free
+  #   buildCommand: pip install -e ".[files]"
+  #   startCommand: uvicorn agtech_ops.api:app --host 0.0.0.0 --port $PORT
+  #   healthCheckPath: /health
diff --git a/agtech-ops/sample_data/clips.json b/agtech-ops/sample_data/clips.json
new file mode 100644
index 0000000000..f05c5eaa61
--- /dev/null
+++ b/agtech-ops/sample_data/clips.json
@@ -0,0 +1,7 @@
+[
+  {"farm": "Green Acres", "asset": "North Herd", "date": "2026-06-28T06:12:00", "camera": "barn-cam-1", "duration_s": 18, "tags": "cow, lame, limping", "notes": "Auto-detected gait anomaly on cow #42"},
+  {"farm": "Green Acres", "asset": "North Herd", "date": "2026-06-28T06:40:00", "camera": "barn-cam-1", "duration_s": 9, "tags": "cow, feeding, normal"},
+  {"farm": "Green Acres", "asset": "South Field", "date": "2026-06-28T11:03:00", "camera": "gate-cam-3", "duration_s": 12, "tags": "fence, open, gate"},
+  {"farm": "Green Acres", "asset": "South Field", "date": "2026-06-28T14:21:00", "camera": "field-cam-2", "duration_s": 7, "tags": "irrigation, leak, water"},
+  {"farm": "Green Acres", "asset": "North Herd", "date": "2026-06-28T19:55:00", "camera": "pasture-cam-4", "duration_s": 22, "tags": "calving, cow"}
+]
diff --git a/agtech-ops/sample_data/field_notes.txt b/agtech-ops/sample_data/field_notes.txt
new file mode 100644
index 0000000000..65a9ec6f0e
--- /dev/null
+++ b/agtech-ops/sample_data/field_notes.txt
@@ -0,0 +1,7 @@
+2026-06-28 Walked the North Herd this morning. Cow #42 is improving after the vet visit but should be monitored for another week.
+
+2026-06-28 South Field irrigation contractor is booked for tomorrow to fix the leaking line. Soil is still very dry.
+
+2026-06-28 Feed Store is almost empty. The reorder placed earlier has not arrived yet - chase the supplier.
+
+2026-06-28 The north boundary fence repair is done, cattle are secure again.
diff --git a/agtech-ops/sample_data/herd.csv b/agtech-ops/sample_data/herd.csv
new file mode 100644
index 0000000000..6153ec3a76
--- /dev/null
+++ b/agtech-ops/sample_data/herd.csv
@@ -0,0 +1,6 @@
+farm,asset,asset_type,date,category,metric,value,notes
+Green Acres,North Herd,herd,2026-06-25,health,milk_yield_l,1820,Yield steady
+Green Acres,North Herd,herd,2026-06-26,health,milk_yield_l,1610,"Cow #42 looks lame, may need vet"
+Green Acres,South Field,crop,2026-06-26,agronomy,soil_moisture_pct,18,"Soil getting dry, irrigation low"
+Green Acres,South Field,crop,2026-06-27,agronomy,soil_moisture_pct,15,Drought stress visible
+Green Acres,Feed Store,other,2026-06-27,supplies,feed_tonnes,2,"Running low on feed, reorder soon"
diff --git a/agtech-ops/sample_data/partner_feed.json b/agtech-ops/sample_data/partner_feed.json
new file mode 100644
index 0000000000..5e42c1a762
--- /dev/null
+++ b/agtech-ops/sample_data/partner_feed.json
@@ -0,0 +1,7 @@
+{
+  "records": [
+    {"farm": "Green Acres", "asset": "North Herd", "asset_type": "herd", "date": "2026-06-28", "category": "health", "metric": "milk_yield_l", "value": 1655, "notes": "Recovering after vet visit"},
+    {"farm": "Green Acres", "asset": "Feed Store", "asset_type": "other", "date": "2026-06-28", "category": "supplies", "metric": "feed_tonnes", "value": 1, "notes": "Critically low, order not yet delivered"},
+    {"farm": "Green Acres", "asset": "South Field", "asset_type": "crop", "date": "2026-06-28", "category": "agronomy", "metric": "soil_moisture_pct", "value": 13, "notes": "Still dry, irrigation repair pending"}
+  ]
+}
diff --git a/agtech-ops/sample_data/whatsapp_export.txt b/agtech-ops/sample_data/whatsapp_export.txt
new file mode 100644
index 0000000000..d9ae9b899d
--- /dev/null
+++ b/agtech-ops/sample_data/whatsapp_export.txt
@@ -0,0 +1,6 @@
+[2026-06-26, 7:32 AM] Alice: Morning all, North Herd cow #42 is limping badly, think we need the vet out
+[2026-06-26, 7:35 AM] Bob: Agreed, I'll be in South Field most of the day
+2026/06/26, 16:10 - Bob: South Field irrigation line is leaking again, water pressure low
+[2026-06-27, 8:01 AM] Alice: Feed Store is nearly empty, can someone order more feed before the weekend?
+[2026-06-27, 8:05 AM] Carol: On it, will place the feed order today
+[2026-06-27, 6:22 PM] Bob: Fence on the north boundary is loose, cattle could get out on the road
diff --git a/agtech-ops/tests/conftest.py b/agtech-ops/tests/conftest.py
new file mode 100644
index 0000000000..b4a0ce65e5
--- /dev/null
+++ b/agtech-ops/tests/conftest.py
@@ -0,0 +1,31 @@
+"""Test fixtures. Sets up an isolated SQLite DB and forces offline summarizer.
+
+Environment must be configured *before* any ``agtech_ops`` module is imported,
+because config defaults are read at import time.
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+
+# Configure before importing the package.
+_tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
+_tmp.close()
+os.environ["AGTECH_DATABASE_URL"] = f"sqlite:///{_tmp.name}"
+os.environ["AGTECH_FORCE_RULE_BASED"] = "1"
+
+import pytest  # noqa: E402
+
+from agtech_ops.db import get_engine, init_db  # noqa: E402
+from agtech_ops.models import Base  # noqa: E402
+
+
+@pytest.fixture(autouse=True)
+def clean_db():
+    """Drop all tables and re-run init_db() before each test; drop again afterwards."""
+    engine = get_engine()
+    Base.metadata.drop_all(engine)  # clear anything a previous test left behind
+    init_db()
+    yield  # the test body runs here
+    Base.metadata.drop_all(engine)
diff --git a/agtech-ops/tests/test_aggregate.py b/agtech-ops/tests/test_aggregate.py
new file mode 100644
index 0000000000..0189663dfc
--- /dev/null
+++ b/agtech-ops/tests/test_aggregate.py
@@ -0,0 +1,54 @@
+import io
+import json
+
+from agtech_ops.service import aggregate, ingest_files
+
+
+def _csv() -> bytes:
+    return (
+        b"farm,asset,asset_type,date,metric,value,notes\n"
+        b"Green Acres,North Herd,herd,2026-06-25,milk_yield_l,1820,steady\n"
+        b"Green Acres,North Herd,herd,2026-06-26,milk_yield_l,1610,cow lame\n"
+        b"Green Acres,South Field,crop,2026-06-26,soil_moisture_pct,18,dry\n"
+    )
+
+
+def test_multi_file_ingest_and_aggregate():
+    json_doc = json.dumps(
+        [{"farm": "Green Acres", "asset": "Feed Store", "date": "2026-06-27",
+          "metric": "feed_tonnes", "value": 2, "notes": "running low"}]
+    ).encode()
+    text_doc = b"2026-06-27 Fence on north boundary is loose, cattle could escape."
+
+    files = [
+        ("herd.csv", _csv()),
+        ("partner.json", json_doc),
+        ("notes.txt", text_doc),
+    ]
+    res = ingest_files(files, farm="Green Acres")
+    assert res.files_processed == 3
+    assert res.events_ingested == 5  # 3 csv + 1 json + 1 text
+    assert "Green Acres" in res.farms
+
+    report = aggregate()
+    assert report.total_events == 5
+    assert report.total_farms == 1
+    # North Herd, South Field, Feed Store, plus "General" for the text note
+    # whose content matched no known asset name.
+    assert report.total_assets == 4
+    assert {a.asset for a in report.by_asset} >= {
+        "North Herd", "South Field", "Feed Store", "General"
+    }
+    assert report.by_source  # has at least one source
+    # milk yield should be a compiled time series with 2 points
+    assert "milk_yield_l" in report.metric_series
+    assert len(report.metric_series["milk_yield_l"]) == 2
+    # series is sorted ascending by time
+    pts = report.metric_series["milk_yield_l"]
+    assert pts[0].occurred_at <= pts[1].occurred_at
+
+
+def test_aggregate_empty():
+    report = aggregate()
+    assert report.total_events == 0
+    assert report.total_assets == 0
diff --git a/agtech-ops/tests/test_api.py b/agtech-ops/tests/test_api.py
new file mode 100644
index 0000000000..059a44c4d7
--- /dev/null
+++ b/agtech-ops/tests/test_api.py
@@ -0,0 +1,81 @@
+import io
+
+from fastapi.testclient import TestClient
+
+from agtech_ops.api import app
+
+client = TestClient(app)
+
+
+def test_health():
+    r = client.get("/health")
+    assert r.status_code == 200
+    assert r.json()["status"] == "ok"
+    assert r.json()["summarizer"] == "rule_based"
+
+
+def test_end_to_end_csv_then_summarize():
+    csv = (
+        "farm,asset,asset_type,date,metric,value,notes\n"
+        "Green Acres,North Herd,herd,2026-06-26,milk_yield_l,1610,Cow #42 looks lame\n"
+        "Green Acres,Feed Store,other,2026-06-27,feed_tonnes,2,Running low on feed\n"
+    )
+    files = {"file": ("herd.csv", io.BytesIO(csv.encode()), "text/csv")}
+    r = client.post("/ingest/csv", files=files)
+    assert r.status_code == 200, r.text
+    body = r.json()
+    assert body["events_ingested"] == 2
+    assert "Green Acres" in body["farms"]
+
+    r = client.post("/summarize", params={"persist": True})
+    assert r.status_code == 200, r.text
+    result = r.json()
+    assert len(result["action_items"]) >= 1
+
+    r = client.get("/action-items")
+    assert r.status_code == 200
+    items = r.json()
+    assert len(items) >= 1
+    assert items[0]["priority"] == "high"  # lame cow sorts first
+
+
+def test_whatsapp_ingest_endpoint():
+    data = {
+        "text": "[2026-06-26, 7:32 AM] Alice: North Herd cow is lame, call vet\n",
+        "farm": "Green Acres",
+    }
+    r = client.post("/ingest/whatsapp", data=data)
+    assert r.status_code == 200, r.text
+    assert r.json()["events_ingested"] == 1
+
+
+def test_csv_with_no_valid_columns_returns_422():
+    files = {"file": ("bad.csv", io.BytesIO(b"a,b\n1,2\n"), "text/csv")}
+    r = client.post("/ingest/csv", files=files)
+    assert r.status_code == 422
+
+
+def test_ingest_files_multi_then_report():
+    csv = b"farm,asset,date,metric,value\nGreen Acres,North Herd,2026-06-26,milk_yield_l,1610\n"
+    notes = b"2026-06-27 South Field irrigation is leaking, needs repair."
+    files = [
+        ("files", ("herd.csv", io.BytesIO(csv), "text/csv")),
+        ("files", ("notes.txt", io.BytesIO(notes), "text/plain")),
+    ]
+    r = client.post("/ingest/files", files=files, data={"farm": "Green Acres"})
+    assert r.status_code == 200, r.text
+    body = r.json()
+    assert body["files_processed"] == 2
+    assert body["events_ingested"] == 2
+
+    r = client.get("/report")
+    assert r.status_code == 200, r.text
+    report = r.json()
+    assert report["total_events"] == 2
+    assert "milk_yield_l" in report["metric_series"]
+
+
+def test_health_lists_supported_files():
+    r = client.get("/health")
+    assert r.status_code == 200
+    assert ".pdf" in r.json()["supported_files"]
diff --git a/agtech-ops/tests/test_csv_ingest.py b/agtech-ops/tests/test_csv_ingest.py
new file mode 100644
index 0000000000..05b960ba73
--- /dev/null
+++ b/agtech-ops/tests/test_csv_ingest.py
@@ -0,0 +1,53 @@
+from agtech_ops.ingest import parse_partner_csv
+from agtech_ops.models import AssetType, Source
+
+
+def test_parse_basic_csv():
+    csv = (
+        "farm,asset,asset_type,date,metric,value,notes\n"
+        "Green Acres,North Herd,herd,2026-06-26,milk_yield_l,1610,Cow lame\n"
+    )
+    events, errors = parse_partner_csv(csv)
+    assert errors == []
+    assert len(events) == 1
+    e = events[0]
+    assert e.farm == "Green Acres"
+    assert e.asset == "North Herd"
+    assert e.asset_type is AssetType.herd
+    assert e.source is Source.csv_partner
+    assert e.metric == "milk_yield_l"
+    assert e.value == 1610.0
+    assert e.text == "Cow lame"
+
+
+def test_header_aliases_and_extra_columns_in_raw():
+    csv = (
+        "site,paddock,timestamp,reading,comment,partner_id\n"
+        "Green Acres,South Field,2026-06-27 10:00,15,Dry,PX-9\n"
+    )
+    events, errors = parse_partner_csv(csv)
+    assert errors == []
+    assert len(events) == 1
+    e = events[0]
+    assert e.farm == "Green Acres"
+    assert e.asset == "South Field"
+    assert e.value == 15.0
+    assert e.text == "Dry"
+    assert e.raw is not None and "PX-9" in e.raw
+
+
+def test_missing_required_column():
+    events, errors = parse_partner_csv("foo,bar\n1,2\n")
+    assert events == []
+    assert any("missing required column" in err for err in errors)
+
+
+def test_bad_row_is_reported_not_fatal():
+    csv = (
+        "farm,asset,date,value\n"
+        "Green Acres,North Herd,not-a-date,5\n"
+        "Green Acres,North Herd,2026-06-27,7\n"
+    )
+    events, errors = parse_partner_csv(csv)
+    assert len(events) == 1
+    assert any("unparseable date" in err for err in errors)
diff --git a/agtech-ops/tests/test_files_ingest.py b/agtech-ops/tests/test_files_ingest.py
new file mode 100644
index 0000000000..fec9202a22
--- /dev/null
+++ b/agtech-ops/tests/test_files_ingest.py
@@ -0,0 +1,123 @@
+import io
+import json
+
+from agtech_ops.ingest import (
+    SUPPORTED_EXTENSIONS,
+    ingest_file,
+    parse_excel,
+    parse_json_records,
+    parse_text_document,
+)
+from agtech_ops.models import Source
+
+
+def test_supported_extensions_cover_core_formats():
+    for ext in [".csv", ".tsv", ".xlsx", ".json", ".txt", ".pdf", ".docx"]:
+        assert ext in SUPPORTED_EXTENSIONS
+
+
+def test_json_records():
+    payload = json.dumps(
+        [
+            {"farm": "Green Acres", "asset": "North Herd", "date": "2026-06-26",
+             "metric": "milk_yield_l", "value": 1610, "notes": "ok"},
+        ]
+    )
+    events, errors = parse_json_records(payload)
+    assert errors == []
+    assert len(events) == 1
+    assert events[0].asset == "North Herd"
+    assert events[0].value == 1610.0
+
+
+def test_json_wrapped_in_records_key():
+    payload = json.dumps({"records": [
+        {"farm": "F", "asset": "A", "date": "2026-06-26", "value": 1}
+    ]})
+    events, errors = parse_json_records(payload)
+    assert errors == []
+    assert len(events) == 1
+
+
+def test_excel_roundtrip():
+    import pandas as pd
+
+    df = pd.DataFrame(
+        {
+            "farm": ["Green Acres"],
+            "asset": ["South Field"],
+            "date": ["2026-06-27"],
+            "metric": ["soil_moisture_pct"],
+            "value": [15],
+            "notes": ["dry"],
+        }
+    )
+    buf = io.BytesIO()
+    df.to_excel(buf, index=False)
+    events, errors = parse_excel(buf.getvalue())
+    assert errors == []
+    assert len(events) == 1
+    assert events[0].asset == "South Field"
+
+
+def test_text_document_chunks_and_infers_asset():
+    text = (
+        "2026-06-26 North Herd cow looks lame, needs vet.\n\n"
+        "Feed Store is running low, please reorder.\n"
+    )
+    events, errors = parse_text_document(
+        text, farm="Green Acres", known_assets=["North Herd", "Feed Store"]
+    )
+    assert errors == []
+    assert len(events) == 2
+    assert events[0].asset == "North Herd"
+    assert events[1].asset == "Feed Store"
+
+
+def test_registry_dispatch_csv():
+    csv = b"farm,asset,date,value\nGreen Acres,North Herd,2026-06-26,5\n"
+    events, errors = ingest_file("data.csv", csv)
+    assert errors == []
+    assert len(events) == 1
+    assert events[0].source is Source.csv_partner
+
+
+def test_registry_dispatch_txt_whatsapp_autodetected():
+    wa = (
+        b"[2026-06-26, 7:32 AM] Alice: North Herd cow is lame\n"
+        b"[2026-06-26, 7:40 AM] Bob: South Field looks dry\n"
+    )
+    events, errors = ingest_file(
+        "chat.txt", wa, farm="Green Acres", known_assets=["North Herd", "South Field"]
+    )
+    assert errors == []
+    assert len(events) == 2
+    assert events[0].source is Source.whatsapp
+
+
+def test_registry_text_requires_farm():
+    events, errors = ingest_file("notes.txt", b"some notes here")
+    assert events == []
+    assert any("farm name is required" in e for e in errors)
+
+
+def test_registry_unsupported_extension():
+    events, errors = ingest_file("photo.heic", b"\x00\x01")
+    assert events == []
+    assert any("unsupported file type" in e for e in errors)
+
+
+def test_docx_extraction():
+    import docx
+
+    d = docx.Document()
+    d.add_paragraph("North Herd vaccination completed today.")
+    d.add_paragraph("Feed Store needs a reorder soon.")
+    buf = io.BytesIO()
+    d.save(buf)
+    events, errors = ingest_file(
+        "report.docx", buf.getvalue(), farm="Green Acres",
+        known_assets=["North Herd", "Feed Store"],
+    )
+    assert errors == []
+    assert len(events) == 2
diff --git a/agtech-ops/tests/test_media_and_agent.py b/agtech-ops/tests/test_media_and_agent.py
new file mode 100644
index 0000000000..a6693c534f
--- /dev/null
+++ b/agtech-ops/tests/test_media_and_agent.py
@@ -0,0 +1,66 @@
+import io
+import json
+
+from agtech_ops.agent import action_log, agent_name, build_action_log
+from agtech_ops.ingest import ingest_file, parse_json_records
+from agtech_ops.models import Source
+from agtech_ops.service import aggregate, ingest_files
+
+
+def test_clip_json_autoroutes_to_media_with_tags():
+    payload = json.dumps(
+        [
+            {"farm": "Green Acres", "asset": "North Herd", "date": "2026-06-28",
+             "camera": "barn-cam-1", "duration_s": 18, "tags": "cow, lame, limping"},
+        ]
+    )
+    events, errors = parse_json_records(payload)
+    assert errors == []
+    assert len(events) == 1
+    e = events[0]
+    assert e.source is Source.media          # auto-detected from tags/clip columns
+    assert e.tags == ["cow", "lame", "limping"]
+    assert e.author == "barn-cam-1"          # camera -> author
+    assert e.metric == "clip_duration_s"     # duration -> numeric metric
+    assert e.value == 18.0
+
+
+def test_csv_with_tags_column_is_media():
+    csv = b"farm,asset,date,tags\nGreen Acres,South Field,2026-06-28,\"fence, open\"\n"
+    events, errors = ingest_file("clips.csv", csv)
+    assert errors == []
+    assert events[0].source is Source.media
+    assert events[0].tags == ["fence", "open"]
+
+
+def test_aggregate_counts_clips_and_top_tags():
+    clips = json.dumps([
+        {"farm": "F", "asset": "A", "date": "2026-06-28", "tags": "cow, lame"},
+        {"farm": "F", "asset": "A", "date": "2026-06-28", "tags": "cow, feeding"},
+    ]).encode()
+    ingest_files([("clips.json", clips)], farm="F")
+    report = aggregate()
+    assert report.media_clips == 2
+    tags = {t.tag: t.count for t in report.top_tags}
+    assert tags["cow"] == 2
+    assert tags["lame"] == 1
+
+
+def test_agent_builds_log_from_tags_offline():
+    # A clip tagged "lame" should drive a high-priority action via tags alone.
+    clips = json.dumps([
+        {"farm": "F", "asset": "North Herd", "date": "2026-06-28",
+         "camera": "cam1", "tags": "cow, lame"},
+    ]).encode()
+    ingest_files([("clips.json", clips)], farm="F")
+
+    assert agent_name() == "rule_based"  # offline default in tests
+    result = build_action_log()
+    assert len(result.action_items) >= 1
+
+    log = action_log()
+    assert log
+    item = log[0]
+    assert item["created_by"] == "rule_based"
+    assert item["rationale"]  # rationale recorded
+    assert "lame" in item["rationale"]
diff --git a/agtech-ops/tests/test_summarize.py b/agtech-ops/tests/test_summarize.py
new file mode 100644
index 0000000000..0c05ed697c
--- /dev/null
+++ b/agtech-ops/tests/test_summarize.py
@@ -0,0 +1,51 @@
+import datetime as dt
+
+from agtech_ops.models import Asset, AssetType, Event, Farm, Priority, Source
+from agtech_ops.summarize.rule_based import RuleBasedSummarizer
+
+
+def _make_event(text: str, asset_name: str = "North Herd") -> Event:
+    farm = Farm(id=1, name="Green Acres")
+    asset = Asset(id=1, farm_id=1, name=asset_name, type=AssetType.herd, farm=farm)
+    return Event(
+        id=1,
+        asset_id=1,
+        source=Source.whatsapp,
+        occurred_at=dt.datetime(2026, 6, 26, 7, 30),
+        author="Alice",
+        text=text,
+        asset=asset,
+    )
+
+
+def test_empty_summary():
+    result = RuleBasedSummarizer().summarize([])
+    assert result.action_items == []
+    assert "No events" in result.summary
+
+
+def test_lame_triggers_high_priority_vet_action():
+    e = _make_event("cow #42 is lame, needs vet")
+    result = RuleBasedSummarizer().summarize([e])
+    assert len(result.action_items) == 1
+    ai = result.action_items[0]
+    assert ai.priority is Priority.high
+    assert ai.owner == "Alice"
+    assert ai.asset == "North Herd"
+    assert ai.due is not None
+
+
+def test_supplies_low_triggers_medium_action():
+    e = _make_event("feed store running low, please reorder", asset_name="Feed Store")
+    result = RuleBasedSummarizer().summarize([e])
+    assert len(result.action_items) == 1
+    assert result.action_items[0].priority is Priority.medium
+
+
+def test_actions_sorted_high_first():
+    events = [
+        _make_event("feed running low", asset_name="Feed Store"),
+        _make_event("cow is lame", asset_name="North Herd"),
+    ]
+    result = RuleBasedSummarizer().summarize(events)
+    assert result.action_items[0].priority is Priority.high
diff --git a/agtech-ops/tests/test_whatsapp_ingest.py b/agtech-ops/tests/test_whatsapp_ingest.py
new file mode 100644
index 0000000000..519adf2aec
--- /dev/null
+++ b/agtech-ops/tests/test_whatsapp_ingest.py
@@ -0,0 +1,39 @@
+from agtech_ops.ingest import parse_whatsapp_export
+from agtech_ops.models import Source
+
+
+def test_parse_bracketed_and_dash_formats():
+    text = (
+        "[2026-06-26, 7:32 AM] Alice: North Herd cow #42 is limping\n"
+        "2026/06/26, 16:10 - Bob: South Field irrigation leaking\n"
+    )
+    events, errors = parse_whatsapp_export(
+        text, farm="Green Acres", known_assets=["North Herd", "South Field"]
+    )
+    assert errors == []
+    assert len(events) == 2
+    assert events[0].author == "Alice"
+    assert events[0].asset == "North Herd"
+    assert events[0].source is Source.whatsapp
+    assert events[1].author == "Bob"
+    assert events[1].asset == "South Field"
+
+
+def test_multiline_message_appended():
+    text = (
+        "[2026-06-26, 7:32 AM] Alice: First line\n"
+        "still the same message\n"
+        "[2026-06-26, 7:40 AM] Bob: Second message\n"
+    )
+    events, _ = parse_whatsapp_export(text, farm="Green Acres")
+    assert len(events) == 2
+    assert "still the same message" in events[0].text
+
+
+def test_unknown_asset_falls_back_to_default():
+    text = "[2026-06-26, 7:32 AM] Alice: something vague happened\n"
+    events, _ = parse_whatsapp_export(
+        text, farm="Green Acres", known_assets=["North Herd"], default_asset="General"
+    )
+    assert len(events) == 1
+    assert events[0].asset == "General"