Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
*.db
./data/*

# Ruff Lint
.ruff_cache/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 0 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ async def get_textified_wd(
- **all_ranks** (bool): If `true`, include preferred, normal, and deprecated statement ranks.
- **qualifiers** (bool): If `true`, include qualifiers for claim values.
- **fallback_lang** (str): Fallback language used when `lang` is unavailable.
- **request** (Request): FastAPI request context object.
- **background_tasks** (BackgroundTasks): Background task manager used for cache cleanup.

**Returns:**

Expand Down
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ dependencies = [

[dependency-groups]
dev = [
"pytest>=8.4.2",
"ruff>=0.9.0"
]

[tool.ruff]
target-version = "py313"
line-length = 120

exclude = ["data/mysql"]

[tool.ruff.lint]
Expand All @@ -40,3 +40,8 @@ convention = "google"
known-first-party = [
"wikidatasearch"
]

[tool.pytest.ini_options]
testpaths = [
"tests"
]
68 changes: 68 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Tests

This folder contains automated tests split into two layers:

- **Unit (`tests/unit`)**: Fast isolated tests with stubs/mocks and direct function/route calls.
- **Integration (`tests/integration`)**: Live HTTP tests against a running local API server.

## What Is Covered

### Unit (`tests/unit`)

- Route wiring behavior for single-ID vs multi-ID normalization paths.
- JSON normalizer behavior (rank filtering, datatype conversion, external-id filtering).
- Textifier model behavior (serialization, triplet/text rendering, truthiness rules).
- Utility helpers (`src/utils.py`) with mocked HTTP calls.
- Label helper behavior (`src/WikidataLabel.py`) including language fallback and lazy resolution.

### Integration (`tests/integration`)

- Local API contracts for `GET /` and docs endpoint availability.
- Response shape checks for JSON and text output.
- Cache verification: ensure label rows are written and reused between repeated requests.

## Setup

From project root:

```bash
uv sync --locked
```

For integration tests, start Docker services first:

```bash
docker compose up --build
```

## Common Commands

Run unit tests only:

```bash
uv run pytest -q tests/unit
```

Run integration tests only:

```bash
uv run pytest -q tests/integration -m integration
```

Run all tests:

```bash
uv run pytest -q tests
```

Run lint:

```bash
uv run ruff check .
```

## Notes

- Integration tests assume the API is available at `http://127.0.0.1:5000`.
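- The cache integration test reads its DB connection settings (`DB_HOST`, `DB_PORT`, `DB_USER`, `DB_PASS`, `DB_NAME`) from environment variables first, then from the local `.env` file, and finally falls back to built-in defaults.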
- If the database connection fails (for example, the credentials are wrong or the server is not running), the cache verification test is skipped with a clear message.
219 changes: 219 additions & 0 deletions tests/integration/test_live_routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
"""Live integration tests against the local FastAPI service."""

import json
import os
from pathlib import Path
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen

import pymysql
import pytest

# Apply the ``integration`` marker to every test in this module (selectable with ``-m integration``).
pytestmark = pytest.mark.integration
# Base URL of the locally running API that all requests in this module are sent to.
LOCAL_BASE_URL = "http://127.0.0.1:5000"


def _api_get(path: str, params: dict | None = None, expected_status: int | None = 200) -> dict:
    """Issue a GET against the local API and collect status, decoded payload, and headers.

    Args:
        path: URL path appended to ``LOCAL_BASE_URL``.
        params: Optional query parameters; sequence values are expanded into repeated keys.
        expected_status: Status code the response must have, or ``None`` to skip the check.

    Returns:
        A dict with ``status``, ``payload`` (parsed JSON when the body is valid JSON,
        otherwise the decoded text) and ``headers``.
    """
    url = f"{LOCAL_BASE_URL}{path}"
    if params:
        url += "?" + urlencode(params, doseq=True)

    request_headers = {
        "User-Agent": "Pytest Integration Suite/1.0 (integration-tests@example.org)",
        "Accept": "application/json",
    }
    req = Request(url, method="GET", headers=request_headers)

    try:
        with urlopen(req, timeout=120) as res:
            status, body_bytes, headers = res.status, res.read(), dict(res.headers.items())
    except HTTPError as http_err:
        # Error responses still carry a status, body, and headers worth reporting.
        status = http_err.code
        body_bytes = http_err.read()
        headers = dict(http_err.headers.items()) if http_err.headers else {}
    except URLError as conn_err:
        pytest.fail(f"Local API is unreachable at {LOCAL_BASE_URL}: {conn_err}")

    text = body_bytes.decode("utf-8", errors="replace")
    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        # Non-JSON bodies (for example HTML pages) are returned as plain text.
        payload = text

    if expected_status is not None:
        assert status == expected_status, f"{path} expected {expected_status}, got {status}: {payload}"

    return dict(status=status, payload=payload, headers=headers)


def _load_env_file() -> dict[str, str]:
"""Load key-value pairs from local ``.env`` file if present."""
env_path = Path(__file__).resolve().parents[2] / ".env"
out: dict[str, str] = {}
if not env_path.exists():
return out

for raw_line in env_path.read_text(encoding="utf-8").splitlines():
line = raw_line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, value = line.split("=", 1)
normalized_value = value.strip()
if (
len(normalized_value) >= 2
and normalized_value[0] == normalized_value[-1]
and normalized_value[0] in {"'", '"'}
):
normalized_value = normalized_value[1:-1]
out[key.strip()] = normalized_value
return out


def _db_config() -> dict[str, str | int]:
    """Build the DB connection settings used for cache verification queries.

    Each setting is taken from the process environment first, then from the local ``.env``
    file, then from a built-in default. Blank host/port/user values are treated as unset, so
    an empty entry such as ``DB_PORT=`` falls back to the default instead of crashing
    ``int()``. The password is the exception: an explicitly empty ``DB_PASS`` is kept as an
    empty password.

    The database name is resolved as: ``DB_NAME`` (environment), ``DB_NAME_LABEL``
    (environment), ``DB_NAME_LABEL`` (``.env``), ``DB_NAME`` (``.env``), then ``"label"``.

    Returns:
        Mapping with ``host``, ``port`` (int), ``user``, ``password`` and ``database`` keys.

    Raises:
        ValueError: If the configured port is not an integer string.
    """
    env_file = _load_env_file()

    def _setting(name: str, default: str) -> str:
        """Return the first non-blank value for ``name`` from the environment, ``.env``, or ``default``."""
        return os.environ.get(name) or env_file.get(name) or default

    # An empty password is legitimate, so only a missing variable triggers the fallback.
    password = os.environ.get("DB_PASS")
    if password is None:
        password = env_file.get("DB_PASS", "")

    db_name = os.environ.get("DB_NAME")
    if db_name is None:
        # Honour DB_NAME_LABEL from the real environment as well as from the .env file.
        db_name = (
            os.environ.get("DB_NAME_LABEL")
            or env_file.get("DB_NAME_LABEL")
            or env_file.get("DB_NAME")
            or "label"
        )

    return {
        "host": _setting("DB_HOST", "127.0.0.1"),
        "port": int(_setting("DB_PORT", "3306")),
        "user": _setting("DB_USER", "root"),
        "password": password,
        "database": db_name,
    }


def _db_connect():
    """Return a new autocommit PyMySQL connection built from the ``_db_config()`` settings."""
    settings = _db_config()
    # The settings dict carries exactly host, port, user, password and database.
    return pymysql.connect(charset="utf8mb4", autocommit=True, **settings)


def test_docs_route_is_reachable():
    """Check that ``/docs`` answers with status 200 and an HTML content type."""
    response_headers = _api_get("/docs", expected_status=200)["headers"]

    # The header name may arrive capitalised or lower-cased depending on the server.
    content_type = response_headers.get("Content-Type")
    if not content_type:
        content_type = response_headers.get("content-type", "")

    assert "text/html" in content_type


def test_entity_query_json_contract_for_multi_ids():
    """Check that a comma-separated multi-ID JSON query returns one entry per requested entity."""
    query = {"id": "Q42,Q2", "format": "json", "lang": "en", "pid": "P31"}
    payload = _api_get("/", params=query, expected_status=200)["payload"]

    assert isinstance(payload, dict)
    assert set(payload.keys()) == {"Q42", "Q2"}

    q42_entry = payload["Q42"]
    assert isinstance(q42_entry, dict)
    assert q42_entry["QID"] == "Q42"
    assert "claims" in q42_entry


def test_entity_query_text_contract_for_single_id():
    """Check that a single-ID text query returns a non-empty string keyed by the entity ID."""
    query = {"id": "Q42", "format": "text", "lang": "en", "pid": "P31"}
    payload = _api_get("/", params=query, expected_status=200)["payload"]

    assert isinstance(payload, dict)
    assert "Q42" in payload

    rendered = payload["Q42"]
    assert isinstance(rendered, str)
    assert rendered


def _read_tracked_label_dates(tracked_ids: list[str]) -> dict:
    """Return ``{id: date_added}`` for the two tracked IDs currently present in the ``labels`` table."""
    with _db_connect() as conn:
        with conn.cursor() as cur:
            cur.execute(
                "SELECT id, date_added FROM labels WHERE id IN (%s, %s)",
                (tracked_ids[0], tracked_ids[1]),
            )
            return {row[0]: row[1] for row in cur.fetchall()}


def _request_cache_probe_entities() -> dict:
    """Send the fixed multi-ID JSON query used by the cache test and return the ``_api_get`` result."""
    return _api_get(
        "/",
        params={
            "id": "Q42,Q2",
            "format": "json",
            "lang": "en",
            "pid": "P31",
        },
        expected_status=200,
    )


def test_cache_writes_and_reuses_label_entries():
    """Validate label cache rows are written and then reused across repeated requests.

    The tracked rows are deleted first, so any rows found after the first request were created
    by it. The second request must leave ``date_added`` for ``P31`` unchanged. The test is
    skipped when the initial connect/delete raises ``pymysql.err.OperationalError``.
    """
    tracked_ids = ["P31", "Q5"]

    try:
        with _db_connect() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "DELETE FROM labels WHERE id IN (%s, %s)",
                    (tracked_ids[0], tracked_ids[1]),
                )
    except pymysql.err.OperationalError as e:
        pytest.skip(f"Cannot connect to MariaDB for cache verification: {e}")

    first = _request_cache_probe_entities()
    assert isinstance(first["payload"], dict)

    first_dates = _read_tracked_label_dates(tracked_ids)
    assert first_dates, "Expected label cache entries to be created after first request."
    assert "P31" in first_dates

    second = _request_cache_probe_entities()
    assert isinstance(second["payload"], dict)

    second_dates = _read_tracked_label_dates(tracked_ids)
    # Fail with a readable message instead of a bare KeyError if the row disappeared.
    assert "P31" in second_dates, "Expected the P31 label cache entry to still exist after the second request."
    assert second_dates["P31"] == first_dates["P31"]
48 changes: 48 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Setup for unit tests: shared fixtures and import bootstrap."""

import asyncio
import sys
from pathlib import Path
from urllib.parse import urlencode

import pytest
from starlette.requests import Request

# Directory two levels above this file's folder; added to sys.path so tests can import from it.
ROOT = Path(__file__).resolve().parents[2]
if not any(entry == str(ROOT) for entry in sys.path):
    # Front of the list so this checkout takes precedence during import resolution.
    sys.path.insert(0, str(ROOT))


@pytest.fixture
def run_async():
    """Provide a helper that runs a coroutine to completion and returns its result."""

    def _execute(awaitable):
        # asyncio.run drives the coroutine on an event loop it creates for this call.
        outcome = asyncio.run(awaitable)
        return outcome

    return _execute


@pytest.fixture
def make_request():
    """Provide a factory that builds Starlette ``Request`` objects from a hand-written HTTP scope."""

    def _build(path: str, method: str = "GET", params: dict | None = None) -> Request:
        """Return a request for ``path`` with ``params`` as its query string and a test User-Agent."""
        encoded_query = urlencode(params or {}, doseq=True).encode()
        user_agent_header = (b"user-agent", b"Unit Test Client/1.0 (unit-tests@example.org)")
        return Request(
            {
                "type": "http",
                "http_version": "1.1",
                "method": method,
                "path": path,
                "query_string": encoded_query,
                "headers": [user_agent_header],
                "client": ("127.0.0.1", 12345),
                "scheme": "http",
                "server": ("testserver", 80),
            }
        )

    return _build
Loading
Loading