Skip to content

Commit 38e562d

Browse files
authored
Merge pull request #30 from Climate-REF/fix/readonly-ref-mount
fix: allow API to run against a read-only /ref mount
2 parents 87b4235 + db16f2b commit 38e562d

12 files changed

Lines changed: 579 additions & 443 deletions

File tree

Dockerfile

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# Build the API container for the REF compute engine
22
# The frontend and backend are built separately and then combined into a single image
33

4-
FROM ghcr.io/astral-sh/uv:python3.13-bookworm AS backend
4+
FROM ghcr.io/astral-sh/uv:python3.13-bookworm AS base
5+
6+
FROM base AS backend
57

68
# Enable bytecode compilation
79
ENV UV_COMPILE_BYTECODE=1
@@ -21,7 +23,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
2123

2224
ADD backend /app
2325
RUN --mount=type=cache,target=/root/.cache/uv \
24-
uv sync --frozen --no-dev
26+
uv sync --frozen --no-editable --no-dev
2527

2628

2729
# Build the frontend
@@ -40,17 +42,18 @@ RUN npm run build
4042

4143
# Runtime container
4244
# Copy the installed packages from the build stage to decrease the size of the final image
43-
FROM python:3.13-slim-bookworm AS runtime
45+
FROM base AS runtime
4446

4547
LABEL maintainer="Jared Lewis <jared.lewis@climate-resource.com>"
4648
LABEL description="Docker image for the REF API"
4749

4850
ENV PATH="/app/.venv/bin:${PATH}"
4951
ENV STATIC_DIR=/app/static
50-
ENV REF_CONFIGURATION=/app/.ref
52+
ENV REF_CONFIGURATION=/ref
5153
ENV FRONTEND_HOST=http://0.0.0.0:8000
54+
ENV XDG_CACHE_HOME=$REF_CONFIGURATION/cache
5255

53-
RUN groupadd --system app && useradd --system --gid app app
56+
RUN useradd -m -u 1000 app
5457

5558
WORKDIR /app
5659

@@ -61,7 +64,8 @@ COPY --from=frontend --chown=app:app /frontend/dist /app/static
6164

6265
RUN chown -R app:app /app
6366

64-
USER app
67+
# Switch to non-root user -- use a numeric UID so Kubernetes can verify runAsNonRoot (and match runAsUser)
68+
USER 1000
6569

6670
# Run the REF API server (FastAPI) by default
6771
ENTRYPOINT ["fastapi", "run", "--workers", "4", "/app/src/ref_backend/main.py"]

backend/pyproject.toml

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,21 @@ description = "Backend for the Climate Rapid Evaluation Framework"
55
requires-python = ">=3.11"
66
dependencies = [
77
"fastapi[standard]<1.0.0,>=0.114.2",
8-
"python-multipart<1.0.0,>=0.0.7",
9-
"tenacity<9.0.0,>=8.2.3",
108
"pydantic>2.0",
119
"psycopg[binary]<4.0.0,>=3.1.13",
12-
"pydantic-settings<3.0.0,>=2.2.1",
10+
"pydantic-settings<3.0.0,>=2.13.1",
1311
"sentry-sdk[fastapi]>=2.0.0",
14-
"climate-ref[aft-providers,postgres]>=0.12.2",
12+
"climate-ref[aft-providers,postgres]>=0.13.1,<0.14",
1513
"loguru",
1614
"pyyaml>=6.0",
1715
"fastapi-sqlalchemy-monitor>=1.1.3",
1816
]
1917

2018
[dependency-groups]
2119
dev = [
22-
"pytest>=8.0",
20+
"pytest>=9.0",
2321
"mypy>=1.8.0",
24-
"ruff>=0.12.0",
22+
"ruff>=0.15.0",
2523
"pre-commit>=4.0",
2624
"coverage>=7.4.3",
2725
# "climate-ref-example",
@@ -31,9 +29,6 @@ dev = [
3129
"towncrier>=24.8.0",
3230
]
3331

34-
[tool.uv]
35-
constraint-dependencies = ["environs<15.0.0"]
36-
3732
[tool.uv.sources]
3833
# Temporary pin for testing
3934
# climate-ref = { git = "https://github.com/Climate-REF/climate-ref", subdirectory = "packages/climate-ref", tag="v0.7.0" }

backend/src/ref_backend/api/deps.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ def _ref_config_dependency(settings: SettingsDep) -> Config:
2424
REFConfigDep = Annotated[Config, Depends(_ref_config_dependency)]
2525

2626

27-
def _get_database_dependency(ref_config: REFConfigDep) -> Database:
28-
return get_database(ref_config)
27+
def _get_database_dependency(settings: SettingsDep, ref_config: REFConfigDep) -> Database:
28+
return get_database(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)
2929

3030

3131
DatabaseDep = Annotated[Database, Depends(_get_database_dependency)]
@@ -57,11 +57,11 @@ class AppContext:
5757
provider_registry: ProviderRegistry
5858

5959

60-
def _provider_registry_dependency(ref_config: REFConfigDep) -> ProviderRegistry:
60+
def _provider_registry_dependency(settings: SettingsDep, ref_config: REFConfigDep) -> ProviderRegistry:
6161
"""
6262
Get the provider registry
6363
"""
64-
return get_provider_registry(ref_config)
64+
return get_provider_registry(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)
6565

6666

6767
ProviderRegistryDep = Annotated[ProviderRegistry, Depends(_provider_registry_dependency)]

backend/src/ref_backend/core/config.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,18 @@ def all_cors_origins(self) -> list[str]:
6868
PROJECT_NAME: str = "Climate Rapid Evaluation Framework"
6969
SENTRY_DSN: HttpUrl | None = None
7070
REF_CONFIGURATION: str = "data"
71+
REF_READ_ONLY_DATABASE: bool = False
72+
"""
73+
Open the SQLite database in read-only mode.
74+
75+
When true, the API opens the configured SQLite database via a URI-form
76+
connection string with ``mode=ro&immutable=1`` so that SQLite does not
77+
attempt to create a journal/WAL sidecar. This lets the REF state volume
78+
(e.g. ``/ref``) be mounted read-only in deployments where the API is a
79+
pure consumer of worker-produced state.
80+
81+
Ignored for non-SQLite databases.
82+
"""
7183
STATIC_DIR: str | None = None
7284
USE_TEST_DATA: bool = False
7385
"""

backend/src/ref_backend/core/outliers.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,11 @@ def detect_outliers_in_scalar_values(
140140
lower_bound, upper_bound = iqr_bounds
141141
# Apply bounds to individual values (Reference values always non-outlier)
142142
source_id_flags = group_values.apply(
143-
lambda row: (row["value"] < lower_bound or row["value"] > upper_bound)
144-
if row["source_id"] != "Reference"
145-
else False,
143+
lambda row: (
144+
(row["value"] < lower_bound or row["value"] > upper_bound)
145+
if row["source_id"] != "Reference"
146+
else False
147+
),
146148
axis=1,
147149
)
148150
else:

backend/src/ref_backend/core/ref.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from loguru import logger
44

55
from climate_ref.config import Config
6-
from climate_ref.database import Database, _get_database_revision
6+
from climate_ref.database import Database, MigrationState
77
from climate_ref.provider_registry import ProviderRegistry
88
from ref_backend.core.config import Settings
99

@@ -20,26 +20,47 @@ def get_ref_config(settings: Settings) -> Config:
2020
return Config.load(config_fname, allow_missing=True)
2121

2222

23-
def get_database(ref_config: Config) -> Database:
23+
def get_database(ref_config: Config, read_only: bool = False) -> Database:
2424
"""
25-
Get a database connection using the default config
25+
Get a database connection using the default config.
26+
27+
When ``read_only`` is true,
28+
the SQLite database is opened via ``Database.from_config(..., read_only=True)``,
29+
which rewrites the URL to read-only URI form so no journal/WAL sidecar is created.
2630
"""
27-
database = Database.from_config(ref_config, run_migrations=False)
28-
with database._engine.connect() as connection:
29-
if _get_database_revision(connection) is None:
30-
msg = (
31-
"The database migration has not been run. "
32-
"Check the database URL in your config file and run the migration."
33-
)
34-
logger.warning(msg)
35-
if ref_config.db.run_migrations:
36-
raise ValueError(msg)
31+
database = Database.from_config(ref_config, run_migrations=False, read_only=read_only)
32+
33+
status = database.migration_status(ref_config)
34+
state = status["state"]
35+
if state is MigrationState.UP_TO_DATE:
36+
return database
37+
38+
if state is MigrationState.UNMANAGED:
39+
msg = (
40+
"The database has no alembic revision stamp. "
41+
"Check the database URL in your config file and run the migration."
42+
)
43+
logger.warning(msg)
44+
if ref_config.db.run_migrations:
45+
raise ValueError(msg)
46+
elif state is MigrationState.REMOVED:
47+
raise ValueError(
48+
f"Database revision {status['current']!r} has been removed. "
49+
"Please delete your database and start again."
50+
)
51+
else:
52+
logger.warning(
53+
f"Database revision {status['current']!r} does not match this image's "
54+
f"head revision {status['head']!r}. "
55+
"The API will continue to read this database."
56+
)
57+
3758
return database
3859

3960

40-
def get_provider_registry(ref_config: Config) -> ProviderRegistry:
61+
def get_provider_registry(ref_config: Config, read_only: bool = False) -> ProviderRegistry:
4162
"""
4263
Get the provider registry
4364
"""
44-
database = get_database(ref_config)
65+
database = get_database(ref_config, read_only=read_only)
4566
return ProviderRegistry.build_from_config(ref_config, database)

backend/src/ref_backend/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828

2929
# Initialize singletons at application startup
3030
ref_config = get_ref_config(settings)
31-
database = get_database(ref_config)
32-
provider_registry = get_provider_registry(ref_config)
31+
database = get_database(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)
32+
provider_registry = get_provider_registry(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)
3333

3434
setup_logging(settings.LOG_LEVEL)
3535
app = build_app(settings, ref_config, database)

backend/tests/test_core/test_ref.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
1+
import copy
2+
import shutil
13
from pathlib import Path
24

5+
import pytest
6+
import sqlalchemy
7+
38
from climate_ref.config import Config
49
from ref_backend.core.config import Settings
5-
from ref_backend.core.ref import get_ref_config
10+
from ref_backend.core.ref import get_database, get_ref_config
11+
from ref_backend.testing import test_ref_config as _load_test_ref_config
612

713

814
def test_get_ref_config_missing_toml(tmp_path: Path):
@@ -24,3 +30,44 @@ def test_get_ref_config_with_toml(tmp_path: Path):
2430
config = get_ref_config(settings)
2531

2632
assert isinstance(config, Config)
33+
34+
35+
def _copy_test_db(tmp_path: Path) -> Path:
36+
"""Copy the checked-in test SQLite database to an isolated location."""
37+
src = Path(_load_test_ref_config().db.database_url.removeprefix("sqlite:///"))
38+
dst = tmp_path / "climate_ref.db"
39+
shutil.copy2(src, dst)
40+
return dst
41+
42+
43+
def test_get_database_read_only_rejects_writes(tmp_path: Path):
44+
"""read_only=True opens SQLite via mode=ro so writes raise OperationalError."""
45+
db_path = _copy_test_db(tmp_path)
46+
ref_config = copy.deepcopy(_load_test_ref_config())
47+
ref_config.db.database_url = f"sqlite:///{db_path}"
48+
49+
database = get_database(ref_config, read_only=True)
50+
51+
assert "mode=ro" in database.url
52+
with database._engine.connect() as connection:
53+
with pytest.raises(sqlalchemy.exc.OperationalError):
54+
connection.execute(sqlalchemy.text("CREATE TABLE probe (x INTEGER)"))
55+
connection.commit()
56+
57+
58+
def test_get_database_tolerates_unknown_revision(tmp_path: Path):
59+
"""
60+
A DB stamped with an alembic revision this image doesn't know must not
61+
raise — it means a newer climate-ref CLI ran the migration.
62+
"""
63+
db_path = _copy_test_db(tmp_path)
64+
ref_config = copy.deepcopy(_load_test_ref_config())
65+
ref_config.db.database_url = f"sqlite:///{db_path}"
66+
67+
engine = sqlalchemy.create_engine(ref_config.db.database_url)
68+
with engine.begin() as connection:
69+
connection.execute(sqlalchemy.text("UPDATE alembic_version SET version_num = 'from_future_cli'"))
70+
engine.dispose()
71+
72+
database = get_database(ref_config)
73+
assert database is not None

0 commit comments

Comments
 (0)