geobtaa
diff --git a/‎COMMIT_MESSAGE.txt‎
Lines changed: 35 additions & 0 deletions b/‎COMMIT_MESSAGE.txt‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 29 additions & 2 deletions b/‎Makefile‎
Lines changed: 29 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 122 additions & 2 deletions b/‎README.md‎
Lines changed: 122 additions & 2 deletions
diff --git a/‎app/api/v1/endpoint_modules/admin.py‎
Lines changed: 112 additions & 0 deletions b/‎app/api/v1/endpoint_modules/admin.py‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎app/api/v1/endpoint_modules/resources/viewer.py‎
Lines changed: 3 additions & 2 deletions b/‎app/api/v1/endpoint_modules/resources/viewer.py‎
Lines changed: 3 additions & 2 deletions
@@ -0,0 +1,35 @@
+Implement API service tiers and rate limiting with API key authentication
+
+This commit introduces a comprehensive service tier and rate limiting system
+for the BTAA Geospatial API, enabling tiered access control and request
+throttling based on API keys.
+
+Key features:
+- Service tier system with six predefined tiers (btaa_primary, btaa_secondary,
+  btaa_member_primary, btaa_member_affiliated, general_registered, anonymous)
+  each with configurable rate limits (or unlimited for internal tiers)
+- API key management with SHA-256 hashing, validation, and tier association
+- Redis-based rate limiting middleware that enforces per-tier, per-identifier
+  limits using a sliding window algorithm
+- Admin endpoints for creating, listing, updating, and revoking API keys, and
+  listing service tiers
+- Database migrations to create api_service_tiers, api_keys, and
+  api_usage_logs tables with default tier seeding
+- Comprehensive test coverage including unit tests, integration tests, and
+  middleware tests
+- Documentation updates including service tiers runbook and README updates
+  explaining authentication methods and rate limiting behavior
+
+API keys can be provided via:
+1. X-API-Key header (highest priority)
+2. api_key query parameter
+3. Cookie named 'api_key'
+
+Anonymous requests are automatically assigned to the 'anonymous' tier with
+a limit of 10 requests/minute. All requests are logged to api_usage_logs for
+analytics.
+
+Rate limiting is configurable via RATE_LIMIT_ENABLED environment variable
+and uses a dedicated Redis database (RATE_LIMIT_REDIS_DB=2) to avoid
+conflicts with caching.
+
@@ -13,6 +13,13 @@ endif
 # Can be overridden with: COVERAGE_THRESHOLD=25 make test
 COVERAGE_THRESHOLD ?= 50
 
+# Number of parallel workers for pytest-xdist
+# Default: 4 (to avoid hitting PostgreSQL connection limits)
+# Can be overridden with: PARALLEL_WORKERS=8 make test
+# Use 'auto' to use all CPU cores (may hit connection limits with many cores)
+# Set to 0 or empty to disable parallelism
+PARALLEL_WORKERS ?= 4
+
 # Run both linting and formatting checks (without modifying files)
 lint:
 	@echo "Checking code with ruff..."
@@ -42,12 +49,32 @@ test:
 		docker compose exec -T paradedb bash -lc 'PGPASSWORD=$$POSTGRES_PASSWORD psql -U postgres -c "CREATE DATABASE btaa_geospatial_api_test WITH TEMPLATE btaa_geospatial_api OWNER postgres;"'; \
 	fi
 	@echo "Running tests with coverage threshold of $(COVERAGE_THRESHOLD)%..."
-	pytest --cov=app --cov-report=term-missing --cov-report=html --cov-fail-under=$(COVERAGE_THRESHOLD)
+	@if [ -n "$(PARALLEL_WORKERS)" ] && [ "$(PARALLEL_WORKERS)" != "0" ]; then \
+		echo "Running tests in parallel with $(PARALLEL_WORKERS) workers..."; \
+		pytest -n $(PARALLEL_WORKERS) --cov=app --cov-report=term-missing --cov-report=html --cov-fail-under=$(COVERAGE_THRESHOLD); \
+	else \
+		echo "Running tests sequentially..."; \
+		pytest --cov=app --cov-report=term-missing --cov-report=html --cov-fail-under=$(COVERAGE_THRESHOLD); \
+	fi
 
 # Run just the tests without coverage threshold (for debugging)
+# Honors PARALLEL_WORKERS: a non-empty, non-zero value runs pytest-xdist with
+# that many workers; 0 or empty runs a single sequential pytest process.
 test-no-coverage:
 	@echo "Running tests without coverage threshold..."
-	pytest --full-trace
+	@if [ -n "$(PARALLEL_WORKERS)" ] && [ "$(PARALLEL_WORKERS)" != "0" ]; then \
+		echo "Running tests in parallel with $(PARALLEL_WORKERS) workers..."; \
+		pytest -n $(PARALLEL_WORKERS) --full-trace; \
+	else \
+		echo "Running tests sequentially..."; \
+		pytest --full-trace; \
+	fi
+
+# Run tests without coverage (fastest option for local development).
+# Runs in parallel with PARALLEL_WORKERS workers. When PARALLEL_WORKERS is 0 or
+# empty the suite runs sequentially instead of silently falling back to a
+# hard-coded worker count, so the documented way of disabling parallelism
+# (e.g. to stay under PostgreSQL connection limits) works for this target too.
+test-fast:
+	@if [ -n "$(PARALLEL_WORKERS)" ] && [ "$(PARALLEL_WORKERS)" != "0" ]; then \
+		echo "Running tests in parallel without coverage (fast mode)..."; \
+		pytest -n $(PARALLEL_WORKERS); \
+	else \
+		echo "Running tests sequentially without coverage (parallelism disabled)..."; \
+		pytest; \
+	fi
 
 # Force a fresh clone of the test database
 test-fresh-db:
 
@@ -161,6 +161,13 @@ SEARCH_CACHE_TTL=3600     # 1 hour
 SUGGEST_CACHE_TTL=7200    # 2 hours 
 LIST_CACHE_TTL=43200      # 12 hours
 CACHE_TTL=43200           # Default TTL (12 hours)
+
+# Rate Limiting settings
+RATE_LIMIT_ENABLED=true   # Enable/disable rate limiting
+RATE_LIMIT_REDIS_DB=2     # Redis database number for rate limiting (uses same Redis instance)
+
+# API Usage Analytics Enrichment (User Agent Parsing)
+# Note: Geocoding has been removed due to licensing complexity
 ```
 
 When caching is enabled:
@@ -178,6 +185,119 @@ You can manually clear the cache using:
 GET /api/v1/cache/clear?cache_type=search|resource|suggest|all
 ```
 
+## API Usage Analytics
+
+The API automatically logs all requests to the `api_usage_logs` table for analytics purposes. This includes:
+
+- Request metadata (endpoint, method, status code, response time)
+- API key and tier information
+- IP address and user agent
+- Referrer and UTM parameters
+- Query parameters (stored in JSON properties field)
+
+### Service tiers, API keys, and rate limiting
+
+The public API supports **service tiers** and **API key–based rate limiting**.
+
+- **Service tiers** are defined in the `api_service_tiers` table and seeded by the migrations into tiers such as:
+  - `btaa_primary` / `btaa_secondary` – internal BTAA applications with unlimited access
+  - `btaa_member_primary` / `btaa_member_affiliated` – member applications with higher limits
+  - `general_registered` – registered external users
+  - `anonymous` – unauthenticated access with the lowest limits
+- **API keys** are stored (hashed) in the `api_keys` table and associated with a tier.
+- **Rate limits** are enforced per tier, per identifier (API key hash or IP address) using Redis.
+
+#### How clients authenticate
+
+Clients can authenticate with an API key in one of three ways (in order of precedence):
+
+- `X-API-Key` header:
+
+  ```http
+  X-API-Key: your-api-key-here
+  ```
+
+- `Authorization` header with Bearer token:
+
+  ```http
+  Authorization: Bearer your-api-key-here
+  ```
+
+- `api_key` query parameter:
+
+  ```text
+  GET /api/v1/search?q=roads&api_key=your-api-key-here
+  ```
+
+If no valid API key is provided, the request is treated as **anonymous** and uses the anonymous tier’s rate limit.
+
+#### Admin API for managing keys and tiers
+
+Admin users (protected by HTTP Basic auth with `ADMIN_USERNAME` / `ADMIN_PASSWORD`) can manage keys and inspect tiers:
+
+- `POST /api/v1/admin/api-keys` – create a new API key for a given `tier_name`.
+  - Request body: `{ "tier_name": "anonymous", "name": "optional friendly name" }`
+  - Response includes the **plaintext** `api_key` once, plus `key_id` and `tier_name`.
+- `GET /api/v1/admin/api-keys` – list existing keys and their tiers.
+- `PATCH /api/v1/admin/api-keys/{key_id}` – update `tier_name`, `is_active`, or `name`.
+- `DELETE /api/v1/admin/api-keys/{key_id}` – revoke (deactivate) a key.
+- `GET /api/v1/admin/api-tiers` – list all tiers, limits, and descriptions.
+
+The admin endpoints are intended for trusted operators only; do **not** expose them directly to the public internet without appropriate protections (e.g., network restrictions, stronger auth).
+
+#### Rate limiting behavior
+
+Rate limiting is enforced by middleware in front of all non-admin API routes:
+
+- Configuration is controlled via environment variables:
+
+  ```text
+  RATE_LIMIT_ENABLED=true     # Enable/disable rate limiting middleware
+  RATE_LIMIT_REDIS_DB=2       # Redis database used for rate limiting
+  REDIS_HOST=redis            # Redis host
+  REDIS_PORT=6379             # Redis port
+  REDIS_PASSWORD=optional_password
+  ```
+
+- For each request, the middleware:
+  - Resolves the caller’s **tier** from the API key (if provided) or falls back to the `anonymous` tier.
+  - Uses Redis to track the number of requests per minute per `(tier_name, identifier)`, where `identifier` is the API key hash or client IP (via `X-Forwarded-For` or socket address).
+  - Enforces the tier’s `requests_per_minute` limit.
+
+When rate limiting is enabled, responses include:
+
+- `X-RateLimit-Limit` – the allowed number of requests per minute for the current tier (or `unlimited`).
+- `X-RateLimit-Remaining` – remaining requests in the current window (or `unlimited`).
+- `X-RateLimit-Reset` – UNIX timestamp when the window resets.
+
+If a client exceeds its rate limit:
+
+- The API returns **HTTP 429 Too Many Requests** with a JSON body describing the error.
+- The response includes `Retry-After` and `X-RateLimit-*` headers indicating when to retry.
+
+### Enrichment with User Agent Parsing
+
+API usage logs are automatically enriched in the background with:
+
+- **User agent parsing**: Browser, operating system, and device type
+
+This enrichment happens asynchronously via Celery tasks to avoid blocking API requests.
+
+**Note**: IP geocoding (country, region, city, latitude, longitude) has been removed due to licensing complexity with geocoding databases.
+
+#### Backfilling Enrichment Data
+
+To enrich existing API usage logs that were created before enrichment was enabled, you can use the batch enrichment task:
+
+```python
+from app.tasks.api_usage_enrichment import enrich_api_usage_logs_batch
+
+# Enrich 100 logs at a time
+enrich_api_usage_logs_batch.delay(batch_size=100)
+```
+
+This can be run repeatedly until all logs are enriched.
+
 ## AI Summarization
 
 The API uses OpenAI's ChatGPT API to generate summaries and identify geographic named entities of historical maps and geographic datasets. To use this feature:
@@ -279,8 +399,8 @@ Data from Who's On First. [License](https://whosonfirst.org/docs/licenses/)
 - [X] Search - basic faceting
 - [X] Performance - Redis caching
 - [X] Search - facet include/exclude
-- [ ] Search - facet alpha and numerical pagination, and search within facets
-- [ ] Search - advanced/fielded search
+- [X] Search - facet alpha and numerical pagination, and search within facets
+- [X] Search - advanced/fielded search
 - [X] Search - spatial search
 - [X] Search Results - thumbnail images (needs improvements)
 - [X] Search Results - bookmarked resources
 
@@ -3,6 +3,7 @@
 
 from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query
 from fastapi.security import HTTPBasic
+from pydantic import BaseModel
 
 from app.api.v1.auth import verify_credentials
 from app.api.v1.utils import create_response, sanitize_for_json
@@ -16,6 +17,7 @@
     ResourceProcessingError,
     ResourceProcessingService,
 )
+from app.services.api_key_service import APIKeyService
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +36,21 @@ def get_admin_service() -> AdminService:
 # Module-level singleton for dependency injection
 _admin_service_dependency = Depends(get_admin_service)
 
+# API Key Service instance (handles its own async engine and session)
+api_key_service = APIKeyService()
+
+
+# Pydantic models for request/response
+class CreateAPIKeyRequest(BaseModel):
+    """Request body accepted by the ``create_api_key`` endpoint.
+
+    Both fields are forwarded unchanged to ``api_key_service.create_api_key``.
+    """
+    # Name of the service tier the new key is created for (required).
+    tier_name: str
+    # Optional label for the key; None when the client omits it.
+    name: Optional[str] = None
+
+
+class UpdateAPIKeyRequest(BaseModel):
+    """Request body accepted by the ``update_api_key`` endpoint.
+
+    Every field is optional and defaults to None; all three are passed as-is
+    to ``api_key_service.update_api_key_by_id``.
+    """
+    # Tier name to apply to the key.
+    tier_name: Optional[str] = None
+    # Active flag to apply to the key.
+    is_active: Optional[bool] = None
+    # Label to apply to the key.
+    name: Optional[str] = None
+
 
 @router.post("/cache/clear")
 async def clear_cache(
@@ -136,3 +153,98 @@ async def identify_geo_entities(
             f"for resource {id}: {str(e)}"
         )
         raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+# API Key Management Endpoints
+
+
+@router.post("/api-keys")
+async def create_api_key(
+    request: CreateAPIKeyRequest,
+):
+    """Create a new API key."""
+    try:
+        result = await api_key_service.create_api_key(
+            tier_name=request.tier_name,
+            name=request.name,
+        )
+
+        if result is None:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Failed to create API key. Tier '{request.tier_name}' may not exist.",
+            )
+
+        return create_response(result)
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error creating API key: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.get("/api-keys")
+async def list_api_keys():
+    """List all API keys."""
+    try:
+        keys = await api_key_service.list_api_keys()
+        return create_response({"keys": keys})
+    except Exception as e:
+        logger.error(f"Error listing API keys: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.patch("/api-keys/{key_id}")
+async def update_api_key(
+    key_id: int,
+    request: UpdateAPIKeyRequest,
+):
+    """Update an API key."""
+    try:
+        updated = await api_key_service.update_api_key_by_id(
+            key_id=key_id,
+            tier_name=request.tier_name,
+            is_active=request.is_active,
+            name=request.name,
+        )
+
+        if not updated:
+            # Could be missing key, missing tier, or no fields to update
+            raise HTTPException(status_code=400, detail="Failed to update API key")
+
+        return create_response({"message": "API key updated successfully"})
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error updating API key: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.delete("/api-keys/{key_id}")
+async def revoke_api_key(key_id: int):
+    """Revoke (deactivate) an API key."""
+    try:
+        # Use service method that handles its own async session (NullPool) to
+        # avoid cross-event-loop issues with the shared database connection.
+        success = await api_key_service.revoke_api_key_by_id(key_id)
+
+        if not success:
+            raise HTTPException(status_code=500, detail="Failed to revoke API key")
+
+        return create_response({"message": "API key revoked successfully"})
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error revoking API key: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.get("/api-tiers")
+async def list_api_tiers():
+    """List all service tiers."""
+    try:
+        tiers = await api_key_service.list_tiers()
+        return create_response({"tiers": tiers})
+    except Exception as e:
+        logger.error(f"Error listing API tiers: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
@@ -1,6 +1,6 @@
 from typing import Optional
 
-from fastapi import Query, Request
+from fastapi import HTTPException, Query, Request
 from fastapi.responses import JSONResponse
 from sqlalchemy.sql import select
 
@@ -27,7 +27,8 @@ async def get_resource_viewer_data(
             row = result.fetchone()
 
             if not row:
-                return JSONResponse(content={"error": "Resource not found"}, status_code=404)
+                # Align with tests: return 404 with {"detail": "Resource not found"}
+                raise HTTPException(status_code=404, detail="Resource not found")
 
             resource_dict = sanitize_for_json(dict(row._mapping))