Skip to content

Commit eb1ec61

Browse files
committed
Introduce logging for API Usage script consumption. Add AI-generated tests and README.md content. These still need to be vetted.
1 parent 3246f85 commit eb1ec61

10 files changed

Lines changed: 1734 additions & 0 deletions

src/app.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from schema.schema_constants import TriggerTypeEnum
3939
from metadata_constraints import get_constraints, constraints_json_is_valid
4040
# from lib.ontology import initialize_ubkg, init_ontology, Ontology, UbkgSDK
41+
from setup_lifecycle_hooks import setup_flask_lifecycle_hooks
4142

4243
# HuBMAP commons
4344
from hubmap_commons import string_helper
@@ -64,6 +65,9 @@
6465
# will be inherited by the sub-module loggers
6566
logger = logging.getLogger()
6667

68+
# Add in Flask lifecycle hooks which rely on the logger being instantiated
69+
setup_flask_lifecycle_hooks(app)
70+
6771
# Remove trailing slash / from URL base to avoid "//" caused by config with trailing slash
6872
app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/')
6973
app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/')

src/setup_lifecycle_hooks.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""
2+
Flask lifecycle hooks for API request/response logging. Uses the existing global logger configured in app.py.
3+
4+
Provides before_request and after_request hooks that log API usage using the
5+
Common Log Format, as previously used for API Gateway custom access log format on AWS.
6+
https://en.wikipedia.org/wiki/Common_Log_Format#Combined_Log_Format
7+
8+
Log format:
9+
$sourceIp $caller $user [$requestTime] "$method $resourcePath $protocol" $status $responseLength $requestId
10+
replacement for AWS API Gateway custom access log format:
11+
$context.identity.sourceIp $context.identity.caller $context.identity.user [$context.requestTime]
12+
"$context.httpMethod $context.resourcePath $context.protocol"
13+
$context.status $context.responseLength $context.requestId
14+
15+
Example log output:
16+
[2026-03-07 10:30:45] DEBUG in setup_lifecycle_hooks: Request started: GET /entities/abc123 from 172.19.0.1 [ID: req-1709809845-1234]
17+
[2026-03-07 10:30:45] INFO in setup_lifecycle_hooks: 172.19.0.1 - user@example.com [07/Mar/2026:10:30:45 +0000] "GET /entities/abc123 HTTP/1.1" 200 1234 req-1709809845-1234
18+
"""
19+
20+
import logging
21+
import time
22+
from flask import request, g
23+
from datetime import datetime
24+
25+
# Use the same logger configuration as app.py
26+
logger = logging.getLogger(__name__)
27+
28+
def setup_flask_lifecycle_hooks(app):
    """
    Register Flask lifecycle hooks for request/response logging.

    Sets up before_request and after_request handlers that log all API calls
    using the module-level logger.

    Args:
        app: Flask application instance

    Usage:
        from setup_lifecycle_hooks import setup_flask_lifecycle_hooks

        app = Flask(__name__)
        # ... existing logger configuration ...
        setup_flask_lifecycle_hooks(app)
    """
    # Imported at function scope so this block does not depend on any change
    # to the module-level import list.
    import uuid
    from datetime import timezone

    @app.before_request
    def log_endpoint_request():
        """
        Log basic request information at DEBUG level when request starts.

        Records the request start time and a request ID on flask.g so the
        after_request hook can report them.
        """
        # Read the clock once so the ID and the stored start time agree.
        g.request_start_time = time.time()

        # A random suffix keeps IDs distinct for requests that arrive from
        # the same client within the same millisecond; a hash of the client
        # address cannot do that, and string hashes differ between processes.
        g.request_id = (
            f"req-{int(g.request_start_time * 1000)}-{uuid.uuid4().hex[:8]}"
        )

        # Lazy %-style arguments: the message is only built when DEBUG
        # logging is actually enabled.
        logger.debug(
            "Request started: %s %s from %s [ID: %s]",
            request.method,
            request.path,
            request.remote_addr,
            g.request_id,
        )

    @app.after_request
    def log_endpoint_response(response):
        """
        Log complete API usage in AWS API Gateway format at INFO level.

        Format matches AWS API Gateway custom access logs:
        $sourceIp $caller $user [$requestTime] "$method $resourcePath $protocol" $status $responseLength $requestId

        Args:
            response: Flask response object

        Returns:
            response: The same response object, unchanged
        """
        source_ip = request.remote_addr or '-'

        # Caller - not available without AWS IAM, use '-'
        caller = '-'

        # User from X-Hubmap-User header; '-' when the header is absent
        user = request.headers.get('X-Hubmap-User', '-')

        # Request time in AWS/Apache format: DD/MMM/YYYY:HH:MM:SS +0000.
        # Prefer the start time captured in before_request so the entry
        # reflects when the request arrived rather than when the response was
        # produced. Fall back to "now" if that hook did not run (the same
        # case the request_id default below covers).
        started_at = getattr(g, 'request_start_time', None)
        if started_at is None:
            moment = datetime.now(timezone.utc)
        else:
            moment = datetime.fromtimestamp(started_at, tz=timezone.utc)
        request_time = moment.strftime('%d/%b/%Y:%H:%M:%S +0000')

        # HTTP method, path, and protocol
        method = request.method
        resource_path = request.path
        protocol = request.environ.get('SERVER_PROTOCOL', 'HTTP/1.1')

        # Response status code
        status = response.status_code

        # Response length in bytes. Trust the Content-Length header when it
        # is set (including 0). Otherwise measure the body only when it is
        # already buffered in memory; reading the body of a streamed or
        # passthrough response here would consume/buffer the stream or raise.
        content_length = response.content_length
        if content_length is None and response.is_sequence:
            content_length = len(response.get_data())
        response_length = '-' if content_length is None else content_length

        # Request ID (generated in before_request, or '-' if not available)
        request_id = getattr(g, 'request_id', '-')

        # $sourceIp $caller $user [$requestTime] "$method $resourcePath $protocol" $status $responseLength $requestId
        log_message = (
            f'{source_ip} {caller} {user} '
            f'[{request_time}] '
            f'"{method} {resource_path} {protocol}" '
            f'{status} {response_length} {request_id}'
        )

        logger.info(log_message)

        # after_request handlers must hand the response back to Flask
        return response

test/README.md

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Entity-API Test Suite
2+
3+
This directory contains all tests for the entity-api service, organized by test type and deployment environment.
4+
5+
## Directory Structure
6+
7+
```
8+
test/
9+
├── README.md # This file - test suite overview
10+
├── localhost/ # Tests for localhost Docker deployment
11+
│ ├── integration/ # Integration tests with hubmap-auth
12+
│ └── performance/ # Performance benchmarks (future)
13+
└── [existing test files] # Other test types
14+
```
15+
16+
## Test Categories
17+
18+
### Localhost Tests (`localhost/`)
19+
20+
Tests for entity-api running in Docker Desktop for local development and proof-of-concept deployments.
21+
22+
**When to run:** Before pushing changes that affect localhost deployment, Docker configuration, or hubmap-auth integration.
23+
24+
**See:** [localhost/README.md](localhost/README.md)
25+
26+
### Integration Tests (`localhost/integration/`)
27+
28+
End-to-end tests verifying entity-api integrates correctly with hubmap-auth for authorization over the `gateway_hubmap` Docker network.
29+
30+
**See:** [localhost/integration/README.md](localhost/integration/README.md)
31+
32+
### Performance Tests (`localhost/performance/`) - Future
33+
34+
Load testing and performance benchmarks for localhost deployment.
35+
36+
## Quick Start
37+
38+
### Run All Tests
39+
40+
```bash
41+
# Activate virtual environment
42+
source .venv/bin/activate
43+
44+
# Run all tests
45+
python -m unittest discover -s test -v
46+
```
47+
48+
### Run Localhost Integration Tests Only
49+
50+
```bash
51+
source .venv/bin/activate
52+
python -m unittest discover -s test/localhost/integration -v
53+
```
54+
55+
### Prerequisites
56+
57+
1. **Docker containers running:**
58+
```bash
59+
# Start hubmap-auth first
60+
cd gateway
61+
./docker-localhost.sh start
62+
63+
# Then start entity-api
64+
cd ../entity-api/docker
65+
./docker-localhost.sh start
66+
67+
# Verify both are healthy
68+
docker ps | grep -E "hubmap-auth|entity-api"
69+
```
70+
71+
2. **Python virtual environment:**
72+
73+
Tests use the same dependencies as the main application:
74+
75+
```bash
76+
# Create virtual environment (first time only)
77+
python3 -m venv .venv
78+
79+
# Activate virtual environment
80+
source .venv/bin/activate
81+
82+
# Install application dependencies (includes requests)
83+
pip install -r src/requirements.txt
84+
```
85+
86+
## CI/CD Integration
87+
88+
These tests are designed to run in GitHub Actions or similar CI/CD systems. Example workflow:
89+
90+
```yaml
91+
name: Entity-API Localhost Integration Tests
92+
93+
on: [push, pull_request]
94+
95+
jobs:
96+
test:
97+
runs-on: ubuntu-latest
98+
steps:
99+
- uses: actions/checkout@v3
100+
101+
- name: Checkout gateway repo
102+
uses: actions/checkout@v3
103+
with:
104+
repository: hubmapconsortium/gateway
105+
path: gateway
106+
107+
- name: Set up Python
108+
uses: actions/setup-python@v4
109+
with:
110+
python-version: '3.13'
111+
112+
- name: Create Docker network
113+
run: docker network create gateway_hubmap
114+
115+
- name: Start hubmap-auth
116+
run: |
117+
cd gateway
118+
./docker-localhost.sh build
119+
./docker-localhost.sh start
120+
121+
- name: Wait for hubmap-auth healthy
122+
run: timeout 60 bash -c 'until docker ps | grep hubmap-auth | grep healthy; do sleep 2; done'
123+
124+
- name: Start entity-api
125+
run: |
126+
cd docker
127+
./docker-localhost.sh build
128+
./docker-localhost.sh start
129+
130+
- name: Wait for entity-api healthy
131+
run: timeout 60 bash -c 'until docker ps | grep entity-api | grep healthy; do sleep 2; done'
132+
133+
- name: Install test dependencies
134+
run: |
135+
python -m venv .venv
136+
source .venv/bin/activate
137+
pip install -r src/requirements.txt
138+
139+
- name: Run integration tests
140+
run: |
141+
source .venv/bin/activate
142+
python -m unittest discover -s test/localhost/integration -v
143+
```
144+
145+
## Contributing
146+
147+
When adding new tests:
148+
149+
1. **Choose the right directory** - Place tests in the appropriate subdirectory based on type
150+
2. **Follow existing patterns** - Match the style and structure of existing tests
151+
3. **Add documentation** - Update relevant README files
152+
4. **Keep tests independent** - Each test should run in isolation
153+
5. **Use descriptive names** - Test names should clearly indicate what they verify
154+
6. **Handle errors gracefully** - Provide actionable error messages
155+
156+
## Test Execution Order
157+
158+
Tests are discovered and run alphabetically by default. If execution order matters:
159+
160+
1. Use `setUpClass` and `tearDownClass` for class-level setup
161+
2. Use `setUp` and `tearDown` for test-level setup
162+
3. Name test files to control discovery order if needed
163+
164+
## Getting Help
165+
166+
- **Test failures:** Check container logs with `docker logs entity-api`
167+
- **Connection errors:** Verify containers are running with `docker ps`
168+
- **Import errors:** Ensure virtual environment is activated
169+
- **Docker issues:** Check Docker Desktop is running
170+
- **Auth failures:** Verify hubmap-auth is running and healthy
171+
172+
## Related Documentation
173+
174+
- [Entity-API Deployment Guide](../README.md)
175+
- [Gateway API Endpoints Configuration](../../gateway/api_endpoints.localhost.json)
176+
- [Docker Compose Configuration](../docker/docker-compose.localhost.yml)
177+
- [Gateway Test Suite](../../gateway/test/README.md)

0 commit comments

Comments
 (0)