Skip to content

Commit 288cb16

Browse files
authored
Merge pull request #4 from P2GX/enhancement/model-sync
Enhancement/model sync
2 parents 472c6e2 + 04ca669 commit 288cb16

27 files changed

Lines changed: 2675 additions & 872 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,7 @@ site/
8484
*.tmp
8585
tmp/
8686
temp/
87+
88+
# Node.js (for schema bundling)
89+
node_modules/
90+
package-lock.json

README.md

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,68 @@ pip install pre-commit
250250
pre-commit install
251251
```
252252

253+
### Schema Synchronization
254+
255+
This project includes tooling to detect drift between your Pydantic models and the official [Beacon v2 JSON schemas](https://github.com/ga4gh-beacon/beacon-v2).
256+
257+
#### Prerequisites
258+
259+
```bash
260+
npm install @apidevtools/json-schema-ref-parser
261+
```
262+
263+
#### Sync and Compare
264+
265+
Run the sync script to download schemas and compare against your models:
266+
267+
```bash
268+
# Download latest release and compare
269+
./scripts/sync_beacon_schemas.sh
270+
271+
# Use a specific version
272+
./scripts/sync_beacon_schemas.sh --version v2.1.0
273+
274+
# Clean cached schemas and re-download
275+
./scripts/sync_beacon_schemas.sh --clean
276+
```
277+
278+
Or run just the comparison (if schemas are already downloaded):
279+
280+
```bash
281+
uv run python scripts/compare_models.py
282+
```
283+
284+
#### What It Does
285+
286+
1. **Downloads** Beacon v2 release artifacts from GitHub
287+
2. **Bundles** JSON schemas (resolves all `$ref` references)
288+
3. **Compares** schema fields against `src/beacon_api/models/`
289+
4. **Reports** missing fields, extra fields, and coverage
290+
291+
#### Output
292+
293+
The comparison script reports:
294+
- **Missing fields** - Fields in the schema but not in your model
295+
- **Extra fields** - Custom fields you've added (not in schema)
296+
- **Field counts** - Coverage summary per model
297+
298+
Downloaded schemas are cached in `tmp/` (gitignored):
299+
300+
```
301+
tmp/
302+
├── beacon-v2-schemas/ # Downloaded release artifacts
303+
└── bundled_schemas/ # Resolved JSON schemas
304+
```
305+
306+
#### Workflow for Updating Models
307+
308+
1. Run `./scripts/sync_beacon_schemas.sh`
309+
2. Review the comparison report for missing fields
310+
3. Add missing fields to `src/beacon_api/models/` as needed
311+
4. Re-run to verify coverage
312+
313+
This approach keeps you in control while ensuring your models stay aligned with the upstream specification.
314+
253315
## API Endpoints
254316

255317
### Info

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ dev = [
4444
"httpx>=0.27.0",
4545
"ruff>=0.7.0",
4646
"mypy>=1.13.0",
47+
"datamodel-code-generator>=0.53.0",
4748
]
4849

4950
[build-system]

scripts/bundle_schemas.js

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
#!/usr/bin/env node
2+
/**
3+
* Bundle Beacon v2 JSON schemas for Pydantic model generation.
4+
*
5+
* Strategy:
6+
* - Entity models: Copy pre-dereferenced schemas from beacon-v2/bin/deref_schemas/
7+
* - Framework schemas: Bundle using json-schema-ref-parser (these resolve correctly)
8+
*
9+
* Usage:
10+
* npm install @apidevtools/json-schema-ref-parser
11+
* node scripts/bundle_schemas.js
12+
*/
13+
14+
const $RefParser = require('@apidevtools/json-schema-ref-parser');
15+
const fs = require('fs');
16+
const path = require('path');
17+
18+
// Path configuration
// The Beacon v2 checkout is looked up under tmp/ one level above this script
// unless the BEACON_V2_ROOT environment variable names another location.
const envBeaconRoot = process.env.BEACON_V2_ROOT;
const BEACON_V2_ROOT = path.resolve(
  envBeaconRoot || path.join(__dirname, '../tmp/beacon-v2-schemas')
);

// Every schema produced by this script is written here.
const OUTPUT_DIR = path.resolve(__dirname, '../tmp/bundled_schemas');

// Sub-directories of the Beacon v2 checkout that are read from.
const FRAMEWORK_DIR = path.join(BEACON_V2_ROOT, 'framework', 'json');
const DEREF_SCHEMAS_DIR = path.join(BEACON_V2_ROOT, 'bin', 'deref_schemas');
27+
28+
// Entity schemas are shipped already dereferenced, so they are only copied.
// Each pair is [output name, directory under DEREF_SCHEMAS_DIR]; every
// directory contributes its defaultSchema.json. Insertion order is kept, so
// processing order matches the order of this list.
const ENTITY_SCHEMAS = Object.fromEntries(
  [
    ['individual', 'individuals'],
    ['biosample', 'biosamples'],
    ['cohort', 'cohorts'],
    ['dataset', 'datasets'],
    ['run', 'runs'],
    ['analysis', 'analyses'],
    ['genomicVariation', 'genomicVariations'],
  ].map(([schemaName, entityDir]) => [
    schemaName,
    path.join(DEREF_SCHEMAS_DIR, entityDir, 'defaultSchema.json'),
  ])
);
38+
39+
// Framework schemas still contain $ref pointers and therefore need bundling.
// Each pair is [output name, path segments relative to FRAMEWORK_DIR].
// Insertion order is kept, so processing order matches the order of this list.
const FRAMEWORK_SCHEMAS = Object.fromEntries(
  [
    // Common
    ['common', ['common', 'beaconCommonComponents.json']],
    ['ontologyTerm', ['common', 'ontologyTerm.json']],

    // Requests
    ['requestBody', ['requests', 'beaconRequestBody.json']],
    ['requestMeta', ['requests', 'beaconRequestMeta.json']],
    ['filteringTerms', ['requests', 'filteringTerms.json']],

    // Responses
    ['booleanResponse', ['responses', 'beaconBooleanResponse.json']],
    ['countResponse', ['responses', 'beaconCountResponse.json']],
    ['resultsetsResponse', ['responses', 'beaconResultsetsResponse.json']],
    ['collectionsResponse', ['responses', 'beaconCollectionsResponse.json']],
    ['infoResponse', ['responses', 'beaconInfoResponse.json']],
    ['errorResponse', ['responses', 'beaconErrorResponse.json']],
    ['filteringTermsResponse', ['responses', 'beaconFilteringTermsResponse.json']],
    ['mapResponse', ['responses', 'beaconMapResponse.json']],

    // Response Sections
    ['responseMeta', ['responses', 'sections', 'beaconResponseMeta.json']],
    ['resultsets', ['responses', 'sections', 'beaconResultsets.json']],
  ].map(([schemaName, segments]) => [
    schemaName,
    path.join(FRAMEWORK_DIR, ...segments),
  ])
);
64+
65+
/**
66+
* Copy a pre-dereferenced schema file
67+
*/
68+
function copySchema(name, sourcePath) {
69+
console.log(`Copying ${name}...`);
70+
71+
if (!fs.existsSync(sourcePath)) {
72+
console.warn(` WARNING: Schema file not found: ${sourcePath}`);
73+
return false;
74+
}
75+
76+
const outputPath = path.join(OUTPUT_DIR, `${name}.json`);
77+
fs.copyFileSync(sourcePath, outputPath);
78+
79+
const stats = fs.statSync(outputPath);
80+
console.log(` -> ${outputPath} (${(stats.size / 1024).toFixed(1)} KB)`);
81+
return true;
82+
}
83+
84+
/**
85+
* Bundle a framework schema using json-schema-ref-parser
86+
*/
87+
async function bundleSchema(name, schemaPath) {
88+
console.log(`Bundling ${name}...`);
89+
90+
if (!fs.existsSync(schemaPath)) {
91+
console.warn(` WARNING: Schema file not found: ${schemaPath}`);
92+
return false;
93+
}
94+
95+
try {
96+
const schema = await $RefParser.dereference(schemaPath, {
97+
dereference: {
98+
circular: 'ignore'
99+
}
100+
});
101+
102+
const outputPath = path.join(OUTPUT_DIR, `${name}.json`);
103+
fs.writeFileSync(outputPath, JSON.stringify(schema, null, 2));
104+
105+
const stats = fs.statSync(outputPath);
106+
console.log(` -> ${outputPath} (${(stats.size / 1024).toFixed(1)} KB)`);
107+
return true;
108+
} catch (error) {
109+
console.error(` ERROR bundling ${name}: ${error.message}`);
110+
return false;
111+
}
112+
}
113+
114+
/**
 * Script entry point: checks that the Beacon v2 checkout is present, copies
 * the pre-dereferenced entity schemas, bundles the framework schemas and
 * prints a summary.
 *
 * The outcome is reported through `process.exitCode` (1 when a required
 * directory is missing or any schema failed) instead of `process.exit()`, so
 * Node can flush pending console output before the process ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Beacon v2 Schema Bundler');
  console.log('========================\n');
  console.log(`Beacon v2 root: ${BEACON_V2_ROOT}`);
  console.log(`Output directory: ${OUTPUT_DIR}\n`);

  // Check beacon-v2 exists
  if (!fs.existsSync(BEACON_V2_ROOT)) {
    console.error(`ERROR: Beacon v2 directory not found: ${BEACON_V2_ROOT}`);
    process.exitCode = 1;
    return;
  }

  // Check deref_schemas exists
  if (!fs.existsSync(DEREF_SCHEMAS_DIR)) {
    console.error(`ERROR: Pre-dereferenced schemas not found: ${DEREF_SCHEMAS_DIR}`);
    console.error('Make sure your beacon-v2 clone includes the bin/deref_schemas directory.');
    process.exitCode = 1;
    return;
  }

  // Create output directory
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  let successful = 0;
  let failed = 0;

  // Copy pre-dereferenced entity schemas
  console.log('--- Entity Schemas (pre-dereferenced) ---\n');
  for (const [name, schemaPath] of Object.entries(ENTITY_SCHEMAS)) {
    if (copySchema(name, schemaPath)) {
      successful++;
    } else {
      failed++;
    }
  }

  // Bundle framework schemas one at a time so the log output stays in order
  console.log('\n--- Framework Schemas (bundling) ---\n');
  for (const [name, schemaPath] of Object.entries(FRAMEWORK_SCHEMAS)) {
    if (await bundleSchema(name, schemaPath)) {
      successful++;
    } else {
      failed++;
    }
  }

  // Summary
  console.log('\n========================');
  console.log('Summary:');
  console.log(` Successful: ${successful}`);
  console.log(` Failed: ${failed}`);
  console.log(`\nBundled schemas written to: ${OUTPUT_DIR}`);
  console.log('\nNext step: Run the Python model generator:');
  console.log(' uv run python scripts/generate_from_bundled.py');

  // Any failed schema makes the whole run fail.
  if (failed > 0) {
    process.exitCode = 1;
  }
}
170+
171+
// An error that escapes main() is logged and must also fail the process;
// logging alone would leave the exit status at 0 and hide the failure from
// whatever invoked this script.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)