Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,7 @@ site/
*.tmp
tmp/
temp/

# Node.js (for schema bundling)
node_modules/
package-lock.json
62 changes: 62 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,68 @@ pip install pre-commit
pre-commit install
```

### Schema Synchronization

This project includes tooling to detect drift between your Pydantic models and the official [Beacon v2 JSON schemas](https://github.com/ga4gh-beacon/beacon-v2).

#### Prerequisites

```bash
npm install @apidevtools/json-schema-ref-parser
```

#### Sync and Compare

Run the sync script to download schemas and compare against your models:

```bash
# Download latest release and compare
./scripts/sync_beacon_schemas.sh

# Use a specific version
./scripts/sync_beacon_schemas.sh --version v2.1.0

# Clean cached schemas and re-download
./scripts/sync_beacon_schemas.sh --clean
```

Or run just the comparison (if schemas are already downloaded):

```bash
uv run python scripts/compare_models.py
```

#### What It Does

1. **Downloads** Beacon v2 release artifacts from GitHub
2. **Bundles** JSON schemas (copies the pre-dereferenced entity schemas and resolves `$ref` references in the framework schemas)
3. **Compares** schema fields against `src/beacon_api/models/`
4. **Reports** missing fields, extra fields, and coverage

#### Output

The comparison script reports:
- **Missing fields** - Fields in the schema but not in your model
- **Extra fields** - Custom fields you've added (not in schema)
- **Field counts** - Coverage summary per model

Downloaded schemas are cached in `tmp/` (gitignored):

```text
tmp/
├── beacon-v2-schemas/ # Downloaded release artifacts
└── bundled_schemas/ # Resolved JSON schemas
```

#### Workflow for Updating Models

1. Run `./scripts/sync_beacon_schemas.sh`
2. Review the comparison report for missing fields
3. Add missing fields to `src/beacon_api/models/` as needed
4. Re-run to verify coverage

This approach keeps you in control while ensuring your models stay aligned with the upstream specification.

## API Endpoints

### Info
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dev = [
"httpx>=0.27.0",
"ruff>=0.7.0",
"mypy>=1.13.0",
"datamodel-code-generator>=0.53.0",
]

[build-system]
Expand Down
171 changes: 171 additions & 0 deletions scripts/bundle_schemas.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env node
/**
* Bundle Beacon v2 JSON schemas for Pydantic model generation.
*
* Strategy:
* - Entity models: Copy pre-dereferenced schemas from beacon-v2/bin/deref_schemas/
* - Framework schemas: Bundle using json-schema-ref-parser (these resolve correctly)
*
* Usage:
* npm install @apidevtools/json-schema-ref-parser
* node scripts/bundle_schemas.js
*/

const $RefParser = require('@apidevtools/json-schema-ref-parser');
const fs = require('fs');
const path = require('path');

// Configuration
// Root of the Beacon v2 checkout. A non-empty BEACON_V2_ROOT environment
// variable takes precedence; otherwise ../tmp/beacon-v2-schemas relative to
// this script's directory is used.
const BEACON_V2_ROOT = path.resolve(
  process.env.BEACON_V2_ROOT || path.join(__dirname, '../tmp/beacon-v2-schemas')
);
// Directory that receives the copied and bundled schema files.
const OUTPUT_DIR = path.resolve(__dirname, '..', 'tmp', 'bundled_schemas');

// Locations inside the Beacon v2 checkout that the tables below read from.
const DEREF_SCHEMAS_DIR = path.join(BEACON_V2_ROOT, 'bin', 'deref_schemas');
const FRAMEWORK_DIR = path.join(BEACON_V2_ROOT, 'framework', 'json');

// Pre-dereferenced entity schemas (just copy these).
// Each pair is [output name, directory under DEREF_SCHEMAS_DIR]; every
// directory is expected to provide a defaultSchema.json file.
const ENTITY_SCHEMAS = Object.fromEntries(
  [
    ['individual', 'individuals'],
    ['biosample', 'biosamples'],
    ['cohort', 'cohorts'],
    ['dataset', 'datasets'],
    ['run', 'runs'],
    ['analysis', 'analyses'],
    ['genomicVariation', 'genomicVariations'],
  ].map(([schemaName, directory]) => [
    schemaName,
    path.join(DEREF_SCHEMAS_DIR, directory, 'defaultSchema.json'),
  ])
);

// Framework schemas (need bundling).
// Each pair is [output name, path segments below FRAMEWORK_DIR].
const FRAMEWORK_SCHEMAS = Object.fromEntries(
  [
    // Common
    ['common', ['common', 'beaconCommonComponents.json']],
    ['ontologyTerm', ['common', 'ontologyTerm.json']],

    // Requests
    ['requestBody', ['requests', 'beaconRequestBody.json']],
    ['requestMeta', ['requests', 'beaconRequestMeta.json']],
    ['filteringTerms', ['requests', 'filteringTerms.json']],

    // Responses
    ['booleanResponse', ['responses', 'beaconBooleanResponse.json']],
    ['countResponse', ['responses', 'beaconCountResponse.json']],
    ['resultsetsResponse', ['responses', 'beaconResultsetsResponse.json']],
    ['collectionsResponse', ['responses', 'beaconCollectionsResponse.json']],
    ['infoResponse', ['responses', 'beaconInfoResponse.json']],
    ['errorResponse', ['responses', 'beaconErrorResponse.json']],
    ['filteringTermsResponse', ['responses', 'beaconFilteringTermsResponse.json']],
    ['mapResponse', ['responses', 'beaconMapResponse.json']],

    // Response Sections
    ['responseMeta', ['responses', 'sections', 'beaconResponseMeta.json']],
    ['resultsets', ['responses', 'sections', 'beaconResultsets.json']],
  ].map(([schemaName, segments]) => [schemaName, path.join(FRAMEWORK_DIR, ...segments)])
);

/**
 * Copy one pre-dereferenced schema file into OUTPUT_DIR as `<name>.json`.
 *
 * @param {string} name - Base name of the output file (without extension).
 * @param {string} sourcePath - Path of the schema file to copy.
 * @returns {boolean} `true` once the file has been copied; `false` (after
 *   printing a warning) when `sourcePath` does not exist.
 */
function copySchema(name, sourcePath) {
  console.log(`Copying ${name}...`);

  if (fs.existsSync(sourcePath)) {
    const destination = path.join(OUTPUT_DIR, `${name}.json`);
    fs.copyFileSync(sourcePath, destination);

    // Report the size of the written copy in kilobytes.
    const sizeInKb = (fs.statSync(destination).size / 1024).toFixed(1);
    console.log(` -> ${destination} (${sizeInKb} KB)`);
    return true;
  }

  console.warn(` WARNING: Schema file not found: ${sourcePath}`);
  return false;
}

/**
 * Resolve a framework schema with json-schema-ref-parser and write the result
 * to OUTPUT_DIR as pretty-printed `<name>.json`.
 *
 * @param {string} name - Base name of the output file (without extension).
 * @param {string} schemaPath - Path of the schema file to resolve.
 * @returns {Promise<boolean>} `true` when the output file was written;
 *   `false` when `schemaPath` does not exist or when resolving/writing throws
 *   (the error message is logged, not rethrown).
 */
async function bundleSchema(name, schemaPath) {
  console.log(`Bundling ${name}...`);

  if (fs.existsSync(schemaPath) === false) {
    console.warn(` WARNING: Schema file not found: ${schemaPath}`);
    return false;
  }

  // Parser is asked to ignore circular references while dereferencing.
  const parserOptions = { dereference: { circular: 'ignore' } };

  try {
    const resolved = await $RefParser.dereference(schemaPath, parserOptions);

    const destination = path.join(OUTPUT_DIR, `${name}.json`);
    fs.writeFileSync(destination, JSON.stringify(resolved, null, 2));

    const { size } = fs.statSync(destination);
    console.log(` -> ${destination} (${(size / 1024).toFixed(1)} KB)`);
    return true;
  } catch (err) {
    console.error(` ERROR bundling ${name}: ${err.message}`);
    return false;
  }
}

/**
 * Script entry point: verify the Beacon v2 checkout, copy the entity schemas,
 * bundle the framework schemas one at a time, print a summary and terminate
 * the process.
 *
 * Exits with status 1 when a required directory is missing or when at least
 * one schema could not be copied/bundled, and with status 0 otherwise.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const banner = [
    'Beacon v2 Schema Bundler',
    '========================\n',
    `Beacon v2 root: ${BEACON_V2_ROOT}`,
    `Output directory: ${OUTPUT_DIR}\n`,
  ];
  for (const line of banner) {
    console.log(line);
  }

  // Abort with status 1, printing every message, when a directory is absent.
  const requireDirectory = (directory, messages) => {
    if (fs.existsSync(directory)) {
      return;
    }
    for (const message of messages) {
      console.error(message);
    }
    process.exit(1);
  };

  // Check beacon-v2 exists
  requireDirectory(BEACON_V2_ROOT, [
    `ERROR: Beacon v2 directory not found: ${BEACON_V2_ROOT}`,
  ]);

  // Check deref_schemas exists
  requireDirectory(DEREF_SCHEMAS_DIR, [
    `ERROR: Pre-dereferenced schemas not found: ${DEREF_SCHEMAS_DIR}`,
    'Make sure your beacon-v2 clone includes the bin/deref_schemas directory.',
  ]);

  // Create output directory
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  const tally = { succeeded: 0, failed: 0 };
  const record = (wasSuccessful) => {
    if (wasSuccessful) {
      tally.succeeded += 1;
    } else {
      tally.failed += 1;
    }
  };

  // Copy pre-dereferenced entity schemas
  console.log('--- Entity Schemas (pre-dereferenced) ---\n');
  for (const entityName of Object.keys(ENTITY_SCHEMAS)) {
    record(copySchema(entityName, ENTITY_SCHEMAS[entityName]));
  }

  // Bundle framework schemas sequentially, in declaration order
  console.log('\n--- Framework Schemas (bundling) ---\n');
  for (const frameworkName of Object.keys(FRAMEWORK_SCHEMAS)) {
    record(await bundleSchema(frameworkName, FRAMEWORK_SCHEMAS[frameworkName]));
  }

  // Summary
  const summary = [
    '\n========================',
    'Summary:',
    ` Successful: ${tally.succeeded}`,
    ` Failed: ${tally.failed}`,
    `\nBundled schemas written to: ${OUTPUT_DIR}`,
    '\nNext step: Run the Python model generator:',
    ' uv run python scripts/generate_from_bundled.py',
  ];
  for (const line of summary) {
    console.log(line);
  }

  const exitCode = tally.failed === 0 ? 0 : 1;
  process.exit(exitCode);
}

// Run the bundler. If main() rejects unexpectedly, log the error and mark the
// process as failed so callers (for example a wrapping shell script) do not
// mistake a crash for success; the previous handler only logged and the
// process then exited with status 0.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Loading
Loading