Skip to content

Commit ded9985

Browse files
committed
Add script to integrate additional open datasets
1 parent df8dcda commit ded9985

2 files changed

Lines changed: 123 additions & 0 deletions

File tree

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/bin/bash
# SPDX-License-Identifier: PMPL-1.0-or-later
# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>

# Integrate Additional Open Datasets
#
# Downloads datasets from several open sources (public API, GitHub raw
# content, public S3 bucket), validates each as JSON via jq, and hands
# them to the Node processing step.

set -euo pipefail

echo "=== Integrate Additional Open Datasets ==="

# Verify required tools up front so the script fails with a clear message.
for tool in curl jq; do
    if ! command -v "$tool" &>/dev/null; then
        echo "Error: $tool is not installed. Please install $tool first." >&2
        exit 1
    fi
done

# Create a directory for datasets
mkdir -p datasets

# download_json URL DEST
# Fetch URL and pretty-print it into DEST.
# `curl --fail` (-f) makes HTTP errors (e.g. 404) exit non-zero instead of
# saving the server's error page as a "dataset"; `set -o pipefail` above
# propagates that failure through the jq pipe and aborts the script.
download_json() {
    local url="$1" dest="$2"
    curl -sSf "$url" | jq '.' > "$dest"
}

# Download and integrate datasets
echo "Downloading dataset from public API..."
download_json "https://api.example.com/dataset" datasets/example.json

echo "Downloading dataset from GitHub repository..."
download_json "https://raw.githubusercontent.com/example/repo/main/dataset.json" datasets/github.json

echo "Downloading dataset from public S3 bucket..."
download_json "https://example.s3.amazonaws.com/dataset.json" datasets/s3.json

# Process and integrate the datasets
echo "Processing and integrating datasets..."
node scripts/datasets/process-datasets.js

echo "=== Dataset Integration Complete ==="
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env node
2+
// SPDX-License-Identifier: PMPL-1.0-or-later
3+
// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) <j.d.a.jewell@open.ac.uk>
4+
5+
// Process and Integrate Datasets
6+
7+
import { readFile, writeFile, mkdir, readdir } from 'node:fs/promises';
import { join, dirname } from 'node:path';
import { pathToFileURL } from 'node:url';
9+
10+
/**
 * Load and parse a JSON document from disk.
 *
 * @param {string} filePath - Path to the JSON file.
 * @returns {Promise<any>} The parsed JSON value.
 * @throws {SyntaxError} When the file contents are not valid JSON.
 */
async function readJsonFile(filePath) {
  const raw = await readFile(filePath, 'utf8');
  const parsed = JSON.parse(raw);
  return parsed;
}
14+
15+
/**
 * Serialize `data` as pretty-printed (2-space indented) JSON and write it
 * to `filePath`, replacing any existing file.
 *
 * @param {string} filePath - Destination path.
 * @param {any} data - JSON-serializable value.
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, data) {
  const serialized = JSON.stringify(data, null, 2);
  await writeFile(filePath, serialized, 'utf8');
}
18+
19+
/**
 * Create `directory` (including any missing parent directories); succeeds
 * silently when the directory already exists.
 *
 * @param {string} directory - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDirectoryExists(directory) {
  const options = { recursive: true };
  await mkdir(directory, options);
}
22+
23+
/**
 * Read every `*.json` dataset from `datasets/`, stamp each with processing
 * metadata, and write the results into `processed-datasets/`.
 *
 * Bug fix: datasets without an `id` previously all mapped to the same
 * output name (`dataset-processed.json`) and silently overwrote each
 * other; they now fall back to a unique index-based name. Independent
 * reads/writes also run in parallel instead of one-at-a-time.
 *
 * @returns {Promise<object[]>} The processed datasets, in directory order.
 */
async function processDatasets() {
  const datasetsDir = 'datasets';
  const outputDir = 'processed-datasets';

  // Ensure the output directory exists
  await ensureDirectoryExists(outputDir);

  // Load every JSON file; the reads are independent, so run them in parallel.
  const files = (await readdir(datasetsDir)).filter((file) => file.endsWith('.json'));
  const datasets = await Promise.all(
    files.map((file) => readJsonFile(join(datasetsDir, file))),
  );

  // Stamp each dataset with provenance metadata. A single timestamp is
  // taken once so every dataset in this run carries the same processedAt.
  const processedAt = new Date().toISOString();
  const processedDatasets = datasets.map((dataset) => ({
    ...dataset,
    metadata: {
      source: 'open-dataset',
      processedAt,
      version: '1.0.0',
    },
  }));

  // Write outputs in parallel; the index fallback keeps file names unique
  // when a dataset is missing (or has a falsy) `id`.
  await Promise.all(
    processedDatasets.map((dataset, index) => {
      const fileName = `${dataset.id || `dataset-${index}`}-processed.json`;
      return writeJsonFile(join(outputDir, fileName), dataset);
    }),
  );

  return processedDatasets;
}
62+
63+
/**
 * CLI entry point: run the dataset pipeline, report progress on stdout,
 * and exit with status 1 on any failure.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    console.log('Processing datasets...');
    const processed = await processDatasets();
    console.log(`Processed ${processed.length} datasets.`);
  } catch (err) {
    console.error('Error processing datasets:', err);
    process.exit(1);
  }
}
73+
74+
// Run the CLI only when this file is executed directly (not when imported
// as a module). `pathToFileURL` builds the comparison URL correctly on
// every platform — the original raw `file://${process.argv[1]}` string
// breaks on Windows drive letters and paths needing percent-encoding.
// The `process.argv[1]` check guards REPL/embedded contexts where it is
// undefined.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}
77+
78+
export { processDatasets };

0 commit comments

Comments
 (0)