Skip to content

Commit 86ff4fc

Browse files
committed
Improve datagouv module
Allow passing either datasetId or organizationIdOrSlug
1 parent 3764459 commit 86ff4fc

3 files changed

Lines changed: 197 additions & 46 deletions

File tree

scripts/dataset/publish/datagouv/dataset.js

Lines changed: 148 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,120 @@ import path from 'path';
44
import FormData from 'form-data';
55
import nodeFetch from 'node-fetch';
66

7-
import * as readme from '../../assets/README.template.js';
87
import { createModuleLogger } from '../../logger/index.js';
98

109
const logger = createModuleLogger('datagouv');
1110

1211
const DATASET_LICENSE = 'odc-odbl';
1312
const DEFAULT_RESOURCE_DESCRIPTION = 'See README.md inside the archive for dataset structure and usage information.';
1413

15-
export async function updateDatasetMetadata({ apiBaseUrl, headers, datasetId, releaseDate, stats }) {
14+
const routes = {
15+
dataset: (apiBaseUrl, datasetId) => `${apiBaseUrl}/datasets/${datasetId}/`,
16+
datasets: apiBaseUrl => `${apiBaseUrl}/datasets/`,
17+
datasetUpload: (apiBaseUrl, datasetId) => `${apiBaseUrl}/datasets/${datasetId}/upload/`,
18+
resource: (apiBaseUrl, datasetId, resourceId) => `${apiBaseUrl}/datasets/${datasetId}/resources/${resourceId}/`,
19+
resourceUpload: (apiBaseUrl, datasetId, resourceId) => `${apiBaseUrl}/datasets/${datasetId}/resources/${resourceId}/upload/`,
20+
organization: (apiBaseUrl, organizationIdOrSlug) => `${apiBaseUrl}/organizations/${organizationIdOrSlug}/`,
21+
organizationDatasets: (apiBaseUrl, organizationId) => `${apiBaseUrl}/organizations/${organizationId}/datasets/?page_size=100`,
22+
};
23+
24+
export async function getOrganization({ apiBaseUrl, headers, organizationIdOrSlug }) {
25+
logger.info(`Fetching organization: ${organizationIdOrSlug}…`);
26+
27+
const orgResponse = await nodeFetch(routes.organization(apiBaseUrl, organizationIdOrSlug), { headers });
28+
29+
if (!orgResponse.ok) {
30+
const errorText = await orgResponse.text();
31+
32+
throw new Error(`Failed to retrieve organization: ${orgResponse.status} ${orgResponse.statusText} - ${errorText}`);
33+
}
34+
35+
const orgData = await orgResponse.json();
36+
37+
logger.info(`Found organization: ${orgData.name} (ID: ${orgData.id})`);
38+
39+
return orgData;
40+
}
41+
42+
export async function getDataset({ apiBaseUrl, headers, datasetId }) {
43+
const datasetResponse = await nodeFetch(routes.dataset(apiBaseUrl, datasetId), { headers });
44+
45+
if (!datasetResponse.ok) {
46+
const errorText = await datasetResponse.text();
47+
const error = new Error(`Failed to retrieve dataset: ${datasetResponse.status} ${datasetResponse.statusText} - ${errorText}`);
48+
49+
error.statusCode = datasetResponse.status;
50+
throw error;
51+
}
52+
53+
const datasetData = await datasetResponse.json();
54+
55+
return datasetData;
56+
}
57+
58+
export async function findDatasetByTitle({ apiBaseUrl, headers, organizationId, title }) {
59+
logger.info(`Searching for dataset with title "${title}" in organization…`);
60+
61+
const searchResponse = await nodeFetch(routes.organizationDatasets(apiBaseUrl, organizationId), { headers });
62+
63+
if (!searchResponse.ok) {
64+
const errorText = await searchResponse.text();
65+
66+
throw new Error(`Failed to search for datasets: ${searchResponse.status} ${searchResponse.statusText} - ${errorText}`);
67+
}
68+
69+
const searchData = await searchResponse.json();
70+
71+
const dataset = searchData.data.find(ds => ds.title === title);
72+
73+
if (dataset) {
74+
logger.info(`Found existing dataset: ${dataset.title} (ID: ${dataset.id})`);
75+
76+
return dataset;
77+
}
78+
79+
logger.info('No existing dataset found with this title');
80+
81+
return null;
82+
}
83+
84+
export async function createDataset({ apiBaseUrl, headers, organizationId, title, description, license, frequency }) {
85+
logger.info(`Creating new dataset: ${title}…`);
86+
87+
const createResponse = await nodeFetch(routes.datasets(apiBaseUrl), {
88+
method: 'POST',
89+
headers: {
90+
...headers,
91+
'Content-Type': 'application/json',
92+
},
93+
body: JSON.stringify({
94+
title,
95+
description,
96+
organization: organizationId,
97+
license,
98+
frequency,
99+
}),
100+
});
101+
102+
if (!createResponse.ok) {
103+
const errorText = await createResponse.text();
104+
105+
throw new Error(`Failed to create dataset: ${createResponse.status} ${createResponse.statusText} - ${errorText}`);
106+
}
107+
108+
const dataset = await createResponse.json();
109+
110+
logger.info(`Dataset created successfully: ${dataset.title} (ID: ${dataset.id})`);
111+
112+
return dataset;
113+
}
114+
115+
export async function updateDatasetMetadata({ apiBaseUrl, headers, datasetId, title, description, stats, frequency }) {
16116
const updatePayload = {
17-
title: readme.title({ releaseDate }),
18-
description: readme.body(stats),
117+
title,
118+
description,
19119
license: DATASET_LICENSE,
120+
frequency,
20121
};
21122

22123
if (stats?.firstVersionDate && stats?.lastVersionDate) {
@@ -26,7 +127,7 @@ export async function updateDatasetMetadata({ apiBaseUrl, headers, datasetId, re
26127
};
27128
}
28129

29-
const updateResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/`, {
130+
const updateResponse = await nodeFetch(routes.dataset(apiBaseUrl, datasetId), {
30131
method: 'PUT',
31132
headers: {
32133
...headers,
@@ -37,25 +138,21 @@ export async function updateDatasetMetadata({ apiBaseUrl, headers, datasetId, re
37138

38139
if (!updateResponse.ok) {
39140
const errorText = await updateResponse.text();
141+
const error = new Error(`Failed to update dataset metadata: ${updateResponse.status} ${updateResponse.statusText} - ${errorText}`);
40142

41-
throw new Error(`Failed to update dataset metadata: ${updateResponse.status} ${updateResponse.statusText} - ${errorText}`);
143+
error.statusCode = updateResponse.status;
144+
throw error;
42145
}
146+
147+
logger.info('Dataset metadata updated successfully');
43148
}
44149

45150
export async function uploadResource({ apiBaseUrl, headers, datasetId, archivePath }) {
46151
logger.info('Uploading dataset archive…');
47152

48-
const formData = new FormData();
49-
const fileName = path.basename(archivePath);
50-
const fileStats = fsApi.statSync(archivePath);
153+
const { formData, fileName } = createFormDataForFile(archivePath);
51154

52-
formData.append('file', fsApi.createReadStream(archivePath), {
53-
filename: fileName,
54-
contentType: 'application/zip',
55-
knownLength: fileStats.size,
56-
});
57-
58-
const uploadResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/upload/`, {
155+
const uploadResponse = await nodeFetch(routes.datasetUpload(apiBaseUrl, datasetId), {
59156
method: 'POST',
60157
headers: { ...formData.getHeaders(), ...headers },
61158
body: formData,
@@ -74,10 +171,34 @@ export async function uploadResource({ apiBaseUrl, headers, datasetId, archivePa
74171
return { resourceId: uploadResult.id, fileName };
75172
}
76173

174+
export async function replaceResourceFile({ apiBaseUrl, headers, datasetId, resourceId, archivePath }) {
175+
logger.info(`Replacing file for existing resource ID: ${resourceId}…`);
176+
177+
const { formData, fileName } = createFormDataForFile(archivePath);
178+
179+
const uploadResponse = await nodeFetch(routes.resourceUpload(apiBaseUrl, datasetId, resourceId), {
180+
method: 'POST',
181+
headers: { ...formData.getHeaders(), ...headers },
182+
body: formData,
183+
});
184+
185+
if (!uploadResponse.ok) {
186+
const errorText = await uploadResponse.text();
187+
188+
throw new Error(`Failed to replace resource file: ${uploadResponse.status} ${uploadResponse.statusText} - ${errorText}`);
189+
}
190+
191+
const uploadResult = await uploadResponse.json();
192+
193+
logger.info('Resource file replaced successfully');
194+
195+
return { resourceId: uploadResult.id, fileName };
196+
}
197+
77198
export async function updateResourceMetadata({ apiBaseUrl, headers, datasetId, resourceId, fileName }) {
78199
logger.info('Updating resource metadata…');
79200

80-
const resourceUpdateResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/resources/${resourceId}/`, {
201+
const resourceUpdateResponse = await nodeFetch(routes.resource(apiBaseUrl, datasetId, resourceId), {
81202
method: 'PUT',
82203
headers: { ...headers, 'Content-Type': 'application/json' },
83204
body: JSON.stringify({
@@ -98,20 +219,16 @@ export async function updateResourceMetadata({ apiBaseUrl, headers, datasetId, r
98219
logger.info('Resource metadata updated successfully');
99220
}
100221

101-
export async function getDatasetUrl({ apiBaseUrl, headers, datasetId }) {
102-
const datasetResponse = await nodeFetch(`${apiBaseUrl}/datasets/${datasetId}/`, {
103-
method: 'GET',
104-
headers: { ...headers },
105-
});
106-
107-
if (!datasetResponse.ok) {
108-
const errorText = await datasetResponse.text();
109-
110-
throw new Error(`Failed to retrieve dataset URL: ${datasetResponse.status} ${datasetResponse.statusText} - ${errorText}`);
111-
}
222+
function createFormDataForFile(archivePath) {
223+
const formData = new FormData();
224+
const fileName = path.basename(archivePath);
225+
const fileStats = fsApi.statSync(archivePath);
112226

113-
const datasetData = await datasetResponse.json();
114-
const datasetUrl = datasetData.page;
227+
formData.append('file', fsApi.createReadStream(archivePath), {
228+
filename: fileName,
229+
contentType: 'application/zip',
230+
knownLength: fileStats.size,
231+
});
115232

116-
return datasetUrl;
233+
return { formData, fileName };
117234
}
Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,34 @@
11
import config from 'config';
22

3+
import * as readme from '../../assets/README.template.js';
34
import { createModuleLogger } from '../../logger/index.js';
45

5-
import { updateDatasetMetadata, uploadResource, updateResourceMetadata, getDatasetUrl } from './dataset.js';
6+
import { updateDatasetMetadata, uploadResource, replaceResourceFile, updateResourceMetadata, getDataset, getOrganization, findDatasetByTitle, createDataset } from './dataset.js';
7+
68
const logger = createModuleLogger('datagouv');
79

810
const PRODUCTION_API_BASE_URL = 'https://www.data.gouv.fr/api/1';
911
const DEMO_API_BASE_URL = 'https://demo.data.gouv.fr/api/1';
12+
const DATASET_LICENSE = 'odc-odbl';
13+
14+
export default async function publish({ archivePath, stats }) {
15+
const { datasetId, organizationIdOrSlug, apiBaseUrl, headers, datasetTitle, frequency } = loadConfiguration();
16+
const description = readme.body(stats);
17+
18+
const dataset = datasetId
19+
? await getDataset({ apiBaseUrl, headers, datasetId })
20+
: await ensureDatasetExists({ apiBaseUrl, headers, organizationIdOrSlug, datasetTitle, description, frequency });
21+
22+
await updateDatasetMetadata({ apiBaseUrl, headers, datasetId: dataset.id, title: datasetTitle, description, stats, frequency });
23+
24+
const { resourceId, fileName } = await handleResourceUpload({ apiBaseUrl, headers, datasetId: dataset.id, dataset, archivePath });
25+
26+
await updateResourceMetadata({ apiBaseUrl, headers, datasetId: dataset.id, resourceId, fileName });
27+
28+
logger.info(`Dataset published successfully: ${dataset.page}`);
29+
30+
return dataset.page;
31+
}
1032

1133
function loadConfiguration() {
1234
const apiKey = process.env.OTA_ENGINE_DATAGOUV_API_KEY;
@@ -15,13 +37,16 @@ function loadConfiguration() {
1537
throw new Error('OTA_ENGINE_DATAGOUV_API_KEY environment variable is required for data.gouv.fr publishing');
1638
}
1739

18-
const datasetId = config.get('@opentermsarchive/engine.dataset.datagouv.datasetId');
40+
const datasetId = config.has('@opentermsarchive/engine.dataset.datagouv.datasetId') && config.get('@opentermsarchive/engine.dataset.datagouv.datasetId');
41+
const organizationIdOrSlug = config.has('@opentermsarchive/engine.dataset.datagouv.organizationIdOrSlug') && config.get('@opentermsarchive/engine.dataset.datagouv.organizationIdOrSlug');
1942

20-
if (!datasetId) {
21-
throw new Error('datasetId is required in config at @opentermsarchive/engine.dataset.datagouv.datasetId. Run "node scripts/dataset/publish/datagouv/create-dataset.js" to create a dataset first.');
43+
if (!datasetId && !organizationIdOrSlug) {
44+
throw new Error('Either datasetId or organizationIdOrSlug is required in config at @opentermsarchive/engine.dataset.datagouv');
2245
}
2346

24-
const useDemo = config.get('@opentermsarchive/engine.dataset.datagouv.useDemo');
47+
const datasetTitle = config.get('@opentermsarchive/engine.dataset.title');
48+
const frequency = config.has('@opentermsarchive/engine.dataset.datagouv.frequency') && config.get('@opentermsarchive/engine.dataset.datagouv.frequency');
49+
const useDemo = config.has('@opentermsarchive/engine.dataset.datagouv.useDemo') && config.get('@opentermsarchive/engine.dataset.datagouv.useDemo');
2550
const apiBaseUrl = useDemo ? DEMO_API_BASE_URL : PRODUCTION_API_BASE_URL;
2651

2752
if (useDemo) {
@@ -30,19 +55,28 @@ function loadConfiguration() {
3055

3156
const headers = { 'X-API-KEY': apiKey };
3257

33-
return { datasetId, apiBaseUrl, headers };
58+
return { datasetId, organizationIdOrSlug, apiBaseUrl, headers, datasetTitle, frequency };
3459
}
3560

36-
export default async function publish({ archivePath, releaseDate, stats }) {
37-
const config = loadConfiguration();
61+
async function ensureDatasetExists({ apiBaseUrl, headers, organizationIdOrSlug, datasetTitle, description, frequency }) {
62+
const organization = await getOrganization({ apiBaseUrl, headers, organizationIdOrSlug });
63+
let dataset = await findDatasetByTitle({ apiBaseUrl, headers, organizationId: organization.id, title: datasetTitle });
64+
65+
if (!dataset) {
66+
dataset = await createDataset({ apiBaseUrl, headers, organizationId: organization.id, title: datasetTitle, description, license: DATASET_LICENSE, frequency });
67+
}
3868

39-
await updateDatasetMetadata({ ...config, releaseDate, stats });
69+
return dataset;
70+
}
4071

41-
const { resourceId, fileName } = await uploadResource({ ...config, archivePath });
72+
function handleResourceUpload({ apiBaseUrl, headers, datasetId, dataset, archivePath }) {
73+
if (dataset?.resources?.length > 0) {
74+
const existingResource = dataset.resources[0];
4275

43-
await updateResourceMetadata({ ...config, resourceId, fileName });
76+
logger.info(`Found existing resource: ${existingResource.title} (ID: ${existingResource.id})`);
4477

45-
const datasetUrl = await getDatasetUrl({ ...config });
78+
return replaceResourceFile({ apiBaseUrl, headers, datasetId, resourceId: existingResource.id, archivePath });
79+
}
4680

47-
return datasetUrl;
81+
return uploadResource({ apiBaseUrl, headers, datasetId, archivePath });
4882
}

scripts/dataset/publish/index.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ export default async function publishRelease({ archivePath, releaseDate, stats }
1616
platforms.push({ name: 'GitLab', publish: () => publishGitLab({ archivePath, releaseDate, stats }) });
1717
}
1818

19-
if (process.env.OTA_ENGINE_DATAGOUV_API_KEY && config.get('@opentermsarchive/engine.dataset.datagouv.datasetId')) {
19+
if (process.env.OTA_ENGINE_DATAGOUV_API_KEY && (config.has('@opentermsarchive/engine.dataset.datagouv.datasetId') || config.has('@opentermsarchive/engine.dataset.datagouv.organizationIdOrSlug'))) {
2020
platforms.push({ name: 'data.gouv.fr', publish: () => publishDataGouv({ archivePath, releaseDate, stats }) });
2121
}
2222

2323
if (!platforms.length) {
24-
throw new Error('No publishing platform configured. Please configure at least one of: GitHub (OTA_ENGINE_GITHUB_TOKEN), GitLab (OTA_ENGINE_GITLAB_TOKEN), or data.gouv.fr (OTA_ENGINE_DATAGOUV_API_KEY + datasetId in config).');
24+
throw new Error('No publishing platform configured. Please configure at least one of: GitHub (OTA_ENGINE_GITHUB_TOKEN), GitLab (OTA_ENGINE_GITLAB_TOKEN), or data.gouv.fr (OTA_ENGINE_DATAGOUV_API_KEY + datasetId or organizationIdOrSlug in config).');
2525
}
2626

2727
const results = await Promise.allSettled(platforms.map(async platform => {

0 commit comments

Comments
 (0)