Skip to content

Commit d869366

Browse files
authored
Fix incorrect versioning that occurred when adding new source documents to combined terms declarations (#1207)
2 parents 05b6cc0 + a24d41d commit d869366

18 files changed

Lines changed: 565 additions & 176 deletions

.github/workflows/test.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ jobs:
1616
node_version:
1717
- 20
1818
- 24
19+
exclude:
20+
- operating_system: windows-latest
21+
node_version: 24
22+
- operating_system: macos-latest
23+
node_version: 24
1924
fail-fast: false # run tests on other operating systems even if one fails
2025

2126
runs-on: ${{ matrix.operating_system }}
@@ -46,6 +51,11 @@ jobs:
4651
node_version:
4752
- 20
4853
- 24
54+
exclude:
55+
- operating_system: windows-latest
56+
node_version: 24
57+
- operating_system: macos-latest
58+
node_version: 24
4959
fail-fast: false # run tests on other operating systems even if one fails
5060

5161
runs-on: ${{ matrix.operating_system }}
@@ -76,6 +86,11 @@ jobs:
7686
node_version:
7787
- 20
7888
- 24
89+
exclude:
90+
- operating_system: windows-latest
91+
node_version: 24
92+
- operating_system: macos-latest
93+
node_version: 24
7994
fail-fast: false
8095

8196
runs-on: ${{ matrix.operating_system }}

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,22 @@
22

33
All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
44

5+
## Unreleased [major]
6+
7+
> Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs.
8+
9+
### Added
10+
11+
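- Add `ota apply-technical-upgrades` CLI command to apply technical upgrades independently of tracking, generating new versions from the latest snapshots and retrieving any missing snapshots for newly added source documents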
12+
13+
### Changed
14+
15+
- **Breaking:** Remove `--extract-only` option from `ota track` command; use the new `ota apply-technical-upgrades` command instead
16+
17+
### Fixed
18+
19+
- Fix incorrect versioning that occurred when adding new source documents to combined terms declarations
20+
521
## 9.2.3 - 2025-11-19
622

723
_Full changeset and discussions: [#1204](https://github.com/OpenTermsArchive/engine/pull/1204)._

CONTRIBUTING.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,9 @@ For command-line examples and documentation, we follow the [docopt usage pattern
165165

166166
In order to improve the understandability of commands, we document all CLI options and examples with the long version of the options.
167167

168-
169168
```diff
170-
- ota track -s $service_id -r
171-
+ ota track --services <service_id> --extract-only
169+
- ota track -s <service_id> -t <terms_type>
170+
+ ota track --services <service_id> --types <terms_type>
172171
```
173172

174173
## Naming
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#! /usr/bin/env node
2+
import './env.js';
3+
4+
import path from 'path';
5+
import { fileURLToPath, pathToFileURL } from 'url';
6+
7+
import { program } from 'commander';
8+
9+
// Directory containing this script, derived from import.meta.url; used below to resolve the engine entry point relative to this file
const __dirname = path.dirname(fileURLToPath(import.meta.url));
10+
11+
const { applyTechnicalUpgrades } = await import(pathToFileURL(path.resolve(__dirname, '../src/index.js'))); // load asynchronously to ensure env.js is loaded before
12+
13+
// Declare the subcommand name, its help text and its two filtering options.
// In commander, the `[name...]` syntax declares an optional, variadic option argument.
program
14+
.name('ota apply-technical-upgrades')
15+
.description('Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies, and by retrieving any missing snapshots for newly added source documents')
16+
.option('-s, --services [serviceId...]', 'service IDs to apply technical upgrades to')
17+
.option('-t, --types [termsType...]', 'terms types to apply technical upgrades to');
18+
19+
// Parse the command-line arguments and hand the resulting options object to applyTechnicalUpgrades
applyTechnicalUpgrades(program.parse(process.argv).opts());

bin/ota-track.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ program
1515
.description('Retrieve declared documents, record snapshots, extract versions and publish the resulting records')
1616
.option('-s, --services [serviceId...]', 'service IDs of services to track')
1717
.option('-t, --types [termsType...]', 'terms types to track')
18-
.option('-e, --extract-only', 'extract versions from existing snapshots with latest declarations and engine, without recording new snapshots')
1918
.option('--schedule', 'track automatically at a regular interval');
2019

2120
track(program.parse(process.argv).opts());

bin/ota.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ program
1111
.description(description)
1212
.version(version)
1313
.command('track', 'Track the current terms of services according to provided declarations')
14+
.command('apply-technical-upgrades', 'Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies')
1415
.command('validate', 'Run a series of tests to check the validity of terms declarations')
1516
.command('lint', 'Check format and stylistic errors in declarations and auto fix them')
1617
.command('dataset', 'Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')

src/archivist/index.js

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ const { version: PACKAGE_VERSION } = require('../../package.json');
2020
// - too many requests on the same endpoint yield 403
2121
// - sometimes when creating a commit no SHA are returned for unknown reasons
2222
const MAX_PARALLEL_TRACKING = 1;
23-
const MAX_PARALLEL_EXTRACTING = 10;
23+
const MAX_PARALLEL_TECHNICAL_UPGRADES = 10;
2424

2525
export const EVENTS = [
2626
'snapshotRecorded',
@@ -128,22 +128,40 @@ export default class Archivist extends events.EventEmitter {
128128
});
129129
}
130130

131-
async track({ services: servicesIds = this.servicesIds, types: termsTypes = [], extractOnly = false } = {}) {
131+
/**
 * Processes the selected terms in regular tracking mode: delegates to processTerms with
 * technicalUpgradeOnly set to false and the MAX_PARALLEL_TRACKING concurrency.
 *
 * @param {object} [options]
 * @param {Array} [options.services] - IDs of the services to process; defaults to this.servicesIds
 * @param {Array} [options.types] - terms types to process; defaults to an empty array, which processTerms treats as "no filtering by type"
 * @returns {Promise<void>} resolves once processTerms has completed
 */
async track({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
132+
await this.processTerms({
133+
servicesIds,
134+
termsTypes,
135+
technicalUpgradeOnly: false,
136+
concurrency: MAX_PARALLEL_TRACKING,
137+
});
138+
}
139+
140+
/**
 * Processes the selected terms in technical upgrade mode: delegates to processTerms with
 * technicalUpgradeOnly set to true and the MAX_PARALLEL_TECHNICAL_UPGRADES concurrency.
 *
 * @param {object} [options]
 * @param {Array} [options.services] - IDs of the services to process; defaults to this.servicesIds
 * @param {Array} [options.types] - terms types to process; defaults to an empty array, which processTerms treats as "no filtering by type"
 * @returns {Promise<void>} resolves once processTerms has completed
 */
async applyTechnicalUpgrades({ services: servicesIds = this.servicesIds, types: termsTypes = [] } = {}) {
141+
await this.processTerms({
142+
servicesIds,
143+
termsTypes,
144+
technicalUpgradeOnly: true,
145+
concurrency: MAX_PARALLEL_TECHNICAL_UPGRADES,
146+
});
147+
}
148+
149+
async processTerms({ servicesIds, termsTypes, technicalUpgradeOnly, concurrency }) {
132150
const numberOfTerms = Service.getNumberOfTerms(this.services, servicesIds, termsTypes);
133151

134-
this.emit('trackingStarted', servicesIds.length, numberOfTerms, extractOnly);
152+
this.emit('trackingStarted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);
135153

136154
await Promise.all([ launchHeadlessBrowser(), this.recorder.initialize() ]);
137155

138-
this.trackingQueue.concurrency = extractOnly ? MAX_PARALLEL_EXTRACTING : MAX_PARALLEL_TRACKING;
156+
this.trackingQueue.concurrency = concurrency;
139157

140158
servicesIds.forEach(serviceId => {
141159
this.services[serviceId].getTermsTypes().forEach(termsType => {
142160
if (termsTypes.length && !termsTypes.includes(termsType)) {
143161
return;
144162
}
145163

146-
this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }), extractOnly });
164+
this.trackingQueue.push({ terms: this.services[serviceId].getTerms({ type: termsType }), technicalUpgradeOnly });
147165
});
148166
});
149167

@@ -153,12 +171,14 @@ export default class Archivist extends events.EventEmitter {
153171

154172
await Promise.all([ stopHeadlessBrowser(), this.recorder.finalize() ]);
155173

156-
this.emit('trackingCompleted', servicesIds.length, numberOfTerms, extractOnly);
174+
this.emit('trackingCompleted', servicesIds.length, numberOfTerms, technicalUpgradeOnly);
157175
}
158176

159-
async trackTermsChanges({ terms, extractOnly = false }) {
160-
if (!extractOnly) {
177+
async trackTermsChanges({ terms, technicalUpgradeOnly = false }) {
178+
if (!technicalUpgradeOnly) {
161179
await this.fetchAndRecordSnapshots(terms);
180+
} else {
181+
await this.fetchAndRecordNewSourceDocuments(terms); // In technical upgrade mode, fetch and record snapshots only for new source documents that don't have existing snapshots yet (e.g., when a declaration is updated to add a new source document)
162182
}
163183

164184
const contents = await this.extractContentsFromSnapshots(terms);
@@ -167,7 +187,7 @@ export default class Archivist extends events.EventEmitter {
167187
return;
168188
}
169189

170-
await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR), extractOnly);
190+
await this.recordVersion(terms, contents.join(Version.SOURCE_DOCUMENTS_SEPARATOR), technicalUpgradeOnly);
171191
}
172192

173193
async fetchAndRecordSnapshots(terms) {
@@ -190,6 +210,50 @@ export default class Archivist extends events.EventEmitter {
190210
}
191211
}
192212

213+
/**
 * Fetches and records snapshots for the source documents of the given terms that do not
 * have any recorded snapshot yet.
 *
 * Returns without doing anything when the terms has a single source document, when no
 * version has been recorded for the terms yet, or when every source document already has
 * a snapshot. Otherwise, sets terms.fetchDate to the current date before fetching.
 *
 * @param {object} terms - terms to inspect; this method reads its hasMultipleSourceDocuments, service.id, type and sourceDocuments properties and may overwrite fetchDate
 * @returns {Promise<void>}
 * @throws {InaccessibleContentError} when fetchSourceDocument returned an error for at least one missing source document; snapshots of the documents fetched successfully are recorded before the error is thrown
 */
async fetchAndRecordNewSourceDocuments(terms) {
214+
if (!terms.hasMultipleSourceDocuments) { // If the terms has only one source document, there is nothing to do
215+
return;
216+
}
217+
218+
const existingVersion = await this.recorder.versionsRepository.findLatest(terms.service.id, terms.type);
219+
220+
if (!existingVersion) { // If the terms does not have a version recorded, skip this step as the next version will be tagged as "First record…" anyway
221+
return;
222+
}
223+
224+
const missingSourceDocuments = [];
225+
226+
// A source document for which the recorder returns no latest snapshot is considered missing
for (const sourceDocument of terms.sourceDocuments) {
227+
const snapshot = await this.recorder.getLatestSnapshot(terms, sourceDocument.id);
228+
229+
if (!snapshot) {
230+
missingSourceDocuments.push(sourceDocument);
231+
}
232+
}
233+
234+
if (!missingSourceDocuments.length) {
235+
return;
236+
}
237+
238+
terms.fetchDate = new Date();
239+
const fetchDocumentErrors = [];
240+
241+
// A truthy return value from fetchSourceDocument is treated as a fetch error: it is collected and the remaining documents are still attempted
for (const sourceDocument of missingSourceDocuments) {
242+
const error = await this.fetchSourceDocument(sourceDocument);
243+
244+
if (error) {
245+
fetchDocumentErrors.push(error);
246+
} else {
247+
await this.recordSnapshot(terms, sourceDocument);
248+
sourceDocument.clearContent(); // Reduce memory usage by clearing no longer needed large content strings
249+
}
250+
}
251+
252+
// Report all collected fetch errors at once, after every missing document has been attempted
if (fetchDocumentErrors.length) {
253+
throw new InaccessibleContentError(fetchDocumentErrors);
254+
}
255+
}
256+
193257
async fetchSourceDocument(sourceDocument) {
194258
const { location: url, executeClientScripts, cssSelectors } = sourceDocument;
195259

@@ -249,14 +313,14 @@ export default class Archivist extends events.EventEmitter {
249313
return contents;
250314
}
251315

252-
async recordVersion(terms, content, extractOnly) {
316+
async recordVersion(terms, content, technicalUpgradeOnly) {
253317
const record = new Version({
254318
content,
255319
snapshotIds: terms.sourceDocuments.map(sourceDocuments => sourceDocuments.snapshotId),
256320
serviceId: terms.service.id,
257321
termsType: terms.type,
258322
fetchDate: terms.fetchDate,
259-
isExtractOnly: extractOnly,
323+
isTechnicalUpgrade: technicalUpgradeOnly,
260324
metadata: { 'x-engine-version': PACKAGE_VERSION },
261325
});
262326

0 commit comments

Comments
 (0)