Skip to content

Commit 9ff0f31

Browse files
committed
Replace characters that are illegal in filenames
1 parent 533f95f commit 9ff0f31

2 files changed

Lines changed: 57 additions & 1 deletion

File tree

src/archivist/services/sourceDocument.js

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ export default class SourceDocument {
2121
this.filters = filters;
2222
this.content = content;
2323
this.mimeType = mimeType;
24-
this.id = new URL(location).pathname.split('/').filter(Boolean).join('-');
24+
this.id = SourceDocument.generateId(location);
2525
}
2626

2727
get cssSelectors() {
@@ -60,6 +60,16 @@ export default class SourceDocument {
6060
return [selector];
6161
}
6262

63+
static generateId(location) {
64+
const ILLEGAL_CHARACTERS = /[\\:"<>|*?]/g; // Characters forbidden in filenames for cross-platform compatibility; see https://github.com/actions/toolkit/blob/main/packages/artifact/src/internal/upload/path-and-artifact-name-validation.ts
65+
66+
return decodeURIComponent(new URL(location).pathname)
67+
.split('/')
68+
.filter(Boolean)
69+
.join('-')
70+
.replace(ILLEGAL_CHARACTERS, '_');
71+
}
72+
6373
toPersistence() {
6474
return {
6575
fetch: this.location,

src/archivist/services/sourceDocument.test.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,52 @@ describe('SourceDocument', () => {
157157
});
158158
});
159159

160+
describe('#generateId', () => {
  // Every case builds a document from a URL and inspects the ID assigned by the constructor.
  const idFor = location => new SourceDocument({ location }).id;

  it('generates ID from URL pathname', () => {
    expect(idFor('https://example.com/legal/terms')).to.equal('legal-terms');
  });

  it('returns empty string for root URL', () => {
    expect(idFor('https://example.com/')).to.equal('');
  });

  it('decodes URL-encoded characters', () => {
    expect(idFor('https://example.com/terms%20of%20service')).to.equal('terms of service');
  });

  it('decodes URL-encoded characters before replacing illegal ones', () => {
    expect(idFor('https://example.com/terms%3Aof%3Aservice')).to.equal('terms_of_service');
  });

  context('replaces characters that are illegal in filenames for cross-platform compatibility', () => {
    // Characters placed as-is between "before" and "after" in the URL string.
    const LITERAL_CASES = [
      [ ':', 'colon' ],
      [ '"', 'double quote' ],
      [ '<', 'less than' ],
      [ '>', 'greater than' ],
      [ '|', 'vertical bar' ],
      [ '*', 'asterisk' ],
    ];

    LITERAL_CASES.forEach(([ character, name ]) => {
      it(`replaces ${name} "${character}"`, () => {
        expect(idFor(`https://example.com/before${character}after`)).to.equal('before_after');
      });
    });

    // Characters supplied in percent-encoded form; presumably because a literal "?" would start
    // the query string and a literal backslash would not survive URL parsing as a path character.
    const ENCODED_CASES = [
      [ '%5C', 'backslash' ],
      [ '%3F', 'question mark' ],
    ];

    ENCODED_CASES.forEach(([ encoded, name ]) => {
      it(`replaces ${name} decoded from "${encoded}"`, () => {
        expect(idFor(`https://example.com/before${encoded}after`)).to.equal('before_after');
      });
    });
  });
});
205+
160206
describe('#toPersistence', () => {
161207
it('converts basic source document declarations into JSON representation', () => {
162208
const result = new SourceDocument({

0 commit comments

Comments
 (0)