Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions packages/core/src/converters/roundtrip.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ describe('markdown round-trip is byte-identical', () => {
'mail me@example.com ok',
'a <https://example.com> b',
'end https://example.com.',
// Uncommon-TLD URLs must serialize unchanged: bare autolinks render as plain text, while `<...>` and `[text](...)` forms stay links
'visit https://example.zzz now',
'see www.foo.invalidtld here',
'mail me@foo.zzz ok',
'a <https://example.zzz> b',
'[text](https://example.zzz)',
// Embeds stay literal `![](url)` text; the embed renders as a decoration only
'![](https://www.youtube.com/watch?v=dQw4w9WgXcQ)',
'![](https://twitter.com/jack/status/20)',
Expand Down
81 changes: 81 additions & 0 deletions packages/core/src/extensions/common-tlds.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import { describe, expect, it } from 'vitest'

import { COMMON_TLDS, extractTld, hasAllowedTld } from './common-tlds.ts'

describe('extractTld', () => {
  // Each row is [autolink text, expected lowercased TLD].
  const dottedHostCases = [
    ['https://example.com', 'com'],
    ['http://example.com', 'com'],
    ['www.example.com', 'com'],
    ['me@example.com', 'com'],
    ['mailto:me@example.com', 'com'],
    ['xmpp:user@host.org', 'org'],
    ['https://a.b.example.co.uk', 'uk'],
    ['https://example.com:8080', 'com'],
    ['https://example.com/a/b?x=1#y', 'com'],
    ['https://user:pass@example.com', 'com'],
    ['first.last@mail.example.com', 'com'],
    ['https://EXAMPLE.COM', 'com'],
    ['https://example.рф', 'рф'],
    ['https://example.com.', 'com'],
    // An IPv4 literal is not special-cased: its last octet comes back as the "TLD".
    ['http://192.168.1.1', '1'],
  ]

  it.each(dottedHostCases)('extracts the TLD of %s as %s', (input, expected) => {
    const actual = extractTld(input)
    expect(actual).toBe(expected)
  })

  // Inputs whose host has no dot, so there is no TLD to report.
  const undottedHostCases = [['https://localhost'], ['mailto:foo'], ['nodots']]

  it.each(undottedHostCases)('returns undefined for %s (no dotted host)', (input) => {
    const actual = extractTld(input)
    expect(actual).toBeUndefined()
  })
})

describe('hasAllowedTld', () => {
  // Autolink texts whose TLD is on the common-TLD allow-list.
  const allowedInputs = [
    'https://example.com',
    'https://example.org',
    'https://example.io',
    'https://example.co',
    'https://a.example.co.uk',
    'https://example.dev',
    'https://example.app',
    'https://example.xyz',
    'https://example.рф',
    'me@example.com',
  ]

  // Autolink texts with an unlisted TLD, a numeric last label, or no dotted host.
  const rejectedInputs = [
    'https://example.zzz',
    'https://example.invalidtld',
    'https://example.museum',
    'https://example.guru',
    'https://example.ninja',
    'https://example.123',
    'http://192.168.1.1',
    'https://example.c',
    'https://localhost',
  ]

  it.each(allowedInputs)('allows %s', (input) => {
    const allowed = hasAllowedTld(input)
    expect(allowed).toBe(true)
  })

  it.each(rejectedInputs)('rejects %s', (input) => {
    const allowed = hasAllowedTld(input)
    expect(allowed).toBe(false)
  })
})

describe('COMMON_TLDS', () => {
  it('contains representative common TLDs', () => {
    const expectedMembers = ['com', 'net', 'org', 'io', 'uk', 'co', 'om', 'qa', 'dev', 'рф']
    expectedMembers.forEach((tld) => {
      expect(COMMON_TLDS.has(tld)).toBe(true)
    })
  })

  it('excludes uncommon and invalid TLDs', () => {
    const expectedAbsent = ['zzz', 'museum', 'guru', 'invalidtld', '123']
    expectedAbsent.forEach((tld) => {
      expect(COMMON_TLDS.has(tld)).toBe(false)
    })
  })

  // Guards against accidental additions, removals, or duplicate entries in the lists.
  it('pins the set size', () => {
    const { size } = COMMON_TLDS
    expect(size).toMatchInlineSnapshot(`288`)
  })
})
39 changes: 39 additions & 0 deletions packages/core/src/extensions/common-tlds.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
 * Generic-TLD half of the common-TLD list, kept as a single space-separated
 * string. Rows hold ten entries each so additions are easy to review.
 */
const GENERIC_TLDS = [
  'com net org xyz info online top biz shop site',
  'icu store cyou club vip live рф app buzz tech',
  'space fun dev pro mobi life website cloud click work',
  'art asia tokyo blog link one world africa bar gov',
].join(' ')

/**
 * Two-letter country-code half of the common-TLD list, kept as a single
 * space-separated string. Each row holds the codes sharing a first letter.
 */
const COUNTRY_TLDS = [
  'ac ad ae af ag ai al am an ao aq ar as at au aw ax az',
  'ba bb bd be bf bg bh bi bj bm bn bo br bs bt bv bw by bz',
  'ca cc cd cf cg ch ci ck cl cm cn co cr cu cv cw cx cy cz',
  'de dj dk dm do dz',
  'ec ee eg er es et eu',
  'fi fj fk fm fo fr',
  'ga gb gd ge gf gg gh gi gl gm gn gp gq gr gs gt gu gw gy',
  'hk hm hn hr ht hu',
  'id ie il im in io iq ir is it',
  'je jm jo jp',
  'ke kg kh ki km kn kp kr kw ky kz',
  'la lb lc li lk lr ls lt lu lv ly',
  'ma mc md me mg mh mk ml mm mn mo mp mq mr ms mt mu mv mw mx my mz',
  'na nc ne nf ng ni nl no np nr nu nz',
  'om',
  'pa pe pf pg ph pk pl pm pn pr ps pt pw py',
  'qa',
  're ro rs ru rw',
  'sa sb sc sd se sg sh si sj sk sl sm sn so sr st su sv sx sy sz',
  'tc td tf tg th tj tk tl tm tn to tr tt tv tw tz',
  'ua ug uk us uy uz',
  'va vc ve vg vi vn vu',
  'wf ws',
  'ye yt',
  'za zm zw',
].join(' ')

/**
 * Allow-list of TLDs for bare GFM literal autolinks: the entries of
 * `GENERIC_TLDS` followed by the two-letter country codes in
 * `COUNTRY_TLDS`, mirroring reflect-editor's curated "popular TLD" list.
 * Explicit `<...>` autolinks and `[](...)` links are not subject to this
 * set.
 */
export const COMMON_TLDS: ReadonlySet<string> = new Set([
  ...GENERIC_TLDS.split(' '),
  ...COUNTRY_TLDS.split(' '),
])

/**
 * Pull the TLD (last domain label) out of an autolink's visible text.
 *
 * Steps, in order:
 * 1. drop a leading `scheme:` and a protocol-relative `//`;
 * 2. keep only the authority, i.e. everything before the first `/`, `?` or `#`;
 * 3. drop userinfo or an email local part (everything up to the last `@`);
 * 4. drop a `:port` suffix and any trailing dots.
 *
 * The authority is isolated before `@` is searched, so an `@` in the path or
 * query (`https://medium.com/@user`, `?mail=a@b.c`) is never mistaken for
 * userinfo.
 *
 * IP literals are not special-cased: `http://192.168.1.1` yields `'1'`.
 *
 * @param urlText - Visible text of the autolink (URL, `www.` host, or email).
 * @returns The lowercased TLD, or `undefined` when there is no dotted host.
 */
export function extractTld(urlText: string): string | undefined {
  // Scheme characters include `.`, so a scheme-less `www.example.com:8080`
  // would match as "scheme `www.example.com`". A colon followed only by digits
  // up to the end of the authority is a port, so it is left in place here.
  let authority = urlText.replace(/^[a-z][a-z0-9+.-]*:(?!\d+(?:[/?#]|$))/i, '')
  authority = authority.replace(/^\/\//, '')

  // Cut path, query and hash first so a later `@` cannot shadow the real host.
  const authorityEnd = authority.search(/[/?#]/)
  if (authorityEnd >= 0) authority = authority.slice(0, authorityEnd)

  // Userinfo may itself contain `:` (user:pass), so strip it before the port.
  const atIndex = authority.lastIndexOf('@')
  let host = atIndex >= 0 ? authority.slice(atIndex + 1) : authority

  const portIndex = host.indexOf(':')
  if (portIndex >= 0) host = host.slice(0, portIndex)

  // A fully-qualified name (or sentence punctuation) may leave trailing dots.
  host = host.replace(/\.+$/, '')

  const dotIndex = host.lastIndexOf('.')
  if (dotIndex < 0) return undefined
  return host.slice(dotIndex + 1).toLowerCase()
}

/**
 * Reports whether the TLD extracted from an autolink's visible text is a
 * member of `COMMON_TLDS`. Text with no dotted host is never allowed.
 */
export function hasAllowedTld(urlText: string): boolean {
  const candidate = extractTld(urlText)
  if (candidate === undefined) return false
  return COMMON_TLDS.has(candidate)
}
102 changes: 102 additions & 0 deletions packages/core/src/extensions/inline-mark-plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,108 @@ describe('inlineMarkPlugin', () => {
expect(linkText!.attrs.href).toBe('https://example.com')
})

it('does not link a bare autolink with an uncommon TLD', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.paragraph('visit https://example.zzz now')))

  // Probe one character into the URL so the position is inside its text.
  const urlStart = findText(fixture.doc, 'https://example.zzz')
  const marksInsideUrl = marksAt(fixture.doc, urlStart + 1)
  expect(marksInsideUrl).toEqual([])
})

it('does not link a www autolink with an uncommon TLD', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.paragraph('see www.foo.zzz here')))

  // Probe one character into the host so the position is inside its text.
  const hostStart = findText(fixture.doc, 'www.foo.zzz')
  const marksInsideHost = marksAt(fixture.doc, hostStart + 1)
  expect(marksInsideHost).toEqual([])
})

it('does not link an email autolink with an uncommon TLD', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.paragraph('mail me@foo.zzz ok')))

  // Probe one character into the address so the position is inside its text.
  const emailStart = findText(fixture.doc, 'me@foo.zzz')
  const marksInsideEmail = marksAt(fixture.doc, emailStart + 1)
  expect(marksInsideEmail).toEqual([])
})

it('keeps an angle-bracket autolink despite an uncommon TLD', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.paragraph('a <https://example.zzz> b')))

  // Look up the link-text mark at a position inside the bracketed URL.
  const urlStart = findText(fixture.doc, 'https://example.zzz')
  const linkMark = fixture.doc
    .resolve(urlStart + 1)
    .marks()
    .find((mark) => mark.type.name === 'mdLinkText')
  expect(linkMark?.attrs.href).toBe('https://example.zzz')
})

it('keeps an explicit link despite an uncommon TLD', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.paragraph('see [docs](https://example.zzz)')))

  // Look up the link-text mark at a position inside the link label.
  const labelStart = findText(fixture.doc, 'docs')
  const linkMark = fixture.doc
    .resolve(labelStart + 1)
    .marks()
    .find((mark) => mark.type.name === 'mdLinkText')
  expect(linkMark?.attrs.href).toBe('https://example.zzz')
})

it('does not link an uncommon-TLD autolink inside a heading', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.heading({ level: 2 }, 'see https://x.zzz')))

  // Probe one character into the URL so the position is inside its text.
  const urlStart = findText(fixture.doc, 'https://x.zzz')
  const marksInsideUrl = marksAt(fixture.doc, urlStart + 1)
  expect(marksInsideUrl).toEqual([])
})

it('links a common-TLD autolink inside a table cell', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  const cell = builders.tableCell(builders.paragraph('go https://x.com'))
  fixture.set(builders.doc(builders.table(builders.tableRow(cell))))

  // Probe one character into the URL so the position is inside its text.
  const urlStart = findText(fixture.doc, 'https://x.com')
  const marksInsideUrl = marksAt(fixture.doc, urlStart + 1)
  expect(marksInsideUrl).toEqual(['mdLinkText'])
})

it('drops mdLinkText when the TLD is edited to an uncommon one', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.paragraph('visit https://example.com now')))

  // Before the edit the URL carries the link-text mark.
  const linkedStart = findText(fixture.doc, 'https://example.com')
  expect(marksAt(fixture.doc, linkedStart + 1)).toEqual(['mdLinkText'])

  // Replace the three characters of `com` with `zzz` in one transaction.
  const tldStart = findText(fixture.doc, 'com')
  const { view, state } = fixture
  view.dispatch(state.tr.insertText('zzz', tldStart, tldStart + 'com'.length))

  // After the edit the same span must no longer be marked.
  const editedStart = findText(fixture.doc, 'https://example.zzz')
  expect(marksAt(fixture.doc, editedStart + 1)).toEqual([])
})

it('adds mdLinkText when the TLD is edited to a common one', () => {
  using fixture = setupFixture()
  const builders = fixture.n
  fixture.set(builders.doc(builders.paragraph('visit https://example.zzz now')))

  // Before the edit the URL is plain text.
  const plainStart = findText(fixture.doc, 'https://example.zzz')
  expect(marksAt(fixture.doc, plainStart + 1)).toEqual([])

  // Replace the three characters of `zzz` with `com` in one transaction.
  const tldStart = findText(fixture.doc, 'zzz')
  const { view, state } = fixture
  view.dispatch(state.tr.insertText('com', tldStart, tldStart + 'zzz'.length))

  // After the edit the same span must carry the link-text mark.
  const editedStart = findText(fixture.doc, 'https://example.com')
  expect(marksAt(fixture.doc, editedStart + 1)).toEqual(['mdLinkText'])
})

it('marks `*foo*` inside headings as well', () => {
using fixture = setupFixture()
const { n } = fixture
Expand Down
Loading
Loading