From 87529acec5bc186acac57f9eee7e1ad956da7bcc Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Mon, 1 Jun 2026 09:13:11 +0200 Subject: [PATCH 1/6] fix(intl): apply Collator comparison options --- source/shared/IntlICU.pas | 17 ++++++++++---- source/units/Goccia.Values.IntlCollator.pas | 3 ++- .../units/Goccia.Values.StringObjectValue.pas | 2 +- .../Intl/Collator/prototype/compare.js | 23 +++++++++++++++++++ 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/source/shared/IntlICU.pas b/source/shared/IntlICU.pas index b985c604..ea859968 100644 --- a/source/shared/IntlICU.pas +++ b/source/shared/IntlICU.pas @@ -19,7 +19,7 @@ function TryICUGetLocaleCollations(const ALocale: string; out ACollations: IntlTypes.TStringArray): Boolean; function TryICUCompareStrings(const ALocale: string; const AStr1, AStr2: UnicodeString; - ASensitivity: TIntlCollatorSensitivity; AIgnorePunctuation: Boolean; + ASensitivity: TIntlCollatorSensitivity; AIgnorePunctuation, ANumeric: Boolean; out AResult: Integer): Boolean; function TryICUFormatNumber(const ALocale: string; AValue: Double; @@ -188,11 +188,12 @@ implementation UCOL_TERTIARY = 2; UCOL_QUATERNARY = 3; UCOL_IDENTICAL = 15; - UCOL_STRENGTH = 2; + UCOL_STRENGTH = 5; UCOL_CASE_LEVEL = 3; UCOL_ALTERNATE_HANDLING = 1; + UCOL_NUMERIC_COLLATION = 7; UCOL_SHIFTED = 20; - UCOL_ON = 1; + UCOL_ON = 17; UCOL_LESS = -1; UCOL_EQUAL = 0; UCOL_GREATER = 1; @@ -1546,7 +1547,7 @@ function TryICUGetLocaleCollations(const ALocale: string; end; function TryICUCompareStrings(const ALocale: string; const AStr1, AStr2: UnicodeString; - ASensitivity: TIntlCollatorSensitivity; AIgnorePunctuation: Boolean; + ASensitivity: TIntlCollatorSensitivity; AIgnorePunctuation, ANumeric: Boolean; out AResult: Integer): Boolean; var Status: TICUErrorCode; @@ -1582,6 +1583,14 @@ function TryICUCompareStrings(const ALocale: string; const AStr1, AStr2: Unicode if not ICUSucceeded(Status) then Exit; + if ANumeric then + begin + Status := ICU_SUCCESS; + 
IntlFunctions.UcolSetAttribute(Collator, UCOL_NUMERIC_COLLATION, UCOL_ON, Status); + if not ICUSucceeded(Status) then + Exit; + end; + if ASensitivity = icsCase then begin Status := ICU_SUCCESS; diff --git a/source/units/Goccia.Values.IntlCollator.pas b/source/units/Goccia.Values.IntlCollator.pas index 1c0f9bf4..e561e748 100644 --- a/source/units/Goccia.Values.IntlCollator.pas +++ b/source/units/Goccia.Values.IntlCollator.pas @@ -247,7 +247,8 @@ function TGocciaIntlCollatorValue.IntlCollatorCompare( Str2 := UnicodeString(AArgs.GetElement(1).ToStringLiteral.Value); if TryICUCompareStrings(C.FLocale, Str1, Str2, - SensitivityStringToEnum(C.FSensitivity), C.FIgnorePunctuation, CompareResult) then + SensitivityStringToEnum(C.FSensitivity), C.FIgnorePunctuation, C.FNumeric, + CompareResult) then Result := TGocciaNumberLiteralValue.Create(CompareResult) else Result := TGocciaNumberLiteralValue.Create(CompareStr(string(Str1), string(Str2))); diff --git a/source/units/Goccia.Values.StringObjectValue.pas b/source/units/Goccia.Values.StringObjectValue.pas index 5af85438..5593becd 100644 --- a/source/units/Goccia.Values.StringObjectValue.pas +++ b/source/units/Goccia.Values.StringObjectValue.pas @@ -1951,7 +1951,7 @@ function TGocciaStringObjectValue.StringLocaleCompare(const AArgs: TGocciaArgume Locale := AArgs.GetElement(1).ToStringLiteral.Value; if TryICUCompareStrings(Locale, UnicodeString(StringValue), - UnicodeString(ThatString), icsVariant, False, ICUResult) then + UnicodeString(ThatString), icsVariant, False, False, ICUResult) then Result := TGocciaNumberLiteralValue.Create(ICUResult) else if StringValue < ThatString then Result := TGocciaNumberLiteralValue.Create(-1) diff --git a/tests/built-ins/Intl/Collator/prototype/compare.js b/tests/built-ins/Intl/Collator/prototype/compare.js index c5c180ab..7c8a1a1b 100644 --- a/tests/built-ins/Intl/Collator/prototype/compare.js +++ b/tests/built-ins/Intl/Collator/prototype/compare.js @@ -55,4 +55,27 @@ 
describe.runIf(isIntl)("Intl.Collator.prototype.compare", () => { expect(sorted[1]).toBe("banana"); expect(sorted[2]).toBe("cherry"); }); + + test("base sensitivity ignores case and accents", () => { + const collator = new Intl.Collator("en", { sensitivity: "base" }); + expect(collator.compare("a", "A")).toBe(0); + expect(collator.compare("a", "\u00e1")).toBe(0); + }); + + test("accent sensitivity ignores case but distinguishes accents", () => { + const collator = new Intl.Collator("en", { sensitivity: "accent" }); + expect(collator.compare("a", "A")).toBe(0); + expect(collator.compare("a", "\u00e1") === 0).toBe(false); + }); + + test("case sensitivity ignores accents but distinguishes case", () => { + const collator = new Intl.Collator("en", { sensitivity: "case" }); + expect(collator.compare("a", "\u00e1")).toBe(0); + expect(collator.compare("a", "A") === 0).toBe(false); + }); + + test("numeric collation compares decimal digit sequences by numeric value", () => { + const collator = new Intl.Collator("en", { numeric: true }); + expect(collator.compare("2", "10") < 0).toBe(true); + }); }); From b785c2d87127e48ccb035ce7e94979fbf3d8623d Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Mon, 1 Jun 2026 11:45:44 +0200 Subject: [PATCH 2/6] fix(intl): align localeCompare with Collator --- source/units/Goccia.Values.ArrayValue.pas | 130 ++++++++++++++---- source/units/Goccia.Values.IntlCollator.pas | 19 ++- .../units/Goccia.Values.StringObjectValue.pas | 19 ++- tests/built-ins/Array/prototype/sort.js | 13 ++ .../Intl/Collator/prototype/compare.js | 11 ++ .../String/prototype/localeCompare.js | 5 + 6 files changed, 156 insertions(+), 41 deletions(-) diff --git a/source/units/Goccia.Values.ArrayValue.pas b/source/units/Goccia.Values.ArrayValue.pas index ff079cb9..7cd3ed44 100644 --- a/source/units/Goccia.Values.ArrayValue.pas +++ b/source/units/Goccia.Values.ArrayValue.pas @@ -835,37 +835,119 @@ function CallCompareFunc(const ACompareFunc: TGocciaFunctionBase; const 
ACallArg Result := CompResult.Value; end; -procedure QuickSortElements(const AElements: TGocciaValueList; const ACompareFunc: TGocciaFunctionBase; - const ACallArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue; const ALo, AHi: Integer); +procedure StableSortElements(const AElements: TGocciaValueList; const ACompareFunc: TGocciaFunctionBase; + const ACallArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue); var - I, J: Integer; - Pivot: TGocciaValue; + Buffer: array of TGocciaValue; + + procedure SortRange(const ALo, AHi: Integer); + var + Mid, I, J, K: Integer; + begin + if AHi - ALo < 2 then + Exit; + + Mid := ALo + ((AHi - ALo) div 2); + SortRange(ALo, Mid); + SortRange(Mid, AHi); + + I := ALo; + J := Mid; + K := ALo; + while (I < Mid) and (J < AHi) do + begin + if CallCompareFunc(ACompareFunc, ACallArgs, AElements[I], AElements[J], AThisValue) <= 0 then + begin + Buffer[K] := AElements[I]; + Inc(I); + end + else + begin + Buffer[K] := AElements[J]; + Inc(J); + end; + Inc(K); + end; + while I < Mid do + begin + Buffer[K] := AElements[I]; + Inc(I); + Inc(K); + end; + while J < AHi do + begin + Buffer[K] := AElements[J]; + Inc(J); + Inc(K); + end; + + for K := ALo to AHi - 1 do + AElements[K] := Buffer[K]; + end; + begin - if ALo >= AHi then Exit; + if AElements.Count < 2 then + Exit; + + SetLength(Buffer, AElements.Count); + SortRange(0, AElements.Count); +end; - Pivot := AElements[(ALo + AHi) div 2]; - I := ALo; - J := AHi; +procedure StableSortElementsDefault(const AElements: TGocciaValueList); +var + Buffer: array of TGocciaValue; - while I <= J do + procedure SortRange(const ALo, AHi: Integer); + var + Mid, I, J, K: Integer; begin - while CallCompareFunc(ACompareFunc, ACallArgs, AElements[I], Pivot, AThisValue) < 0 do - Inc(I); - while CallCompareFunc(ACompareFunc, ACallArgs, AElements[J], Pivot, AThisValue) > 0 do - Dec(J); + if AHi - ALo < 2 then + Exit; - if I <= J then + Mid := ALo + ((AHi - ALo) div 2); + SortRange(ALo, Mid); + 
SortRange(Mid, AHi); + + I := ALo; + J := Mid; + K := ALo; + while (I < Mid) and (J < AHi) do begin - AElements.Exchange(I, J); + if DefaultCompare(AElements[I], AElements[J]) <= 0 then + begin + Buffer[K] := AElements[I]; + Inc(I); + end + else + begin + Buffer[K] := AElements[J]; + Inc(J); + end; + Inc(K); + end; + while I < Mid do + begin + Buffer[K] := AElements[I]; Inc(I); - Dec(J); + Inc(K); end; + while J < AHi do + begin + Buffer[K] := AElements[J]; + Inc(J); + Inc(K); + end; + + for K := ALo to AHi - 1 do + AElements[K] := Buffer[K]; end; - if ALo < J then - QuickSortElements(AElements, ACompareFunc, ACallArgs, AThisValue, ALo, J); - if I < AHi then - QuickSortElements(AElements, ACompareFunc, ACallArgs, AThisValue, I, AHi); +begin + if AElements.Count < 2 then + Exit; + + SetLength(Buffer, AElements.Count); + SortRange(0, AElements.Count); end; constructor TGocciaArrayValue.Create(const AClass: TGocciaClassValue = nil; @@ -2757,12 +2839,12 @@ function TGocciaArrayValue.ArrayToSorted(const AArgs: TGocciaArgumentsCollection begin CallArgs := TGocciaArgumentsCollection.Create([nil, nil]); try - QuickSortElements(ResultArray.Elements, TGocciaFunctionBase(CustomSortFunction), CallArgs, AThisValue, 0, ResultArray.Elements.Count - 1); + StableSortElements(ResultArray.Elements, TGocciaFunctionBase(CustomSortFunction), CallArgs, AThisValue); finally CallArgs.Free; end; end else - ResultArray.Elements.Sort(TComparer.Construct(DefaultCompare)); + StableSortElementsDefault(ResultArray.Elements); // Step 7: Return A Result := ResultArray; @@ -3269,12 +3351,12 @@ function TGocciaArrayValue.ArraySort(const AArgs: TGocciaArgumentsCollection; co CallArgs := TGocciaArgumentsCollection.Create([nil, nil]); try if TempArr.Elements.Count > 1 then - QuickSortElements(TempArr.Elements, TGocciaFunctionBase(CustomSortFunction), CallArgs, AThisValue, 0, TempArr.Elements.Count - 1); + StableSortElements(TempArr.Elements, TGocciaFunctionBase(CustomSortFunction), CallArgs, 
AThisValue); finally CallArgs.Free; end; end else if TempArr.Elements.Count > 1 then - TempArr.Elements.Sort(TComparer.Construct(DefaultCompare)); + StableSortElementsDefault(TempArr.Elements); // Write sorted elements back to front indices for I := 0 to TempArr.Elements.Count - 1 do diff --git a/source/units/Goccia.Values.IntlCollator.pas b/source/units/Goccia.Values.IntlCollator.pas index e561e748..1fe70d17 100644 --- a/source/units/Goccia.Values.IntlCollator.pas +++ b/source/units/Goccia.Values.IntlCollator.pas @@ -15,6 +15,7 @@ interface TGocciaIntlCollatorValue = class(TGocciaObjectValue) private FLocale: string; + FICULocale: string; FSensitivity: string; FUsage: string; FIgnorePunctuation: Boolean; @@ -30,6 +31,7 @@ TGocciaIntlCollatorValue = class(TGocciaObjectValue) function ToStringTag: string; override; procedure MarkReferences; override; class procedure ExposePrototype(const AConstructor: TGocciaObjectValue); + function CompareStrings(const AString1, AString2: string): Integer; published function IntlCollatorCompareGetter(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue; function IntlCollatorCompare(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue; @@ -135,6 +137,7 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption FLocale := DefaultLocale else FLocale := Canonical; + FICULocale := LocaleWithoutUnicodeExtension(FLocale); // Defaults FSensitivity := 'variant'; @@ -239,19 +242,21 @@ function TGocciaIntlCollatorValue.IntlCollatorCompare( var C: TGocciaIntlCollatorValue; Str1, Str2: UnicodeString; - CompareResult: Integer; begin C := AsCollator(AThisValue, 'Intl.Collator.prototype.compare'); // Per ECMA-402, missing arguments are ToString-coerced (undefined -> "undefined"). 
Str1 := UnicodeString(AArgs.GetElement(0).ToStringLiteral.Value); Str2 := UnicodeString(AArgs.GetElement(1).ToStringLiteral.Value); - if TryICUCompareStrings(C.FLocale, Str1, Str2, - SensitivityStringToEnum(C.FSensitivity), C.FIgnorePunctuation, C.FNumeric, - CompareResult) then - Result := TGocciaNumberLiteralValue.Create(CompareResult) - else - Result := TGocciaNumberLiteralValue.Create(CompareStr(string(Str1), string(Str2))); + Result := TGocciaNumberLiteralValue.Create(C.CompareStrings(string(Str1), string(Str2))); +end; + +function TGocciaIntlCollatorValue.CompareStrings(const AString1, AString2: string): Integer; +begin + if TryICUCompareStrings(FICULocale, UnicodeString(AString1), UnicodeString(AString2), + SensitivityStringToEnum(FSensitivity), FIgnorePunctuation, FNumeric, Result) then + Exit; + Result := CompareStr(AString1, AString2); end; function TGocciaIntlCollatorValue.IntlCollatorResolvedOptions(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue; diff --git a/source/units/Goccia.Values.StringObjectValue.pas b/source/units/Goccia.Values.StringObjectValue.pas index 5593becd..121fece9 100644 --- a/source/units/Goccia.Values.StringObjectValue.pas +++ b/source/units/Goccia.Values.StringObjectValue.pas @@ -103,6 +103,7 @@ implementation Goccia.Utils, Goccia.Values.ArrayValue, Goccia.Values.ErrorHelper, + Goccia.Values.IntlCollator, Goccia.Values.Iterator.Concrete, Goccia.Values.Iterator.RegExp, Goccia.Values.ProxyValue, @@ -1937,7 +1938,8 @@ function TGocciaStringObjectValue.StringCodePointAt(const AArgs: TGocciaArgument function TGocciaStringObjectValue.StringLocaleCompare(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue; var StringValue, ThatString, Locale: string; - ICUResult: Integer; + Options: TGocciaObjectValue; + Collator: TGocciaIntlCollatorValue; begin StringValue := ExtractStringValue(AThisValue); @@ -1950,15 +1952,12 @@ function 
TGocciaStringObjectValue.StringLocaleCompare(const AArgs: TGocciaArgume if (AArgs.Length > 1) and not (AArgs.GetElement(1) is TGocciaUndefinedLiteralValue) then Locale := AArgs.GetElement(1).ToStringLiteral.Value; - if TryICUCompareStrings(Locale, UnicodeString(StringValue), - UnicodeString(ThatString), icsVariant, False, False, ICUResult) then - Result := TGocciaNumberLiteralValue.Create(ICUResult) - else if StringValue < ThatString then - Result := TGocciaNumberLiteralValue.Create(-1) - else if StringValue > ThatString then - Result := TGocciaNumberLiteralValue.Create(1) - else - Result := TGocciaNumberLiteralValue.Create(0); + Options := nil; + if (AArgs.Length > 2) and (AArgs.GetElement(2) is TGocciaObjectValue) then + Options := TGocciaObjectValue(AArgs.GetElement(2)); + + Collator := TGocciaIntlCollatorValue.Create(Locale, Options); + Result := TGocciaNumberLiteralValue.Create(Collator.CompareStrings(StringValue, ThatString)); end; // ES2026 §22.1.3.13 String.prototype.normalize([form]) diff --git a/tests/built-ins/Array/prototype/sort.js b/tests/built-ins/Array/prototype/sort.js index aea61407..e646d5b3 100644 --- a/tests/built-ins/Array/prototype/sort.js +++ b/tests/built-ins/Array/prototype/sort.js @@ -84,6 +84,19 @@ describe("Array.prototype.sort", () => { expect(arr).toEqual([1, 1, 1]); }); + test("sort is stable when comparator returns zero", () => { + const arr = [ + { group: 1, id: "a" }, + { group: 1, id: "b" }, + { group: 0, id: "c" }, + { group: 1, id: "d" }, + ]; + + arr.sort((a, b) => a.group - b.group); + + expect(arr.map((item) => item.id).join("")).toBe("cabd"); + }); + test("default sort converts to string comparison", () => { const arr = [80, 9, 700, 40, 1, 5, 200]; arr.sort(); diff --git a/tests/built-ins/Intl/Collator/prototype/compare.js b/tests/built-ins/Intl/Collator/prototype/compare.js index 7c8a1a1b..70cdbff9 100644 --- a/tests/built-ins/Intl/Collator/prototype/compare.js +++ b/tests/built-ins/Intl/Collator/prototype/compare.js @@ 
-78,4 +78,15 @@ describe.runIf(isIntl)("Intl.Collator.prototype.compare", () => { const collator = new Intl.Collator("en", { numeric: true }); expect(collator.compare("2", "10") < 0).toBe(true); }); + + test("ignored Unicode extension values do not affect comparison", () => { + const values = ["\u212b", "\u00c5", "A\u030a", "hello"]; + const defaultCollator = new Intl.Collator(); + const locale = defaultCollator.resolvedOptions().locale; + const ignoredExtensionCollator = new Intl.Collator(locale + "-u-co-search"); + + expect(values.slice().sort(ignoredExtensionCollator.compare).join("|")).toBe( + values.slice().sort(defaultCollator.compare).join("|"), + ); + }); }); diff --git a/tests/built-ins/String/prototype/localeCompare.js b/tests/built-ins/String/prototype/localeCompare.js index 561c6535..f343fc52 100644 --- a/tests/built-ins/String/prototype/localeCompare.js +++ b/tests/built-ins/String/prototype/localeCompare.js @@ -23,4 +23,9 @@ describe("String.prototype.localeCompare", () => { const result = "A".localeCompare("a"); expect(result !== 0).toBe(true); }); + + test("uses Intl.Collator options", () => { + expect("A".localeCompare("a", "en", { sensitivity: "base" })).toBe(0); + expect("2".localeCompare("10", "en", { numeric: true }) < 0).toBe(true); + }); }); From e48cae33283f7fba4c03b7d77232ff5eb704dd1b Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Mon, 1 Jun 2026 12:52:16 +0200 Subject: [PATCH 3/6] fix(intl): resolve Collator Unicode extensions --- source/shared/IntlICU.pas | 30 +- source/units/Goccia.Values.IntlCollator.pas | 266 +++++++++++++++++- tests/built-ins/Intl/Collator/constructor.js | 18 ++ .../Intl/Collator/prototype/compare.js | 5 + 4 files changed, 311 insertions(+), 8 deletions(-) diff --git a/source/shared/IntlICU.pas b/source/shared/IntlICU.pas index ea859968..a2447313 100644 --- a/source/shared/IntlICU.pas +++ b/source/shared/IntlICU.pas @@ -20,7 +20,7 @@ function TryICUGetLocaleCollations(const ALocale: string; function 
TryICUCompareStrings(const ALocale: string; const AStr1, AStr2: UnicodeString; ASensitivity: TIntlCollatorSensitivity; AIgnorePunctuation, ANumeric: Boolean; - out AResult: Integer): Boolean; + const ACaseFirst: string; out AResult: Integer): Boolean; function TryICUFormatNumber(const ALocale: string; AValue: Double; const AOptions: TIntlNumberFormatOptions; out AFormatted: string): Boolean; @@ -189,11 +189,15 @@ implementation UCOL_QUATERNARY = 3; UCOL_IDENTICAL = 15; UCOL_STRENGTH = 5; + UCOL_CASE_FIRST = 2; UCOL_CASE_LEVEL = 3; UCOL_ALTERNATE_HANDLING = 1; UCOL_NUMERIC_COLLATION = 7; + UCOL_OFF = 16; UCOL_SHIFTED = 20; UCOL_ON = 17; + UCOL_LOWER_FIRST = 24; + UCOL_UPPER_FIRST = 25; UCOL_LESS = -1; UCOL_EQUAL = 0; UCOL_GREATER = 1; @@ -1548,7 +1552,7 @@ function TryICUGetLocaleCollations(const ALocale: string; function TryICUCompareStrings(const ALocale: string; const AStr1, AStr2: UnicodeString; ASensitivity: TIntlCollatorSensitivity; AIgnorePunctuation, ANumeric: Boolean; - out AResult: Integer): Boolean; + const ACaseFirst: string; out AResult: Integer): Boolean; var Status: TICUErrorCode; Collator: Pointer; @@ -1591,6 +1595,28 @@ function TryICUCompareStrings(const ALocale: string; const AStr1, AStr2: Unicode Exit; end; + if ACaseFirst = 'upper' then + begin + Status := ICU_SUCCESS; + IntlFunctions.UcolSetAttribute(Collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, Status); + if not ICUSucceeded(Status) then + Exit; + end + else if ACaseFirst = 'lower' then + begin + Status := ICU_SUCCESS; + IntlFunctions.UcolSetAttribute(Collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, Status); + if not ICUSucceeded(Status) then + Exit; + end + else if ACaseFirst = 'false' then + begin + Status := ICU_SUCCESS; + IntlFunctions.UcolSetAttribute(Collator, UCOL_CASE_FIRST, UCOL_OFF, Status); + if not ICUSucceeded(Status) then + Exit; + end; + if ASensitivity = icsCase then begin Status := ICU_SUCCESS; diff --git a/source/units/Goccia.Values.IntlCollator.pas 
b/source/units/Goccia.Values.IntlCollator.pas index 1fe70d17..6fe37df1 100644 --- a/source/units/Goccia.Values.IntlCollator.pas +++ b/source/units/Goccia.Values.IntlCollator.pas @@ -123,21 +123,187 @@ function SensitivityStringToEnum(const AValue: string): TIntlCollatorSensitivity Result := icsVariant; end; +function IsValidCaseFirstValue(const AValue: string): Boolean; +begin + Result := (AValue = 'upper') or (AValue = 'lower') or (AValue = 'false'); +end; + +function NormalizeCollationValue(const AValue: string): string; +begin + if AValue = 'phonebook' then + Result := 'phonebk' + else if AValue = 'traditional' then + Result := 'trad' + else if AValue = 'dictionary' then + Result := 'dict' + else + Result := AValue; +end; + +function IsSupportedCollationValue(const ALocale, AValue: string): Boolean; +const + SupportedCollations: array[0..16] of string = ( + 'big5han', 'compat', 'dict', 'direct', 'ducet', 'emoji', 'eor', + 'gb2312', 'phonebk', 'phonetic', 'pinyin', 'reformed', 'searchjl', + 'stroke', 'trad', 'unihan', 'zhuyin'); +var + Collations: IntlTypes.TStringArray; + I: Integer; +begin + Result := False; + if (AValue = '') or (AValue = 'standard') or (AValue = 'search') then + Exit; + + if TryICUGetLocaleCollations(LocaleWithoutUnicodeExtension(ALocale), Collations) then + begin + for I := 0 to High(Collations) do + begin + if SameText(NormalizeCollationValue(Collations[I]), AValue) then + begin + Result := True; + Exit; + end; + end; + Exit; + end; + + for I := Low(SupportedCollations) to High(SupportedCollations) do + begin + if AValue = SupportedCollations[I] then + begin + Result := True; + Exit; + end; + end; +end; + +function TryGetUnicodeExtensionKey(const ALocale, AKey: string; out AValue: string): Boolean; +var + ExtensionStart, Index, NextDash: Integer; + Tail, Subtag, KeyLower: string; +begin + Result := False; + AValue := ''; + KeyLower := LowerCase(AKey); + ExtensionStart := Pos('-u-', LowerCase(ALocale)); + if ExtensionStart = 0 then + Exit; + 
+ Tail := Copy(ALocale, ExtensionStart + 3, MaxInt); + Index := 1; + while Index <= Length(Tail) do + begin + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); + Index := NextDash + 1; + + if Length(Subtag) <> 2 then + Continue; + + if Subtag = KeyLower then + begin + Result := True; + while Index <= Length(Tail) do + begin + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); + if Length(Subtag) = 2 then + Exit; + if AValue = '' then + AValue := Subtag + else + AValue := AValue + '-' + Subtag; + Index := NextDash + 1; + end; + Exit; + end; + end; +end; + +function RemoveUnicodeExtensionKey(const ALocale, AKey: string): string; +var + ExtensionStart, Index, NextDash: Integer; + Base, Tail, Subtag, KeyLower, NewTail: string; + Removing: Boolean; +begin + KeyLower := LowerCase(AKey); + ExtensionStart := Pos('-u-', LowerCase(ALocale)); + if ExtensionStart = 0 then + begin + Result := ALocale; + Exit; + end; + + Base := Copy(ALocale, 1, ExtensionStart - 1); + Tail := Copy(ALocale, ExtensionStart + 3, MaxInt); + NewTail := ''; + Removing := False; + Index := 1; + while Index <= Length(Tail) do + begin + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := Copy(Tail, Index, NextDash - Index); + Index := NextDash + 1; + + if Length(Subtag) = 2 then + Removing := LowerCase(Subtag) = KeyLower; + if Removing then + Continue; + if NewTail <> '' then + NewTail := NewTail + '-'; + NewTail := NewTail + Subtag; + end; + + if NewTail = '' then + Result := Base + else + Result := Base + '-u-' + NewTail; +end; + +function AddUnicodeExtensionKey(const ALocale, AKey, AValue: 
string): string; +var + ExtensionStart: Integer; + Addition: string; +begin + Addition := AKey; + if AValue <> '' then + Addition := Addition + '-' + AValue; + + ExtensionStart := Pos('-u-', LowerCase(ALocale)); + if ExtensionStart = 0 then + Result := ALocale + '-u-' + Addition + else + Result := ALocale + '-' + Addition; +end; + { TGocciaIntlCollatorValue } constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOptions: TGocciaObjectValue); var - Canonical: string; + Canonical, RawLocale: string; V: TGocciaValue; - Ignored: string; + Ignored, LocaleNumeric, LocaleCaseFirst, LocaleCollation, CollationOption: string; + NumericOptionPresent, CaseFirstOptionPresent, CollationOptionPresent: Boolean; begin inherited Create; + RawLocale := ALocale; Canonical := CanonicalizeUnicodeLocaleId(ALocale); if Canonical = '' then FLocale := DefaultLocale else FLocale := Canonical; - FICULocale := LocaleWithoutUnicodeExtension(FLocale); // Defaults FSensitivity := 'variant'; @@ -146,6 +312,61 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption FNumeric := False; FCaseFirst := 'false'; FCollation := 'default'; + NumericOptionPresent := False; + CaseFirstOptionPresent := False; + CollationOptionPresent := False; + + if TryGetUnicodeExtensionKey(FLocale, 'kn', LocaleNumeric) or + TryGetUnicodeExtensionKey(RawLocale, 'kn', LocaleNumeric) then + begin + if (LocaleNumeric = '') or (LocaleNumeric = 'true') then + begin + FNumeric := True + end + else if LocaleNumeric = 'false' then + FNumeric := False + else + FLocale := RemoveUnicodeExtensionKey(FLocale, 'kn'); + if not TryGetUnicodeExtensionKey(FLocale, 'kn', Ignored) and + ((LocaleNumeric = '') or (LocaleNumeric = 'true') or (LocaleNumeric = 'false')) then + begin + if LocaleNumeric = 'true' then + FLocale := AddUnicodeExtensionKey(FLocale, 'kn', '') + else + FLocale := AddUnicodeExtensionKey(FLocale, 'kn', LocaleNumeric); + end; + end; + + if TryGetUnicodeExtensionKey(FLocale, 'kf', 
LocaleCaseFirst) or + TryGetUnicodeExtensionKey(RawLocale, 'kf', LocaleCaseFirst) then + begin + if IsValidCaseFirstValue(LocaleCaseFirst) then + begin + FCaseFirst := LocaleCaseFirst + end + else + FLocale := RemoveUnicodeExtensionKey(FLocale, 'kf'); + if not TryGetUnicodeExtensionKey(FLocale, 'kf', Ignored) and + IsValidCaseFirstValue(LocaleCaseFirst) then + FLocale := AddUnicodeExtensionKey(FLocale, 'kf', LocaleCaseFirst); + end; + + if TryGetUnicodeExtensionKey(FLocale, 'co', LocaleCollation) or + TryGetUnicodeExtensionKey(RawLocale, 'co', LocaleCollation) then + begin + LocaleCollation := NormalizeCollationValue(LocaleCollation); + if IsSupportedCollationValue(FLocale, LocaleCollation) then + begin + FCollation := LocaleCollation; + if TryGetUnicodeExtensionKey(FLocale, 'co', Ignored) and + (NormalizeCollationValue(Ignored) <> LocaleCollation) then + FLocale := RemoveUnicodeExtensionKey(FLocale, 'co'); + if not TryGetUnicodeExtensionKey(FLocale, 'co', Ignored) then + FLocale := AddUnicodeExtensionKey(FLocale, 'co', LocaleCollation); + end + else + FLocale := RemoveUnicodeExtensionKey(FLocale, 'co'); + end; if Assigned(AOptions) then begin @@ -156,12 +377,44 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption FIgnorePunctuation := V.ToBooleanLiteral.Value; V := AOptions.GetProperty('numeric'); if Assigned(V) and not (V is TGocciaUndefinedLiteralValue) then + begin + NumericOptionPresent := True; FNumeric := V.ToBooleanLiteral.Value; - ReadValidatedStringOption(AOptions, 'caseFirst', FCaseFirst); - TryReadStringOption(AOptions, 'collation', FCollation); + end; + CaseFirstOptionPresent := TryReadStringOption(AOptions, 'caseFirst', Ignored); + if CaseFirstOptionPresent then + begin + if ContainsNulCharacter(Ignored) then + ThrowRangeError(Format(SErrorIntlInvalidOption, [Ignored, 'caseFirst'])); + FCaseFirst := Ignored; + end; + CollationOptionPresent := TryReadStringOption(AOptions, 'collation', CollationOption); + CollationOption 
:= NormalizeCollationValue(CollationOption); + if CollationOptionPresent and IsSupportedCollationValue(FLocale, CollationOption) then + FCollation := CollationOption; ReadValidatedStringOption(AOptions, 'localeMatcher', Ignored); end; + if NumericOptionPresent and TryGetUnicodeExtensionKey(FLocale, 'kn', LocaleNumeric) then + begin + if ((LocaleNumeric = '') or (LocaleNumeric = 'true')) <> FNumeric then + FLocale := RemoveUnicodeExtensionKey(FLocale, 'kn'); + end; + if CaseFirstOptionPresent and TryGetUnicodeExtensionKey(FLocale, 'kf', LocaleCaseFirst) then + begin + if LocaleCaseFirst <> FCaseFirst then + FLocale := RemoveUnicodeExtensionKey(FLocale, 'kf'); + end; + if CollationOptionPresent and TryGetUnicodeExtensionKey(FLocale, 'co', LocaleCollation) then + begin + if NormalizeCollationValue(LocaleCollation) <> FCollation then + FLocale := RemoveUnicodeExtensionKey(FLocale, 'co'); + end; + + FICULocale := LocaleWithoutUnicodeExtension(FLocale); + if FCollation <> 'default' then + FICULocale := AddUnicodeExtensionKey(FICULocale, 'co', FCollation); + InitializePrototype; if Assigned(GetIntlCollatorShared) then FPrototype := GetIntlCollatorShared.Prototype; @@ -254,7 +507,8 @@ function TGocciaIntlCollatorValue.IntlCollatorCompare( function TGocciaIntlCollatorValue.CompareStrings(const AString1, AString2: string): Integer; begin if TryICUCompareStrings(FICULocale, UnicodeString(AString1), UnicodeString(AString2), - SensitivityStringToEnum(FSensitivity), FIgnorePunctuation, FNumeric, Result) then + SensitivityStringToEnum(FSensitivity), FIgnorePunctuation, FNumeric, + FCaseFirst, Result) then Exit; Result := CompareStr(AString1, AString2); end; diff --git a/tests/built-ins/Intl/Collator/constructor.js b/tests/built-ins/Intl/Collator/constructor.js index 48c08d1f..c735da65 100644 --- a/tests/built-ins/Intl/Collator/constructor.js +++ b/tests/built-ins/Intl/Collator/constructor.js @@ -26,4 +26,22 @@ describe.runIf(isIntl)("Intl.Collator constructor", () => { const 
options = collator.resolvedOptions(); expect(typeof options.locale).toBe("string"); }); + + test("resolves supported Unicode extension keys", () => { + const numeric = new Intl.Collator("en-u-kn-true").resolvedOptions(); + expect(numeric.locale).toBe("en-u-kn"); + expect(numeric.numeric).toBe(true); + + const numericOverride = new Intl.Collator("en-u-kn-false", { numeric: true }).resolvedOptions(); + expect(numericOverride.locale).toBe("en"); + expect(numericOverride.numeric).toBe(true); + + const caseFirst = new Intl.Collator("en-u-kf-lower").resolvedOptions(); + expect(caseFirst.locale).toBe("en-u-kf-lower"); + expect(caseFirst.caseFirst).toBe("lower"); + + const collation = new Intl.Collator("de-u-co-phonebk").resolvedOptions(); + expect(collation.locale).toBe("de-u-co-phonebk"); + expect(collation.collation).toBe("phonebk"); + }); }); diff --git a/tests/built-ins/Intl/Collator/prototype/compare.js b/tests/built-ins/Intl/Collator/prototype/compare.js index 70cdbff9..009fac03 100644 --- a/tests/built-ins/Intl/Collator/prototype/compare.js +++ b/tests/built-ins/Intl/Collator/prototype/compare.js @@ -79,6 +79,11 @@ describe.runIf(isIntl)("Intl.Collator.prototype.compare", () => { expect(collator.compare("2", "10") < 0).toBe(true); }); + test("numeric Unicode extension compares decimal digit sequences by numeric value", () => { + const collator = new Intl.Collator("en-u-kn-true"); + expect(collator.compare("2", "10") < 0).toBe(true); + }); + test("ignored Unicode extension values do not affect comparison", () => { const values = ["\u212b", "\u00c5", "A\u030a", "hello"]; const defaultCollator = new Intl.Collator(); From dddd46af01b5b69d281aa8134b0db143195e5f67 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Mon, 1 Jun 2026 13:53:45 +0200 Subject: [PATCH 4/6] fix(intl): align Collator collation edge cases --- source/units/Goccia.Builtins.Intl.pas | 7 - source/units/Goccia.Values.IntlCollator.pas | 195 ++++++++++++++----- 
tests/built-ins/Intl/Collator/constructor.js | 5 + tests/built-ins/Intl/supportedValuesOf.js | 27 +++ 4 files changed, 180 insertions(+), 54 deletions(-) diff --git a/source/units/Goccia.Builtins.Intl.pas b/source/units/Goccia.Builtins.Intl.pas index bc51e7f2..8b85c11e 100644 --- a/source/units/Goccia.Builtins.Intl.pas +++ b/source/units/Goccia.Builtins.Intl.pas @@ -213,21 +213,14 @@ function TGocciaIntlBuiltin.SupportedValuesOf(const AArgs: TGocciaArgumentsColle end else if Key = 'collation' then begin - AddString('big5han'); AddString('compat'); AddString('dict'); - AddString('direct'); - AddString('ducet'); AddString('emoji'); AddString('eor'); - AddString('gb2312'); AddString('phonebk'); AddString('phonetic'); AddString('pinyin'); - AddString('reformed'); - AddString('search'); AddString('searchjl'); - AddString('standard'); AddString('stroke'); AddString('trad'); AddString('unihan'); diff --git a/source/units/Goccia.Values.IntlCollator.pas b/source/units/Goccia.Values.IntlCollator.pas index 6fe37df1..5979b1f5 100644 --- a/source/units/Goccia.Values.IntlCollator.pas +++ b/source/units/Goccia.Values.IntlCollator.pas @@ -140,6 +140,102 @@ function NormalizeCollationValue(const AValue: string): string; Result := AValue; end; +function FindSingletonExtensionStart(const ALocale, ASingleton: string; out AStart: Integer): Boolean; +var + Index, NextDash, SubtagStart: Integer; + Subtag, SingletonLower: string; +begin + Result := False; + AStart := 0; + SingletonLower := LowerCase(ASingleton); + Index := 1; + while Index <= Length(ALocale) do + begin + SubtagStart := Index; + NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(ALocale) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); + + if Length(Subtag) = 1 then + begin + if Subtag = SingletonLower then + begin + AStart := SubtagStart - 1; + Result := True; + Exit; + end; + if Subtag = 'x' then + Exit; + end; + + 
Index := NextDash + 1; + end; +end; + +function FindUnicodeExtensionRangeFrom(const ALocale: string; ASearchStart: Integer; + out AStart, AEnd: Integer): Boolean; +var + Index, NextDash, SubtagStart: Integer; + Subtag: string; +begin + Result := False; + AStart := 0; + AEnd := 0; + Index := ASearchStart; + if Index < 1 then + Index := 1; + while Index <= Length(ALocale) do + begin + SubtagStart := Index; + NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(ALocale) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); + + if Length(Subtag) = 1 then + begin + if Subtag = 'x' then + Exit; + if Subtag = 'u' then + begin + AStart := SubtagStart - 1; + AEnd := Length(ALocale) + 1; + Index := NextDash + 1; + while Index <= Length(ALocale) do + begin + SubtagStart := Index; + NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(ALocale) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); + if Length(Subtag) = 1 then + begin + AEnd := SubtagStart - 1; + Break; + end; + Index := NextDash + 1; + end; + Result := True; + Exit; + end; + end; + + Index := NextDash + 1; + end; +end; + +function FindUnicodeExtensionRange(const ALocale: string; out AStart, AEnd: Integer): Boolean; +begin + Result := FindUnicodeExtensionRangeFrom(ALocale, 1, AStart, AEnd); +end; + function IsSupportedCollationValue(const ALocale, AValue: string): Boolean; const SupportedCollations: array[0..16] of string = ( @@ -179,71 +275,73 @@ function IsSupportedCollationValue(const ALocale, AValue: string): Boolean; function TryGetUnicodeExtensionKey(const ALocale, AKey: string; out AValue: string): Boolean; var - ExtensionStart, Index, NextDash: Integer; + SearchStart, ExtensionStart, ExtensionEnd, Index, NextDash: Integer; Tail, Subtag, KeyLower: string; begin Result := False; AValue := ''; KeyLower := 
LowerCase(AKey); - ExtensionStart := Pos('-u-', LowerCase(ALocale)); - if ExtensionStart = 0 then - Exit; - - Tail := Copy(ALocale, ExtensionStart + 3, MaxInt); - Index := 1; - while Index <= Length(Tail) do + SearchStart := 1; + while FindUnicodeExtensionRangeFrom(ALocale, SearchStart, ExtensionStart, ExtensionEnd) do begin - NextDash := Pos('-', Copy(Tail, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(Tail) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); - Index := NextDash + 1; + Tail := Copy(ALocale, ExtensionStart + 3, ExtensionEnd - ExtensionStart - 3); + Index := 1; + while Index <= Length(Tail) do + begin + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); + Index := NextDash + 1; - if Length(Subtag) <> 2 then - Continue; + if Length(Subtag) <> 2 then + Continue; - if Subtag = KeyLower then - begin - Result := True; - while Index <= Length(Tail) do + if Subtag = KeyLower then begin - NextDash := Pos('-', Copy(Tail, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(Tail) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); - if Length(Subtag) = 2 then - Exit; - if AValue = '' then - AValue := Subtag - else - AValue := AValue + '-' + Subtag; - Index := NextDash + 1; + Result := True; + while Index <= Length(Tail) do + begin + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); + if Length(Subtag) = 2 then + Exit; + if AValue = '' then + AValue := Subtag + else + AValue := AValue + '-' + Subtag; + Index := NextDash + 1; + end; + Exit; end; - Exit; end; + + SearchStart := ExtensionEnd + 1; end; end; function 
RemoveUnicodeExtensionKey(const ALocale, AKey: string): string; var - ExtensionStart, Index, NextDash: Integer; - Base, Tail, Subtag, KeyLower, NewTail: string; + ExtensionStart, ExtensionEnd, Index, NextDash: Integer; + Base, Tail, Suffix, Subtag, KeyLower, NewTail: string; Removing: Boolean; begin KeyLower := LowerCase(AKey); - ExtensionStart := Pos('-u-', LowerCase(ALocale)); - if ExtensionStart = 0 then + if not FindUnicodeExtensionRange(ALocale, ExtensionStart, ExtensionEnd) then begin Result := ALocale; Exit; end; Base := Copy(ALocale, 1, ExtensionStart - 1); - Tail := Copy(ALocale, ExtensionStart + 3, MaxInt); + Tail := Copy(ALocale, ExtensionStart + 3, ExtensionEnd - ExtensionStart - 3); + Suffix := Copy(ALocale, ExtensionEnd, MaxInt); NewTail := ''; Removing := False; Index := 1; @@ -267,25 +365,28 @@ function RemoveUnicodeExtensionKey(const ALocale, AKey: string): string; end; if NewTail = '' then - Result := Base + Result := Base + Suffix else - Result := Base + '-u-' + NewTail; + Result := Base + '-u-' + NewTail + Suffix; end; function AddUnicodeExtensionKey(const ALocale, AKey, AValue: string): string; var - ExtensionStart: Integer; + ExtensionStart, ExtensionEnd, PrivateUseStart: Integer; Addition: string; begin Addition := AKey; if AValue <> '' then Addition := Addition + '-' + AValue; - ExtensionStart := Pos('-u-', LowerCase(ALocale)); - if ExtensionStart = 0 then - Result := ALocale + '-u-' + Addition + if FindUnicodeExtensionRange(ALocale, ExtensionStart, ExtensionEnd) then + Result := Copy(ALocale, 1, ExtensionEnd - 1) + '-' + Addition + + Copy(ALocale, ExtensionEnd, MaxInt) + else if FindSingletonExtensionStart(ALocale, 'x', PrivateUseStart) then + Result := Copy(ALocale, 1, PrivateUseStart - 1) + '-u-' + Addition + + Copy(ALocale, PrivateUseStart, MaxInt) else - Result := ALocale + '-' + Addition; + Result := ALocale + '-u-' + Addition end; { TGocciaIntlCollatorValue } diff --git a/tests/built-ins/Intl/Collator/constructor.js 
b/tests/built-ins/Intl/Collator/constructor.js index c735da65..1eca19d8 100644 --- a/tests/built-ins/Intl/Collator/constructor.js +++ b/tests/built-ins/Intl/Collator/constructor.js @@ -44,4 +44,9 @@ describe.runIf(isIntl)("Intl.Collator constructor", () => { expect(collation.locale).toBe("de-u-co-phonebk"); expect(collation.collation).toBe("phonebk"); }); + + test("ignores Unicode extension-like private-use subtags", () => { + const options = new Intl.Collator("de-x-u-co-phonebk").resolvedOptions(); + expect(options.collation).toBe("default"); + }); }); diff --git a/tests/built-ins/Intl/supportedValuesOf.js b/tests/built-ins/Intl/supportedValuesOf.js index e7bca48d..2860fb70 100644 --- a/tests/built-ins/Intl/supportedValuesOf.js +++ b/tests/built-ins/Intl/supportedValuesOf.js @@ -17,6 +17,33 @@ describe.runIf(isIntl)("Intl.supportedValuesOf", () => { expect(Array.isArray(result)).toBe(true); }); + test("collation values exclude Collator-disallowed entries", () => { + const result = Intl.supportedValuesOf("collation"); + expect(result.includes("big5han")).toBe(false); + expect(result.includes("direct")).toBe(false); + expect(result.includes("ducet")).toBe(false); + expect(result.includes("gb2312")).toBe(false); + expect(result.includes("reformed")).toBe(false); + expect(result.includes("search")).toBe(false); + expect(result.includes("standard")).toBe(false); + }); + + test("collation values are accepted by Intl.Collator", () => { + const locales = ["en", "ar", "de", "es", "hi", "ko", "ln", "si", "sv", "zh"]; + + for (const collation of Intl.supportedValuesOf("collation")) { + let supported = false; + for (const locale of locales) { + const options = new Intl.Collator(locale, { collation }).resolvedOptions(); + if (options.collation === collation) { + supported = true; + break; + } + } + expect(supported).toBe(true); + } + }); + test("returns an array for 'currency'", () => { const result = Intl.supportedValuesOf("currency"); 
expect(Array.isArray(result)).toBe(true); From e5dcc7b154dcb5bbd7c90a318f79d6eae8cb965c Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Tue, 2 Jun 2026 07:04:27 +0200 Subject: [PATCH 5/6] fix: address PR review findings --- source/units/Goccia.Builtins.Intl.pas | 60 ++++++++++++++++-- source/units/Goccia.Values.ArrayValue.pas | 18 ++++++ source/units/Goccia.Values.IntlCollator.pas | 37 ++++++++++- .../units/Goccia.Values.StringObjectValue.pas | 61 +++++++++++++++++-- tests/built-ins/Array/prototype/sort.js | 14 +++++ tests/built-ins/Array/prototype/toSorted.js | 6 ++ tests/built-ins/Intl/Collator/constructor.js | 27 ++++++++ .../String/prototype/localeCompare.js | 12 ++++ 8 files changed, 224 insertions(+), 11 deletions(-) diff --git a/source/units/Goccia.Builtins.Intl.pas b/source/units/Goccia.Builtins.Intl.pas index 8b85c11e..64fdade1 100644 --- a/source/units/Goccia.Builtins.Intl.pas +++ b/source/units/Goccia.Builtins.Intl.pas @@ -75,7 +75,8 @@ implementation Goccia.Values.IntlSegmenter, Goccia.Values.NativeFunction, Goccia.Values.ObjectPropertyDescriptor, - Goccia.Values.SymbolValue; + Goccia.Values.SymbolValue, + Goccia.Values.ToObject; { TGocciaIntlBuiltin } @@ -704,6 +705,57 @@ procedure TGocciaIntlBuiltin.RegisterLocale; { Collator } +function CollatorLocaleArgumentToLocale(const AArg: TGocciaValue): string; +var + Element: TGocciaValue; + Tag, Canonical: string; + FirstUnicodeExtension, SecondUnicodeExtension: Integer; + LowerTag, Tail: string; +begin + Result := ''; + if (AArg is TGocciaUndefinedLiteralValue) or (AArg = nil) then + Exit; + + if AArg is TGocciaStringLiteralValue then + Tag := TGocciaStringLiteralValue(AArg).Value + else if AArg is TGocciaArrayValue then + begin + if TGocciaArrayValue(AArg).GetLength = 0 then + Exit; + Element := TGocciaArrayValue(AArg).GetElement(0); + if Element is TGocciaStringLiteralValue then + Tag := TGocciaStringLiteralValue(Element).Value + else if Element is TGocciaObjectValue then + Tag := 
Element.ToStringLiteral.Value + else + ThrowTypeError('locales array elements must be strings or objects'); + end + else if AArg is TGocciaObjectValue then + Tag := AArg.ToStringLiteral.Value + else + ThrowTypeError('locales argument must be a string, object, array, or undefined'); + + Canonical := CanonicalizeUnicodeLocaleId(Tag); + if Canonical = '' then + begin + LowerTag := LowerCase(Tag); + FirstUnicodeExtension := Pos('-u-', LowerTag); + if FirstUnicodeExtension <> 0 then + begin + Tail := Copy(LowerTag, FirstUnicodeExtension + 3, MaxInt); + SecondUnicodeExtension := Pos('-u-', Tail); + if SecondUnicodeExtension <> 0 then + begin + SecondUnicodeExtension := FirstUnicodeExtension + 3 + SecondUnicodeExtension - 1; + Canonical := CanonicalizeUnicodeLocaleId(Copy(Tag, 1, SecondUnicodeExtension - 1)); + end; + end; + if Canonical = '' then + ThrowRangeError(Format('invalid language tag: %s', [Tag])); + end; + Result := Tag; +end; + function TGocciaIntlBuiltin.CollatorConstructorFn(const AArgs: TGocciaArgumentsCollection; const AThisValue: TGocciaValue): TGocciaValue; var @@ -712,10 +764,10 @@ function TGocciaIntlBuiltin.CollatorConstructorFn(const AArgs: TGocciaArgumentsC begin Locale := ''; if AArgs.Length >= 1 then - Locale := AArgs.GetElement(0).ToStringLiteral.Value; + Locale := CollatorLocaleArgumentToLocale(AArgs.GetElement(0)); Options := nil; - if (AArgs.Length >= 2) and (AArgs.GetElement(1) is TGocciaObjectValue) then - Options := TGocciaObjectValue(AArgs.GetElement(1)); + if (AArgs.Length >= 2) and not (AArgs.GetElement(1) is TGocciaUndefinedLiteralValue) then + Options := ToObject(AArgs.GetElement(1)); Result := TGocciaIntlCollatorValue.Create(Locale, Options); end; diff --git a/source/units/Goccia.Values.ArrayValue.pas b/source/units/Goccia.Values.ArrayValue.pas index 7cd3ed44..86c46c6d 100644 --- a/source/units/Goccia.Values.ArrayValue.pas +++ b/source/units/Goccia.Values.ArrayValue.pas @@ -464,6 +464,15 @@ function DefaultCompare(constref A, B: 
TGocciaValue): Integer; var StrA, StrB: string; begin + if A is TGocciaUndefinedLiteralValue then + begin + if B is TGocciaUndefinedLiteralValue then + Exit(0); + Exit(1); + end; + if B is TGocciaUndefinedLiteralValue then + Exit(-1); + StrA := A.ToStringLiteral.Value; StrB := B.ToStringLiteral.Value; if StrA < StrB then @@ -801,6 +810,15 @@ function CallCompareFunc(const ACompareFunc: TGocciaFunctionBase; const ACallArg PreviousContinuation: TGocciaGeneratorContinuation; CompareRoot, AValueRoot, BValueRoot, ThisRoot: TGocciaTempRoot; begin + if A is TGocciaUndefinedLiteralValue then + begin + if B is TGocciaUndefinedLiteralValue then + Exit(0); + Exit(1); + end; + if B is TGocciaUndefinedLiteralValue then + Exit(-1); + ACallArgs.SetElement(0, A); ACallArgs.SetElement(1, B); InitializeTempRoot(CompareRoot); diff --git a/source/units/Goccia.Values.IntlCollator.pas b/source/units/Goccia.Values.IntlCollator.pas index 5979b1f5..dd1bd5c9 100644 --- a/source/units/Goccia.Values.IntlCollator.pas +++ b/source/units/Goccia.Values.IntlCollator.pas @@ -128,6 +128,34 @@ function IsValidCaseFirstValue(const AValue: string): Boolean; Result := (AValue = 'upper') or (AValue = 'lower') or (AValue = 'false'); end; +procedure ValidateStringOptionValue(const AValue, AName: string; const AAllowed: array of string); +var + I: Integer; +begin + for I := Low(AAllowed) to High(AAllowed) do + begin + if AValue = AAllowed[I] then + Exit; + end; + ThrowRangeError(Format(SErrorIntlInvalidOption, [AValue, AName])); +end; + +function ReadCollatorStringOption(const AOptions: TGocciaObjectValue; + const AName: string; var AValue: string; const AAllowed: array of string): Boolean; +var + S: string; +begin + Result := False; + if TryReadStringOption(AOptions, AName, S) then + begin + if ContainsNulCharacter(S) then + ThrowRangeError(Format(SErrorIntlInvalidOption, [S, AName])); + ValidateStringOptionValue(S, AName, AAllowed); + AValue := S; + Result := True; + end; +end; + function 
NormalizeCollationValue(const AValue: string): string; begin if AValue = 'phonebook' then @@ -471,8 +499,9 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption if Assigned(AOptions) then begin - ReadValidatedStringOption(AOptions, 'sensitivity', FSensitivity); - ReadValidatedStringOption(AOptions, 'usage', FUsage); + ReadCollatorStringOption(AOptions, 'sensitivity', FSensitivity, + ['base', 'accent', 'case', 'variant']); + ReadCollatorStringOption(AOptions, 'usage', FUsage, ['sort', 'search']); V := AOptions.GetProperty('ignorePunctuation'); if Assigned(V) and not (V is TGocciaUndefinedLiteralValue) then FIgnorePunctuation := V.ToBooleanLiteral.Value; @@ -487,13 +516,15 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption begin if ContainsNulCharacter(Ignored) then ThrowRangeError(Format(SErrorIntlInvalidOption, [Ignored, 'caseFirst'])); + if not IsValidCaseFirstValue(Ignored) then + ThrowRangeError(Format(SErrorIntlInvalidOption, [Ignored, 'caseFirst'])); FCaseFirst := Ignored; end; CollationOptionPresent := TryReadStringOption(AOptions, 'collation', CollationOption); CollationOption := NormalizeCollationValue(CollationOption); if CollationOptionPresent and IsSupportedCollationValue(FLocale, CollationOption) then FCollation := CollationOption; - ReadValidatedStringOption(AOptions, 'localeMatcher', Ignored); + ReadCollatorStringOption(AOptions, 'localeMatcher', Ignored, ['lookup', 'best fit']); end; if NumericOptionPresent and TryGetUnicodeExtensionKey(FLocale, 'kn', LocaleNumeric) then diff --git a/source/units/Goccia.Values.StringObjectValue.pas b/source/units/Goccia.Values.StringObjectValue.pas index 121fece9..65d43603 100644 --- a/source/units/Goccia.Values.StringObjectValue.pas +++ b/source/units/Goccia.Values.StringObjectValue.pas @@ -90,6 +90,7 @@ implementation SysUtils, IntlICU, + IntlLocaleResolver, IntlTypes, TextSemantics, @@ -107,7 +108,8 @@ implementation Goccia.Values.Iterator.Concrete, 
Goccia.Values.Iterator.RegExp, Goccia.Values.ProxyValue, - Goccia.Values.SymbolValue; + Goccia.Values.SymbolValue, + Goccia.Values.ToObject; // String.prototype lives in a per-realm slot. Method host and member // definitions stay process-wide (immutable across realms). @@ -126,6 +128,57 @@ function GetSharedStringPrototype: TGocciaObjectValue; inline; Result := nil; end; +function LocaleCompareArgumentToLocale(const AArg: TGocciaValue): string; +var + Element: TGocciaValue; + Tag, Canonical: string; + FirstUnicodeExtension, SecondUnicodeExtension: Integer; + LowerTag, Tail: string; +begin + Result := ''; + if (AArg is TGocciaUndefinedLiteralValue) or (AArg = nil) then + Exit; + + if AArg is TGocciaStringLiteralValue then + Tag := TGocciaStringLiteralValue(AArg).Value + else if AArg is TGocciaArrayValue then + begin + if TGocciaArrayValue(AArg).GetLength = 0 then + Exit; + Element := TGocciaArrayValue(AArg).GetElement(0); + if Element is TGocciaStringLiteralValue then + Tag := TGocciaStringLiteralValue(Element).Value + else if Element is TGocciaObjectValue then + Tag := Element.ToStringLiteral.Value + else + ThrowTypeError('locales array elements must be strings or objects'); + end + else if AArg is TGocciaObjectValue then + Tag := AArg.ToStringLiteral.Value + else + ThrowTypeError('locales argument must be a string, object, array, or undefined'); + + Canonical := CanonicalizeUnicodeLocaleId(Tag); + if Canonical = '' then + begin + LowerTag := LowerCase(Tag); + FirstUnicodeExtension := Pos('-u-', LowerTag); + if FirstUnicodeExtension <> 0 then + begin + Tail := Copy(LowerTag, FirstUnicodeExtension + 3, MaxInt); + SecondUnicodeExtension := Pos('-u-', Tail); + if SecondUnicodeExtension <> 0 then + begin + SecondUnicodeExtension := FirstUnicodeExtension + 3 + SecondUnicodeExtension - 1; + Canonical := CanonicalizeUnicodeLocaleId(Copy(Tag, 1, SecondUnicodeExtension - 1)); + end; + end; + if Canonical = '' then + ThrowRangeError(Format('invalid language tag: %s', 
[Tag])); + end; + Result := Tag; +end; + { TGocciaStringObjectValue } function CoerceRegExpValue(const AValue: TGocciaValue; @@ -1950,11 +2003,11 @@ function TGocciaStringObjectValue.StringLocaleCompare(const AArgs: TGocciaArgume Locale := ''; if (AArgs.Length > 1) and not (AArgs.GetElement(1) is TGocciaUndefinedLiteralValue) then - Locale := AArgs.GetElement(1).ToStringLiteral.Value; + Locale := LocaleCompareArgumentToLocale(AArgs.GetElement(1)); Options := nil; - if (AArgs.Length > 2) and (AArgs.GetElement(2) is TGocciaObjectValue) then - Options := TGocciaObjectValue(AArgs.GetElement(2)); + if (AArgs.Length > 2) and not (AArgs.GetElement(2) is TGocciaUndefinedLiteralValue) then + Options := ToObject(AArgs.GetElement(2)); Collator := TGocciaIntlCollatorValue.Create(Locale, Options); Result := TGocciaNumberLiteralValue.Create(Collator.CompareStrings(StringValue, ThatString)); diff --git a/tests/built-ins/Array/prototype/sort.js b/tests/built-ins/Array/prototype/sort.js index e646d5b3..3819864f 100644 --- a/tests/built-ins/Array/prototype/sort.js +++ b/tests/built-ins/Array/prototype/sort.js @@ -103,6 +103,20 @@ describe("Array.prototype.sort", () => { expect(arr).toEqual([1, 200, 40, 5, 700, 80, 9]); }); + test("sort moves undefined after defined values", () => { + const arr = [undefined, "z", "a"]; + arr.sort(); + expect(arr[0]).toBe("a"); + expect(arr[1]).toBe("z"); + expect(arr[2]).toBe(undefined); + }); + + test("sort moves undefined after defined values with comparator", () => { + const arr = [undefined, "z", "a"]; + arr.sort(() => -1); + expect(arr[2]).toBe(undefined); + }); + test("sort moves holes to end in sparse array", () => { const arr = [3, , 1]; arr.sort(); diff --git a/tests/built-ins/Array/prototype/toSorted.js b/tests/built-ins/Array/prototype/toSorted.js index 1e161c03..c42bb091 100644 --- a/tests/built-ins/Array/prototype/toSorted.js +++ b/tests/built-ins/Array/prototype/toSorted.js @@ -32,6 +32,12 @@ test("Array.prototype.toSorted with custom 
sort function", () => { expect(sorted2).toEqual([1, 2, 3, 4, 5]); }); +test("toSorted moves undefined after defined values", () => { + const arr = [undefined, "z", "a"]; + expect(arr.toSorted()).toEqual(["a", "z", undefined]); + expect(arr.toSorted(() => -1)[2]).toBe(undefined); +}); + test("toSorted returns a new array, not the original", () => { const arr = [3, 1, 2]; const sorted = arr.toSorted(); diff --git a/tests/built-ins/Intl/Collator/constructor.js b/tests/built-ins/Intl/Collator/constructor.js index 1eca19d8..f5e237f8 100644 --- a/tests/built-ins/Intl/Collator/constructor.js +++ b/tests/built-ins/Intl/Collator/constructor.js @@ -16,6 +16,18 @@ describe.runIf(isIntl)("Intl.Collator constructor", () => { expect(collator).toBeInstanceOf(Intl.Collator); }); + test("creates an instance with an array locale argument", () => { + const collator = new Intl.Collator(["en-US"]); + expect(collator).toBeInstanceOf(Intl.Collator); + }); + + test("rejects invalid locale arguments", () => { + expect(() => new Intl.Collator(null)).toThrow(TypeError); + expect(() => new Intl.Collator([NaN])).toThrow(TypeError); + expect(() => new Intl.Collator(["i"])).toThrow(RangeError); + expect(() => new Intl.Collator(["de_DE"])).toThrow(RangeError); + }); + test("compare property is a function", () => { const collator = new Intl.Collator(); expect(typeof collator.compare).toBe("function"); @@ -49,4 +61,19 @@ describe.runIf(isIntl)("Intl.Collator constructor", () => { const options = new Intl.Collator("de-x-u-co-phonebk").resolvedOptions(); expect(options.collation).toBe("default"); }); + + test("rejects invalid caseFirst option values", () => { + expect(() => new Intl.Collator("en", { caseFirst: "invalid" })).toThrow(RangeError); + }); + + test("rejects invalid string option values", () => { + expect(() => new Intl.Collator("en", { usage: "invalid" })).toThrow(RangeError); + expect(() => new Intl.Collator("en", { sensitivity: "invalid" })).toThrow(RangeError); + expect(() => new 
Intl.Collator("en", { localeMatcher: "invalid" })).toThrow(RangeError); + }); + + test("object-coerces non-undefined options", () => { + expect(() => new Intl.Collator("en", null)).toThrow(TypeError); + expect(new Intl.Collator("en", true)).toBeInstanceOf(Intl.Collator); + }); }); diff --git a/tests/built-ins/String/prototype/localeCompare.js b/tests/built-ins/String/prototype/localeCompare.js index f343fc52..c8a505ea 100644 --- a/tests/built-ins/String/prototype/localeCompare.js +++ b/tests/built-ins/String/prototype/localeCompare.js @@ -28,4 +28,16 @@ describe("String.prototype.localeCompare", () => { expect("A".localeCompare("a", "en", { sensitivity: "base" })).toBe(0); expect("2".localeCompare("10", "en", { numeric: true }) < 0).toBe(true); }); + + test("object-coerces non-undefined options", () => { + expect(() => "a".localeCompare("b", "en", null)).toThrow(TypeError); + expect(typeof "a".localeCompare("b", "en", true)).toBe("number"); + }); + + test("rejects invalid locale arguments", () => { + expect(() => "a".localeCompare("b", null)).toThrow(TypeError); + expect(() => "a".localeCompare("b", [NaN])).toThrow(TypeError); + expect(() => "a".localeCompare("b", ["i"])).toThrow(RangeError); + expect(() => "a".localeCompare("b", ["de_DE"])).toThrow(RangeError); + }); }); From f8ceba7410f5fb1e2c7152abe93bb699cf8ecbc2 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Tue, 2 Jun 2026 07:21:15 +0200 Subject: [PATCH 6/6] fix(intl): align supported collation values --- source/units/Goccia.Builtins.Intl.pas | 7 +++++++ source/units/Goccia.Values.IntlCollator.pas | 1 - tests/built-ins/Intl/supportedValuesOf.js | 12 ++++++------ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/source/units/Goccia.Builtins.Intl.pas b/source/units/Goccia.Builtins.Intl.pas index 64fdade1..6420435f 100644 --- a/source/units/Goccia.Builtins.Intl.pas +++ b/source/units/Goccia.Builtins.Intl.pas @@ -214,13 +214,18 @@ function TGocciaIntlBuiltin.SupportedValuesOf(const AArgs: 
TGocciaArgumentsColle end else if Key = 'collation' then begin + AddString('big5han'); AddString('compat'); AddString('dict'); + AddString('direct'); + AddString('ducet'); AddString('emoji'); AddString('eor'); + AddString('gb2312'); AddString('phonebk'); AddString('phonetic'); AddString('pinyin'); + AddString('reformed'); AddString('searchjl'); AddString('stroke'); AddString('trad'); @@ -712,6 +717,8 @@ function CollatorLocaleArgumentToLocale(const AArg: TGocciaValue): string; FirstUnicodeExtension, SecondUnicodeExtension: Integer; LowerTag, Tail: string; begin + // TODO: CollatorLocaleArgumentToLocale duplicates LocaleCompareArgumentToLocale; + // extract both into a shared Intl locale parsing utility. Result := ''; if (AArg is TGocciaUndefinedLiteralValue) or (AArg = nil) then Exit; diff --git a/source/units/Goccia.Values.IntlCollator.pas b/source/units/Goccia.Values.IntlCollator.pas index dd1bd5c9..ab4f1bcf 100644 --- a/source/units/Goccia.Values.IntlCollator.pas +++ b/source/units/Goccia.Values.IntlCollator.pas @@ -288,7 +288,6 @@ function IsSupportedCollationValue(const ALocale, AValue: string): Boolean; Exit; end; end; - Exit; end; for I := Low(SupportedCollations) to High(SupportedCollations) do diff --git a/tests/built-ins/Intl/supportedValuesOf.js b/tests/built-ins/Intl/supportedValuesOf.js index 2860fb70..ea770ea1 100644 --- a/tests/built-ins/Intl/supportedValuesOf.js +++ b/tests/built-ins/Intl/supportedValuesOf.js @@ -17,13 +17,13 @@ describe.runIf(isIntl)("Intl.supportedValuesOf", () => { expect(Array.isArray(result)).toBe(true); }); - test("collation values exclude Collator-disallowed entries", () => { + test("collation values match Collator-accepted entries", () => { const result = Intl.supportedValuesOf("collation"); - expect(result.includes("big5han")).toBe(false); - expect(result.includes("direct")).toBe(false); - expect(result.includes("ducet")).toBe(false); - expect(result.includes("gb2312")).toBe(false); - 
expect(result.includes("reformed")).toBe(false); + expect(result.includes("big5han")).toBe(true); + expect(result.includes("direct")).toBe(true); + expect(result.includes("ducet")).toBe(true); + expect(result.includes("gb2312")).toBe(true); + expect(result.includes("reformed")).toBe(true); expect(result.includes("search")).toBe(false); expect(result.includes("standard")).toBe(false); });