From 5123b0e2c3d9ba47e1e17397fd0bc7fb6a1755c9 Mon Sep 17 00:00:00 2001 From: Johannes Stein Date: Tue, 2 Jun 2026 08:08:46 +0200 Subject: [PATCH] fix(intl): parse Locale unicode extension keys --- source/units/Goccia.Intl.Helpers.pas | 222 +++++++++++++++-- source/units/Goccia.Values.IntlCollator.pas | 262 ++------------------ source/units/Goccia.Values.IntlLocale.pas | 94 +++---- tests/built-ins/Intl/Locale/constructor.js | 39 +++ 4 files changed, 300 insertions(+), 317 deletions(-) diff --git a/source/units/Goccia.Intl.Helpers.pas b/source/units/Goccia.Intl.Helpers.pas index e8843d0c..d7c8745a 100644 --- a/source/units/Goccia.Intl.Helpers.pas +++ b/source/units/Goccia.Intl.Helpers.pas @@ -28,6 +28,8 @@ procedure ReadValidatedStringOption(const AOptions: TGocciaObjectValue; function LocaleWithoutUnicodeExtension(const ALocale: string): string; function TryGetUnicodeLocaleExtensionKeyword(const ALocale, AKey: string; out AValue: string): Boolean; +function RemoveUnicodeLocaleExtensionKeyword(const ALocale, AKey: string): string; +function AddUnicodeLocaleExtensionKeyword(const ALocale, AKey, AValue: string): string; function IsSupportedNumberingSystem(const AValue: string): Boolean; implementation @@ -87,49 +89,227 @@ procedure ReadValidatedStringOption(const AOptions: TGocciaObjectValue; end; end; +function FindSingletonExtensionStart(const ALocale, ASingleton: string; out AStart: Integer): Boolean; +var + Index, NextDash, SubtagStart: Integer; + Subtag, SingletonLower: string; +begin + Result := False; + AStart := 0; + SingletonLower := LowerCase(ASingleton); + Index := 1; + while Index <= Length(ALocale) do + begin + SubtagStart := Index; + NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(ALocale) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); + + if Length(Subtag) = 1 then + begin + if Subtag = SingletonLower then + begin + AStart := SubtagStart - 1; + Result := True; + Exit; + end; + if Subtag = 'x' then + Exit; + end; + + Index := NextDash + 1; + end; +end; + +function FindUnicodeExtensionRangeFrom(const ALocale: string; ASearchStart: Integer; + out AStart, AEnd: Integer): Boolean; +var + Index, NextDash, SubtagStart: Integer; + Subtag: string; +begin + Result := False; + AStart := 0; + AEnd := 0; + Index := ASearchStart; + if Index < 1 then + Index := 1; + while Index <= Length(ALocale) do + begin + SubtagStart := Index; + NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(ALocale) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); + + if Length(Subtag) = 1 then + begin + if Subtag = 'x' then + Exit; + if Subtag = 'u' then + begin + AStart := SubtagStart - 1; + AEnd := Length(ALocale) + 1; + Index := NextDash + 1; + while Index <= Length(ALocale) do + begin + SubtagStart := Index; + NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(ALocale) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); + if Length(Subtag) = 1 then + begin + AEnd := SubtagStart - 1; + Break; + end; + Index := NextDash + 1; + end; + Result := True; + Exit; + end; + end; + + Index := NextDash + 1; + end; +end; + +function FindUnicodeExtensionRange(const ALocale: string; out AStart, AEnd: Integer): Boolean; +begin + Result := FindUnicodeExtensionRangeFrom(ALocale, 1, AStart, AEnd); +end; + function LocaleWithoutUnicodeExtension(const ALocale: string): string; var - ExtensionStart: Integer; + ExtensionStart, ExtensionEnd: Integer; begin - ExtensionStart := Pos('-u-', ALocale); - if ExtensionStart = 0 then - Result := ALocale + if FindUnicodeExtensionRange(ALocale, ExtensionStart, ExtensionEnd) then + Result := Copy(ALocale, 1, ExtensionStart - 1) + Copy(ALocale, ExtensionEnd, MaxInt) else - Result := Copy(ALocale, 1, ExtensionStart - 1); + Result := ALocale; end; function TryGetUnicodeLocaleExtensionKeyword(const ALocale, AKey: string; out AValue: string): Boolean; var - ExtensionStart, Index, NextDash: Integer; - Tail, Subtag: string; + SearchStart, ExtensionStart, ExtensionEnd, Index, NextDash: Integer; + Tail, Subtag, KeyLower: string; begin Result := False; AValue := ''; - ExtensionStart := Pos('-u-', ALocale); - if ExtensionStart = 0 then + KeyLower := LowerCase(AKey); + SearchStart := 1; + while FindUnicodeExtensionRangeFrom(ALocale, SearchStart, ExtensionStart, ExtensionEnd) do + begin + Tail := Copy(ALocale, ExtensionStart + 3, ExtensionEnd - ExtensionStart - 3); + Index := 1; + while Index <= Length(Tail) do + begin + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); + Index := NextDash + 1; + + if Length(Subtag) <> 2 then + Continue; + + if Subtag = KeyLower then + begin + Result := True; + while Index <= Length(Tail) do + begin + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); + if NextDash = 0 then + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; + Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); + if Length(Subtag) = 2 then + Exit; + if AValue = '' then + AValue := Subtag + else + AValue := AValue + '-' + Subtag; + Index := NextDash + 1; + end; + Exit; + end; + end; + + SearchStart := ExtensionEnd + 1; + end; +end; + +function RemoveUnicodeLocaleExtensionKeyword(const ALocale, AKey: string): string; +var + ExtensionStart, ExtensionEnd, Index, NextDash: Integer; + Base, Tail, Suffix, Subtag, KeyLower, NewTail: string; + Removing: Boolean; +begin + KeyLower := LowerCase(AKey); + if not FindUnicodeExtensionRange(ALocale, ExtensionStart, ExtensionEnd) then + begin + Result := ALocale; Exit; + end; - Tail := Copy(ALocale, ExtensionStart + 3, MaxInt); + Base := Copy(ALocale, 1, ExtensionStart - 1); + Tail := Copy(ALocale, ExtensionStart + 3, ExtensionEnd - ExtensionStart - 3); + Suffix := Copy(ALocale, ExtensionEnd, MaxInt); + NewTail := ''; + Removing := False; Index := 1; while Index <= Length(Tail) do begin - NextDash := PosEx('-', Tail, Index); + NextDash := Pos('-', Copy(Tail, Index, MaxInt)); if NextDash = 0 then - NextDash := Length(Tail) + 1; + NextDash := Length(Tail) + 1 + else + NextDash := Index + NextDash - 1; Subtag := Copy(Tail, Index, NextDash - Index); Index := NextDash + 1; - if SameText(Subtag, AKey) then - begin - NextDash := PosEx('-', Tail, Index); - if NextDash = 0 then - NextDash := Length(Tail) + 1; - AValue := Copy(Tail, Index, NextDash - Index); - Result := AValue <> ''; - Exit; - end; + if Length(Subtag) = 2 then + Removing := LowerCase(Subtag) = KeyLower; + if Removing then + Continue; + if NewTail <> '' then + NewTail := NewTail + '-'; + NewTail := NewTail + Subtag; end; + + if NewTail = '' then + Result := Base + Suffix + else + Result := Base + '-u-' + NewTail + Suffix; +end; + +function AddUnicodeLocaleExtensionKeyword(const ALocale, AKey, AValue: string): string; +var + ExtensionStart, ExtensionEnd, PrivateUseStart: Integer; + Addition: string; +begin + Addition := AKey; + if AValue <> '' then + Addition := Addition + '-' + AValue; + + if FindUnicodeExtensionRange(ALocale, ExtensionStart, ExtensionEnd) then + Result := Copy(ALocale, 1, ExtensionEnd - 1) + '-' + Addition + + Copy(ALocale, ExtensionEnd, MaxInt) + else if FindSingletonExtensionStart(ALocale, 'x', PrivateUseStart) then + Result := Copy(ALocale, 1, PrivateUseStart - 1) + '-u-' + Addition + + Copy(ALocale, PrivateUseStart, MaxInt) + else + Result := ALocale + '-u-' + Addition; end; function IsSupportedNumberingSystem(const AValue: string): Boolean; diff --git a/source/units/Goccia.Values.IntlCollator.pas b/source/units/Goccia.Values.IntlCollator.pas index ab4f1bcf..4ce9d810 100644 --- a/source/units/Goccia.Values.IntlCollator.pas +++ b/source/units/Goccia.Values.IntlCollator.pas @@ -168,102 +168,6 @@ function NormalizeCollationValue(const AValue: string): string; Result := AValue; end; -function FindSingletonExtensionStart(const ALocale, ASingleton: string; out AStart: Integer): Boolean; -var - Index, NextDash, SubtagStart: Integer; - Subtag, SingletonLower: string; -begin - Result := False; - AStart := 0; - SingletonLower := LowerCase(ASingleton); - Index := 1; - while Index <= Length(ALocale) do - begin - SubtagStart := Index; - NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(ALocale) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); - - if Length(Subtag) = 1 then - begin - if Subtag = SingletonLower then - begin - AStart := SubtagStart - 1; - Result := True; - Exit; - end; - if Subtag = 'x' then - Exit; - end; - - Index := NextDash + 1; - end; -end; - -function FindUnicodeExtensionRangeFrom(const ALocale: string; ASearchStart: Integer; - out AStart, AEnd: Integer): Boolean; -var - Index, NextDash, SubtagStart: Integer; - Subtag: string; -begin - Result := False; - AStart := 0; - AEnd := 0; - Index := ASearchStart; - if Index < 1 then - Index := 1; - while Index <= Length(ALocale) do - begin - SubtagStart := Index; - NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(ALocale) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); - - if Length(Subtag) = 1 then - begin - if Subtag = 'x' then - Exit; - if Subtag = 'u' then - begin - AStart := SubtagStart - 1; - AEnd := Length(ALocale) + 1; - Index := NextDash + 1; - while Index <= Length(ALocale) do - begin - SubtagStart := Index; - NextDash := Pos('-', Copy(ALocale, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(ALocale) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := LowerCase(Copy(ALocale, Index, NextDash - Index)); - if Length(Subtag) = 1 then - begin - AEnd := SubtagStart - 1; - Break; - end; - Index := NextDash + 1; - end; - Result := True; - Exit; - end; - end; - - Index := NextDash + 1; - end; -end; - -function FindUnicodeExtensionRange(const ALocale: string; out AStart, AEnd: Integer): Boolean; -begin - Result := FindUnicodeExtensionRangeFrom(ALocale, 1, AStart, AEnd); -end; - function IsSupportedCollationValue(const ALocale, AValue: string): Boolean; const SupportedCollations: array[0..16] of string = ( @@ -300,122 +204,6 @@ function IsSupportedCollationValue(const ALocale, AValue: string): Boolean; end; end; -function TryGetUnicodeExtensionKey(const ALocale, AKey: string; out AValue: string): Boolean; -var - SearchStart, ExtensionStart, ExtensionEnd, Index, NextDash: Integer; - Tail, Subtag, KeyLower: string; -begin - Result := False; - AValue := ''; - KeyLower := LowerCase(AKey); - SearchStart := 1; - while FindUnicodeExtensionRangeFrom(ALocale, SearchStart, ExtensionStart, ExtensionEnd) do - begin - Tail := Copy(ALocale, ExtensionStart + 3, ExtensionEnd - ExtensionStart - 3); - Index := 1; - while Index <= Length(Tail) do - begin - NextDash := Pos('-', Copy(Tail, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(Tail) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); - Index := NextDash + 1; - - if Length(Subtag) <> 2 then - Continue; - - if Subtag = KeyLower then - begin - Result := True; - while Index <= Length(Tail) do - begin - NextDash := Pos('-', Copy(Tail, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(Tail) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := LowerCase(Copy(Tail, Index, NextDash - Index)); - if Length(Subtag) = 2 then - Exit; - if AValue = '' then - AValue := Subtag - else - AValue := AValue + '-' + Subtag; - Index := NextDash + 1; - end; - Exit; - end; - end; - - SearchStart := ExtensionEnd + 1; - end; -end; - -function RemoveUnicodeExtensionKey(const ALocale, AKey: string): string; -var - ExtensionStart, ExtensionEnd, Index, NextDash: Integer; - Base, Tail, Suffix, Subtag, KeyLower, NewTail: string; - Removing: Boolean; -begin - KeyLower := LowerCase(AKey); - if not FindUnicodeExtensionRange(ALocale, ExtensionStart, ExtensionEnd) then - begin - Result := ALocale; - Exit; - end; - - Base := Copy(ALocale, 1, ExtensionStart - 1); - Tail := Copy(ALocale, ExtensionStart + 3, ExtensionEnd - ExtensionStart - 3); - Suffix := Copy(ALocale, ExtensionEnd, MaxInt); - NewTail := ''; - Removing := False; - Index := 1; - while Index <= Length(Tail) do - begin - NextDash := Pos('-', Copy(Tail, Index, MaxInt)); - if NextDash = 0 then - NextDash := Length(Tail) + 1 - else - NextDash := Index + NextDash - 1; - Subtag := Copy(Tail, Index, NextDash - Index); - Index := NextDash + 1; - - if Length(Subtag) = 2 then - Removing := LowerCase(Subtag) = KeyLower; - if Removing then - Continue; - if NewTail <> '' then - NewTail := NewTail + '-'; - NewTail := NewTail + Subtag; - end; - - if NewTail = '' then - Result := Base + Suffix - else - Result := Base + '-u-' + NewTail + Suffix; -end; - -function AddUnicodeExtensionKey(const ALocale, AKey, AValue: string): string; -var - ExtensionStart, ExtensionEnd, PrivateUseStart: Integer; - Addition: string; -begin - Addition := AKey; - if AValue <> '' then - Addition := Addition + '-' + AValue; - - if FindUnicodeExtensionRange(ALocale, ExtensionStart, ExtensionEnd) then - Result := Copy(ALocale, 1, ExtensionEnd - 1) + '-' + Addition + - Copy(ALocale, ExtensionEnd, MaxInt) - else if FindSingletonExtensionStart(ALocale, 'x', PrivateUseStart) then - Result := Copy(ALocale, 1, PrivateUseStart - 1) + '-u-' + Addition + - Copy(ALocale, PrivateUseStart, MaxInt) - else - Result := ALocale + '-u-' + Addition -end; - { TGocciaIntlCollatorValue } constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOptions: TGocciaObjectValue); @@ -444,8 +232,8 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption CaseFirstOptionPresent := False; CollationOptionPresent := False; - if TryGetUnicodeExtensionKey(FLocale, 'kn', LocaleNumeric) or - TryGetUnicodeExtensionKey(RawLocale, 'kn', LocaleNumeric) then + if TryGetUnicodeLocaleExtensionKeyword(FLocale, 'kn', LocaleNumeric) or + TryGetUnicodeLocaleExtensionKeyword(RawLocale, 'kn', LocaleNumeric) then begin if (LocaleNumeric = '') or (LocaleNumeric = 'true') then begin @@ -454,46 +242,46 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption else if LocaleNumeric = 'false' then FNumeric := False else - FLocale := RemoveUnicodeExtensionKey(FLocale, 'kn'); - if not TryGetUnicodeExtensionKey(FLocale, 'kn', Ignored) and + FLocale := RemoveUnicodeLocaleExtensionKeyword(FLocale, 'kn'); + if not TryGetUnicodeLocaleExtensionKeyword(FLocale, 'kn', Ignored) and ((LocaleNumeric = '') or (LocaleNumeric = 'true') or (LocaleNumeric = 'false')) then begin if LocaleNumeric = 'true' then - FLocale := AddUnicodeExtensionKey(FLocale, 'kn', '') + FLocale := AddUnicodeLocaleExtensionKeyword(FLocale, 'kn', '') else - FLocale := AddUnicodeExtensionKey(FLocale, 'kn', LocaleNumeric); + FLocale := AddUnicodeLocaleExtensionKeyword(FLocale, 'kn', LocaleNumeric); end; end; - if TryGetUnicodeExtensionKey(FLocale, 'kf', LocaleCaseFirst) or - TryGetUnicodeExtensionKey(RawLocale, 'kf', LocaleCaseFirst) then + if TryGetUnicodeLocaleExtensionKeyword(FLocale, 'kf', LocaleCaseFirst) or + TryGetUnicodeLocaleExtensionKeyword(RawLocale, 'kf', LocaleCaseFirst) then begin if IsValidCaseFirstValue(LocaleCaseFirst) then begin FCaseFirst := LocaleCaseFirst end else - FLocale := RemoveUnicodeExtensionKey(FLocale, 'kf'); - if not TryGetUnicodeExtensionKey(FLocale, 'kf', Ignored) and + FLocale := RemoveUnicodeLocaleExtensionKeyword(FLocale, 'kf'); + if not TryGetUnicodeLocaleExtensionKeyword(FLocale, 'kf', Ignored) and IsValidCaseFirstValue(LocaleCaseFirst) then - FLocale := AddUnicodeExtensionKey(FLocale, 'kf', LocaleCaseFirst); + FLocale := AddUnicodeLocaleExtensionKeyword(FLocale, 'kf', LocaleCaseFirst); end; - if TryGetUnicodeExtensionKey(FLocale, 'co', LocaleCollation) or - TryGetUnicodeExtensionKey(RawLocale, 'co', LocaleCollation) then + if TryGetUnicodeLocaleExtensionKeyword(FLocale, 'co', LocaleCollation) or + TryGetUnicodeLocaleExtensionKeyword(RawLocale, 'co', LocaleCollation) then begin LocaleCollation := NormalizeCollationValue(LocaleCollation); if IsSupportedCollationValue(FLocale, LocaleCollation) then begin FCollation := LocaleCollation; - if TryGetUnicodeExtensionKey(FLocale, 'co', Ignored) and + if TryGetUnicodeLocaleExtensionKeyword(FLocale, 'co', Ignored) and (NormalizeCollationValue(Ignored) <> LocaleCollation) then - FLocale := RemoveUnicodeExtensionKey(FLocale, 'co'); - if not TryGetUnicodeExtensionKey(FLocale, 'co', Ignored) then - FLocale := AddUnicodeExtensionKey(FLocale, 'co', LocaleCollation); + FLocale := RemoveUnicodeLocaleExtensionKeyword(FLocale, 'co'); + if not TryGetUnicodeLocaleExtensionKeyword(FLocale, 'co', Ignored) then + FLocale := AddUnicodeLocaleExtensionKeyword(FLocale, 'co', LocaleCollation); end else - FLocale := RemoveUnicodeExtensionKey(FLocale, 'co'); + FLocale := RemoveUnicodeLocaleExtensionKeyword(FLocale, 'co'); end; if Assigned(AOptions) then @@ -526,25 +314,25 @@ constructor TGocciaIntlCollatorValue.Create(const ALocale: string; const AOption ReadCollatorStringOption(AOptions, 'localeMatcher', Ignored, ['lookup', 'best fit']); end; - if NumericOptionPresent and TryGetUnicodeExtensionKey(FLocale, 'kn', LocaleNumeric) then + if NumericOptionPresent and TryGetUnicodeLocaleExtensionKeyword(FLocale, 'kn', LocaleNumeric) then begin if ((LocaleNumeric = '') or (LocaleNumeric = 'true')) <> FNumeric then - FLocale := RemoveUnicodeExtensionKey(FLocale, 'kn'); + FLocale := RemoveUnicodeLocaleExtensionKeyword(FLocale, 'kn'); end; - if CaseFirstOptionPresent and TryGetUnicodeExtensionKey(FLocale, 'kf', LocaleCaseFirst) then + if CaseFirstOptionPresent and TryGetUnicodeLocaleExtensionKeyword(FLocale, 'kf', LocaleCaseFirst) then begin if LocaleCaseFirst <> FCaseFirst then - FLocale := RemoveUnicodeExtensionKey(FLocale, 'kf'); + FLocale := RemoveUnicodeLocaleExtensionKeyword(FLocale, 'kf'); end; - if CollationOptionPresent and TryGetUnicodeExtensionKey(FLocale, 'co', LocaleCollation) then + if CollationOptionPresent and TryGetUnicodeLocaleExtensionKeyword(FLocale, 'co', LocaleCollation) then begin if NormalizeCollationValue(LocaleCollation) <> FCollation then - FLocale := RemoveUnicodeExtensionKey(FLocale, 'co'); + FLocale := RemoveUnicodeLocaleExtensionKeyword(FLocale, 'co'); end; FICULocale := LocaleWithoutUnicodeExtension(FLocale); if FCollation <> 'default' then - FICULocale := AddUnicodeExtensionKey(FICULocale, 'co', FCollation); + FICULocale := AddUnicodeLocaleExtensionKeyword(FICULocale, 'co', FCollation); InitializePrototype; if Assigned(GetIntlCollatorShared) then diff --git a/source/units/Goccia.Values.IntlLocale.pas b/source/units/Goccia.Values.IntlLocale.pas index dc179586..6e334ea4 100644 --- a/source/units/Goccia.Values.IntlLocale.pas +++ b/source/units/Goccia.Values.IntlLocale.pas @@ -122,55 +122,6 @@ function SplitSubtags(const AValue: string): IntlTypes.TStringArray; end; end; -function TryGetUnicodeExtensionKeyword(const AParsed: TBcp47Tag; - const AKey: string; out AValue: string): Boolean; -var - ExtensionIndex, PartIndex, ValueStart, ValueEnd: Integer; - Parts: IntlTypes.TStringArray; -begin - Result := False; - AValue := ''; - - for ExtensionIndex := 0 to High(AParsed.Extensions) do - begin - if AParsed.Extensions[ExtensionIndex].Singleton <> 'u' then - Continue; - - Parts := SplitSubtags(AParsed.Extensions[ExtensionIndex].Value); - PartIndex := 0; - while PartIndex <= High(Parts) do - begin - if Length(Parts[PartIndex]) = 2 then - begin - ValueStart := PartIndex + 1; - ValueEnd := ValueStart; - while (ValueEnd <= High(Parts)) and (Length(Parts[ValueEnd]) <> 2) do - Inc(ValueEnd); - - if Parts[PartIndex] = AKey then - begin - if ValueEnd > ValueStart then - begin - AValue := Parts[ValueStart]; - Inc(ValueStart); - while ValueStart < ValueEnd do - begin - AValue := AValue + '-' + Parts[ValueStart]; - Inc(ValueStart); - end; - end; - Result := AValue <> ''; - Exit; - end; - - PartIndex := ValueEnd; - end - else - Inc(PartIndex); - end; - end; -end; - function NormalizeCalendarType(const AValue: string): string; begin if AValue = 'gregorian' then @@ -277,7 +228,7 @@ procedure AppendSubtag(var AValue: string; const ASubtag: string); end; procedure SetUnicodeExtensionKeyword(var AParsed: TBcp47Tag; - const AKey, AValue: string); + const AKey, AValue: string; const AIncludeEmptyValue: Boolean = False); var ExtensionIndex, PartIndex, ValueIndex, ValueEnd: Integer; Parts: IntlTypes.TStringArray; @@ -308,7 +259,7 @@ procedure SetUnicodeExtensionKeyword(var AParsed: TBcp47Tag; if Parts[PartIndex] = AKey then begin Found := True; - if AValue <> '' then + if (AValue <> '') or AIncludeEmptyValue then begin AppendSubtag(NewValue, AKey); AppendSubtag(NewValue, AValue); @@ -330,7 +281,7 @@ procedure SetUnicodeExtensionKeyword(var AParsed: TBcp47Tag; end; end; - if (not Found) and (AValue <> '') then + if (not Found) and ((AValue <> '') or AIncludeEmptyValue) then begin AppendSubtag(NewValue, AKey); AppendSubtag(NewValue, AValue); @@ -340,11 +291,14 @@ procedure SetUnicodeExtensionKeyword(var AParsed: TBcp47Tag; Exit; end; - if (not HasUnicodeExtension) and (AValue <> '') then + if (not HasUnicodeExtension) and ((AValue <> '') or AIncludeEmptyValue) then begin SetLength(AParsed.Extensions, Length(AParsed.Extensions) + 1); AParsed.Extensions[High(AParsed.Extensions)].Singleton := 'u'; - AParsed.Extensions[High(AParsed.Extensions)].Value := AKey + '-' + AValue; + AParsed.Extensions[High(AParsed.Extensions)].Value := AKey; + if AValue <> '' then + AParsed.Extensions[High(AParsed.Extensions)].Value := + AParsed.Extensions[High(AParsed.Extensions)].Value + '-' + AValue; end; end; @@ -433,7 +387,11 @@ procedure TGocciaIntlLocaleValue.ParseTag(const ATag: string; const AOptions: TG V: TGocciaValue; Parsed, BaseParsed: TBcp47Tag; ExtensionValue, FirstDayOption: string; + CaseFirstExtensionPresent, NumericExtensionPresent, NumericOptionPresent: Boolean; begin + CaseFirstExtensionPresent := False; + NumericExtensionPresent := False; + NumericOptionPresent := False; if ContainsNulCharacter(ATag) then ThrowRangeError('Invalid language tag: ' + ATag); Canonical := CanonicalizeUnicodeLocaleId(ATag); @@ -455,16 +413,26 @@ procedure TGocciaIntlLocaleValue.ParseTag(const ATag: string; const AOptions: TG FScript := Parsed.Script; FRegion := Parsed.Region; - if TryGetUnicodeExtensionKeyword(Parsed, 'ca', ExtensionValue) then + if TryGetUnicodeLocaleExtensionKeyword(Canonical, 'ca', ExtensionValue) then FCalendar := NormalizeCalendarType(ExtensionValue); - if TryGetUnicodeExtensionKeyword(Parsed, 'co', ExtensionValue) then + if TryGetUnicodeLocaleExtensionKeyword(Canonical, 'co', ExtensionValue) then FCollation := NormalizeCollationType(ExtensionValue); - if TryGetUnicodeExtensionKeyword(Parsed, 'hc', ExtensionValue) then + if TryGetUnicodeLocaleExtensionKeyword(Canonical, 'hc', ExtensionValue) then FHourCycle := ExtensionValue; - if TryGetUnicodeExtensionKeyword(Parsed, 'nu', ExtensionValue) then + if TryGetUnicodeLocaleExtensionKeyword(Canonical, 'nu', ExtensionValue) then FNumberingSystem := ExtensionValue; - if TryGetUnicodeExtensionKeyword(Parsed, 'fw', ExtensionValue) then + if TryGetUnicodeLocaleExtensionKeyword(Canonical, 'fw', ExtensionValue) then FFirstDayOfWeek := DayIdentifierToNumber(ExtensionValue); + if TryGetUnicodeLocaleExtensionKeyword(Canonical, 'kf', ExtensionValue) then + begin + CaseFirstExtensionPresent := True; + FCaseFirst := ExtensionValue; + end; + if TryGetUnicodeLocaleExtensionKeyword(Canonical, 'kn', ExtensionValue) then + begin + NumericExtensionPresent := True; + FNumeric := ExtensionValue <> 'false'; + end; end else begin @@ -498,7 +466,10 @@ procedure TGocciaIntlLocaleValue.ParseTag(const ATag: string; const AOptions: TG end; V := AOptions.GetProperty('numeric'); if Assigned(V) and not (V is TGocciaUndefinedLiteralValue) then + begin + NumericOptionPresent := True; FNumeric := V.ToBooleanLiteral.Value; + end; end; if Parsed.IsValid then @@ -511,6 +482,11 @@ procedure TGocciaIntlLocaleValue.ParseTag(const ATag: string; const AOptions: TG SetUnicodeExtensionKeyword(Parsed, 'hc', FHourCycle); SetUnicodeExtensionKeyword(Parsed, 'nu', FNumberingSystem); SetUnicodeExtensionKeyword(Parsed, 'fw', DayNumberToIdentifier(FFirstDayOfWeek)); + SetUnicodeExtensionKeyword(Parsed, 'kf', FCaseFirst, CaseFirstExtensionPresent); + if FNumeric then + SetUnicodeExtensionKeyword(Parsed, 'kn', '', True) + else if NumericExtensionPresent or NumericOptionPresent then + SetUnicodeExtensionKeyword(Parsed, 'kn', 'false'); RemoveEmptyExtensions(Parsed); Canonical := CanonicalizeBcp47Tag(Parsed); diff --git a/tests/built-ins/Intl/Locale/constructor.js b/tests/built-ins/Intl/Locale/constructor.js index c1f8cbf6..839dd7c2 100644 --- a/tests/built-ins/Intl/Locale/constructor.js +++ b/tests/built-ins/Intl/Locale/constructor.js @@ -26,6 +26,45 @@ describe.runIf(isIntl && typeof Intl.Locale !== "undefined")("Intl.Locale constr expect(locale.toString()).toBe("en-US"); }); + test("parses Unicode extension keywords into properties", () => { + const locale = new Intl.Locale("en-US-u-nu-latn-ca-gregory-co-phonebk-hc-h24-kf-upper-kn"); + expect(locale.calendar).toBe("gregory"); + expect(locale.numberingSystem).toBe("latn"); + expect(locale.collation).toBe("phonebk"); + expect(locale.hourCycle).toBe("h24"); + expect(locale.caseFirst).toBe("upper"); + expect(locale.numeric).toBe(true); + expect(locale.baseName).toBe("en-US"); + }); + + test("constructor options override Unicode extension keywords", () => { + const locale = new Intl.Locale("en-u-kf-upper-kn", { + caseFirst: "lower", + numeric: false, + }); + expect(locale.caseFirst).toBe("lower"); + expect(locale.numeric).toBe(false); + expect(locale.toString()).toBe("en-u-kf-lower-kn-false"); + }); + + test("numeric Unicode extension canonicalizes true to a keyword", () => { + const locale = new Intl.Locale("en-u-kn-true"); + expect(locale.numeric).toBe(true); + expect(locale.toString()).toBe("en-u-kn"); + }); + + test("preserves empty caseFirst Unicode extension keyword", () => { + const locale = new Intl.Locale("de-u-kf"); + expect(locale.toString()).toBe("de-u-kf"); + expect(locale.maximize().toString()).toBe("de-Latn-DE-u-kf"); + }); + + test("baseName strips all extensions and private use subtags", () => { + const locale = new Intl.Locale("en-US-a-foo-u-ca-gregory-x-bar"); + expect(locale.baseName).toBe("en-US"); + expect(locale.calendar).toBe("gregory"); + }); + test("script property returns the script subtag when present", () => { const locale = new Intl.Locale("zh-Hant-TW"); expect(locale.script).toBe("Hant");