From f7dd85a111516311a0934be68468b570e644b249 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 08:41:38 -0400 Subject: [PATCH 1/9] Strings cleaners improvements #OCTranspo #2Locales --- .../java/org/mtransit/commons/CleanUtils.java | 20 ++++---- .../org/mtransit/commons/StringsCleaner.kt | 48 +++++++++++++------ .../org/mtransit/commons/CleanUtilsKtTests.kt | 15 ++++++ .../org/mtransit/commons/CleanUtilsTests.java | 6 +++ .../mtransit/commons/StringsCleanerTests.kt | 30 ++++++++++-- 5 files changed, 92 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/mtransit/commons/CleanUtils.java b/src/main/java/org/mtransit/commons/CleanUtils.java index f51c3bc..65e9ae0 100644 --- a/src/main/java/org/mtransit/commons/CleanUtils.java +++ b/src/main/java/org/mtransit/commons/CleanUtils.java @@ -587,16 +587,6 @@ public static String fixMcXCase(@NotNull String string) { // Mccowan -> McCowan "([^" + WORD_REGEX_FR + "]*)([" + WORD_REGEX_FR + "]+)([^" + WORD_REGEX_FR + "]*)", Pattern.CASE_INSENSITIVE | RegexUtils.fUNICODE_CHARACTER_CLASS() | RegexUtils.fCANON_EQ()); - @NotNull - public static String toLowerCaseUpperCaseStrings(@NotNull Locale locale, @NotNull String string, @NotNull String... ignoreWords) { - if (string.isEmpty()) return string; - if (Arrays.asList(ignoreWords).contains(string.trim())) return string; - if (CharUtils.isUppercaseOnly(string, true, true)) { - return string.toLowerCase(locale); - } - return string; - } - @NotNull public static String toLowerCaseUpperCaseWords(@NotNull Locale locale, @NotNull String string, @NotNull String... ignoreWords) { if (string.isEmpty()) return string; @@ -634,6 +624,16 @@ private static boolean containsIgnoreCase(@Nullable String string, @NotNull Stri return false; } + @NotNull + public static String toLowerCaseUpperCaseStrings(@NotNull Locale locale, @NotNull String string, @NotNull String... ignoreWords) { + if (string.isEmpty()) return string; + if (Arrays.asList(ignoreWords).contains(string.trim())) return string; + if (CharUtils.isUppercaseOnly(string, true, true)) { + return string.toLowerCase(locale); + } + return string; + } + public static final Regex ALL_FACE_A_REGEX = CleanUtilsExtKt.makeALL_FACE_A_REGEX(); public static final String ALL_FACE_A_REGEX_REPLACEMENT = CleanUtilsExtKt.makeALL_FACE_A_REGEX_REPLACEMENT(); diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index da54cc5..7e2a589 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -1,12 +1,17 @@ package org.mtransit.commons -import org.mtransit.commons.StringUtils.EMPTY import java.util.Locale object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 + private val LINE_AND_SHORT_NAME = Regex("(^|\\s*)line\\s+(\\w+)") + private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$2" + + private val FR_LIGNE_AND_SHORT_NAME = Regex("(^|\\s*)ligne\\s+(\\w+)") + private const val FR_LIGNE_AND_SHORT_NAME_REPLACEMENT = "$2" + @JvmOverloads @JvmStatic fun cleanRouteLongName( @@ -19,18 +24,24 @@ object StringsCleaner { ): String { var routeLongName = originalRouteLongName if (languages?.contains(Locale.ENGLISH) == true) { - routeLongName = CleanUtils.LINE_.matcher(routeLongName).replaceAll(EMPTY) + routeLongName = LINE_AND_SHORT_NAME.replace(routeLongName, LINE_AND_SHORT_NAME_REPLACEMENT) } if (languages?.contains(Locale.FRENCH) == true) { - routeLongName = CleanUtils.FR_CA_LIGNE.matcher(routeLongName).replaceAll(EMPTY) + routeLongName = FR_LIGNE_AND_SHORT_NAME.replace(routeLongName, FR_LIGNE_AND_SHORT_NAME_REPLACEMENT) } val makeShorter = routeLongName.length > ROUTE_LONG_NAME_SHORT_MAX_LENGTH && routeLongName.contains(' ') - routeLongName = cleanString(routeLongName, languages, lowerUCStrings, lowerUCWords, *ignoredUCWords, short = makeShorter, shortMaxLength = ROUTE_LONG_NAME_SHORT_MAX_LENGTH) + routeLongName = cleanString(routeLongName, languages, makeShorter, ROUTE_LONG_NAME_SHORT_MAX_LENGTH, lowerUCStrings, lowerUCWords, *ignoredUCWords) return routeLongName } private const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 + private val STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)") + private const val STATION_AND_NAME_REPLACEMENT = "$2" + + private val FR_STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)") + private const val FR_STATION_AND_NAME_REPLACEMENT = "$2" + @JvmOverloads @JvmStatic fun cleanTripHeadsign( @@ -49,7 +60,7 @@ object StringsCleaner { 1, // subway 2, // train/rail -> { - tripHeadsign = CleanUtils.STATION.matcher(tripHeadsign).replaceAll(EMPTY) + tripHeadsign = STATION_AND_NAME.replace(tripHeadsign, STATION_AND_NAME_REPLACEMENT) } } } @@ -57,7 +68,7 @@ object StringsCleaner { when (routeType) { 1, // subway -> { - tripHeadsign = CleanUtils.FR_CA_STATION.matcher(tripHeadsign).replaceAll(EMPTY) + tripHeadsign = FR_STATION_AND_NAME.replace(tripHeadsign, FR_STATION_AND_NAME_REPLACEMENT) } } } @@ -75,7 +86,7 @@ object StringsCleaner { } } val makeShorter = tripHeadsign.length > TRIP_HEADSIGN_SHORT_MAX_LENGTH && tripHeadsign.contains(' ') - tripHeadsign = cleanString(tripHeadsign, languages, lowerUCStrings, lowerUCWords, *ignoredUCWords, short = makeShorter, shortMaxLength = TRIP_HEADSIGN_SHORT_MAX_LENGTH) + tripHeadsign = cleanString(tripHeadsign, languages, makeShorter, TRIP_HEADSIGN_SHORT_MAX_LENGTH, lowerUCStrings, lowerUCWords, *ignoredUCWords) if (tripHeadsign.length > TRIP_HEADSIGN_SHORT_MAX_LENGTH) { tripHeadsign = CleanUtils.cleanSlashes(tripHeadsign, true) } @@ -101,24 +112,31 @@ object StringsCleaner { 1, // subway 2, // train/rail -> { - stopName = CleanUtils.STATION.matcher(stopName).replaceAll(EMPTY) + stopName = STATION_AND_NAME.replace(stopName, STATION_AND_NAME_REPLACEMENT) + } + } + } + if (languages?.contains(Locale.FRENCH) == true) { + when (routeType) { + 1, // subway + -> { + stopName = FR_STATION_AND_NAME.replace(stopName, FR_STATION_AND_NAME_REPLACEMENT) } } - } val makeShorter = stopName.length > STOP_NAME_SHORT_MAX_LENGTH && stopName.contains(' ') - stopName = cleanString(stopName, languages, lowerUCStrings, lowerUCWords, *ignoredUCWords, short = makeShorter, shortMaxLength = STOP_NAME_SHORT_MAX_LENGTH) + stopName = cleanString(stopName, languages, makeShorter, STOP_NAME_SHORT_MAX_LENGTH, lowerUCStrings, lowerUCWords, *ignoredUCWords) return stopName } private fun cleanString( originalString: String, languages: List?, + short: Boolean, + shortMaxLength: Int, lowerUCStrings: Boolean = false, lowerUCWords: Boolean = false, vararg ignoredUCWords: String = emptyArray(), - short: Boolean, - shortMaxLength: Int, ): String { var string = originalString languages?.forEach { language -> @@ -149,12 +167,14 @@ object StringsCleaner { string = CleanUtils.ALL_CHARS_REGEX.replace(string, CleanUtils.ALL_CHARS_REGEX_REPLACEMENT) } } - val capitalize = lowerUCStrings || lowerUCWords // only capitalize if lower case was called languages?.forEach { language -> if (short && string.length > shortMaxLength) { string = CleanUtils.cleanBounds(language, string) } - string = CleanUtils.cleanLabel(language, string, capitalize) + } + val capitalize = (lowerUCStrings || lowerUCWords) && string.none { it.isUpperCase() } // only capitalize if LC enabled & used for this string + languages?.forEachIndexed { index, language -> + string = CleanUtils.cleanLabel(language, string, capitalize && index == 0) // lower case only applied once for the 1st language } return string } diff --git a/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt b/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt index 9b6512a..c05e778 100644 --- a/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt +++ b/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt @@ -1,5 +1,6 @@ package org.mtransit.commons +import java.util.Locale import kotlin.test.BeforeTest import kotlin.test.Test import kotlin.test.assertEquals @@ -11,6 +12,20 @@ class CleanUtilsKtTests { CommonsApp.setup(false) } + @Test + fun test_toLowerCaseUpperCaseWords() { + "MARY'S".let { + CleanUtils.toLowerCaseUpperCaseWords(Locale.ENGLISH, it) + }.let { + assertEquals("mary's", it) + } + "d'AYLMER".let { + CleanUtils.toLowerCaseUpperCaseWords(Locale.FRENCH, it) + }.let { + assertEquals("d'aylmer", it) + } + } + @Test fun test_keepOrRemoveVia_keepVia() { // Arrange diff --git a/src/test/java/org/mtransit/commons/CleanUtilsTests.java b/src/test/java/org/mtransit/commons/CleanUtilsTests.java index 8d7fa58..c465df7 100644 --- a/src/test/java/org/mtransit/commons/CleanUtilsTests.java +++ b/src/test/java/org/mtransit/commons/CleanUtilsTests.java @@ -495,5 +495,11 @@ public void testRemoveVia_KeepOnlyVia2() { public void test_toLowerCaseUpperCaseStrings() { String result = CleanUtils.toLowerCaseUpperCaseStrings(Locale.ENGLISH, "YMCA ", "YMCA"); assertEquals("YMCA ", result); + + result = CleanUtils.toLowerCaseUpperCaseStrings(Locale.ENGLISH, "1 UPPERCASE, ONLY."); + assertEquals("1 uppercase, only.", result); + + result = CleanUtils.toLowerCaseUpperCaseStrings(Locale.ENGLISH, "2 Uppercase, Capitalized."); + assertEquals("2 Uppercase, Capitalized.", result); } } diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index 1faf9a7..51f0fd9 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -9,7 +9,7 @@ class StringsCleanerTests { @BeforeTest fun setUp() { - CommonsApp.setup(false); + CommonsApp.setup(false) } @Test @@ -43,6 +43,21 @@ class StringsCleanerTests { @Test fun test_cleanRouteLongName() { + "Tenth Line <> Place D'Orléans".let { + StringsCleaner.cleanRouteLongName(it, languages = null, routeType = 3) + }.let { result -> + assertEquals("Tenth Line <> Place D'Orléans", result) + } + + "Line 10".let { + StringsCleaner.cleanRouteLongName(it, languages = null, routeType = 3) + }.let { result -> + assertEquals("Line 10", result) + } + } + + @Test + fun test_cleanRouteLongName_Capitalize() { "Tunney's Pasture <> Bridlewood".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> @@ -61,8 +76,17 @@ class StringsCleanerTests { "Tunney's Pasture <> Bridlewood".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3, lowerUCWords = true) }.let { result -> - assertEquals("Tunney'S Pasture <> Bridlewood", result) // too bad + assertEquals("Tunney's Pasture <> Bridlewood", result) + } + "tunney's pasture <> bridlewood".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3, lowerUCWords = true) + }.let { result -> + assertEquals("Tunney's Pasture <> Bridlewood", result) + } + "tunney's pasture <> bridlewood".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3) + }.let { result -> + assertEquals("tunney's pasture <> bridlewood", result) } - } } From 617228292f4dae3c29112ae6baadf203ab5aac7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 11:22:37 -0400 Subject: [PATCH 2/9] wip --- .../java/org/mtransit/commons/CleanUtils.java | 4 ++-- .../org/mtransit/commons/StringsCleaner.kt | 10 ++++++---- .../org/mtransit/commons/CleanUtilsKtTests.kt | 5 +++++ .../mtransit/commons/StringsCleanerTests.kt | 18 +++++++++++------- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/mtransit/commons/CleanUtils.java b/src/main/java/org/mtransit/commons/CleanUtils.java index 65e9ae0..00f1a9c 100644 --- a/src/main/java/org/mtransit/commons/CleanUtils.java +++ b/src/main/java/org/mtransit/commons/CleanUtils.java @@ -593,7 +593,7 @@ public static String toLowerCaseUpperCaseWords(@NotNull Locale locale, @NotNull final float charCount = string.length(); final float upperCaseCount = CharUtils.countUpperCase(string); final float percent = upperCaseCount / charCount; - if (percent < .25f) { // 25% + if (percent < .33f) { // 33% return string; } StringBuilder sb = new StringBuilder(); @@ -602,7 +602,7 @@ public static String toLowerCaseUpperCaseWords(@NotNull Locale locale, @NotNull while (matcher.find()) { sb.append(matcher.group(1)); // before final String word = matcher.group(2); - if (!word.isEmpty() + if (word.length() > 1 // do not lower case single letter && CharUtils.isUppercaseOnly(word, false, true) && !CharUtils.isRomanDigits(word) && !containsIgnoreCase(word, ignoreWords)) { diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 7e2a589..725761e 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -1,5 +1,6 @@ package org.mtransit.commons +import org.jetbrains.annotations.VisibleForTesting import java.util.Locale object StringsCleaner { @@ -34,7 +35,8 @@ object StringsCleaner { return routeLongName } - private const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 + @VisibleForTesting + internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 private val STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)") private const val STATION_AND_NAME_REPLACEMENT = "$2" @@ -129,7 +131,8 @@ object StringsCleaner { return stopName } - private fun cleanString( + @VisibleForTesting + internal fun cleanString( originalString: String, languages: List?, short: Boolean, @@ -172,9 +175,8 @@ object StringsCleaner { string = CleanUtils.cleanBounds(language, string) } } - val capitalize = (lowerUCStrings || lowerUCWords) && string.none { it.isUpperCase() } // only capitalize if LC enabled & used for this string languages?.forEachIndexed { index, language -> - string = CleanUtils.cleanLabel(language, string, capitalize && index == 0) // lower case only applied once for the 1st language + string = CleanUtils.cleanLabel(language, string, index == 0) // lower case only applied once for the 1st language } return string } diff --git a/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt b/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt index c05e778..01941a7 100644 --- a/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt +++ b/src/test/java/org/mtransit/commons/CleanUtilsKtTests.kt @@ -24,6 +24,11 @@ class CleanUtilsKtTests { }.let { assertEquals("d'aylmer", it) } + "Fallowfield <> Limebank & CFIA ~ ACIA".let { + CleanUtils.toLowerCaseUpperCaseWords(Locale.ENGLISH, it) + }.let { + assertEquals("Fallowfield <> Limebank & CFIA ~ ACIA", it) + } } @Test diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index 51f0fd9..9216a7d 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -39,6 +39,11 @@ class StringsCleanerTests { }.let { result -> assertEquals("Angora/Gascon/Terrebonne/Cégep Terrebonne", result) } + "Bell H.S".let { tripHeadsign -> + StringsCleaner.cleanTripHeadsign(tripHeadsign, languages = listOf(Locale.ENGLISH), routeType = 3, lowerUCWords = true) + }.let { result -> + assertEquals("Bell HS", result) + } } @Test @@ -48,12 +53,16 @@ class StringsCleanerTests { }.let { result -> assertEquals("Tenth Line <> Place D'Orléans", result) } - "Line 10".let { StringsCleaner.cleanRouteLongName(it, languages = null, routeType = 3) }.let { result -> assertEquals("Line 10", result) } + "John McCrae H.S <> Half Moon Bay".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3, lowerUCWords = true) + }.let { result -> + assertEquals("John McCrae HS <> Half Moon Bay", result) + } } @Test @@ -63,11 +72,6 @@ class StringsCleanerTests { }.let { result -> assertEquals("Tunney's Pasture <> Bridlewood", result) } - "Tunney's Pasture <> Bridlewood".let { - StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.FRENCH, Locale.ENGLISH), routeType = 3) - }.let { result -> - assertEquals("Tunney's Pasture <> Bridlewood", result) - } "Tunney's Pasture <> Bridlewood".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3) }.let { result -> @@ -86,7 +90,7 @@ class StringsCleanerTests { "tunney's pasture <> bridlewood".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3) }.let { result -> - assertEquals("tunney's pasture <> bridlewood", result) + assertEquals("Tunney's Pasture <> Bridlewood", result) } } } From 40c52fbf3fa0aa500237b90bfbb0765bc400e272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 12:33:43 -0400 Subject: [PATCH 3/9] PR comments --- .../org/mtransit/commons/StringsCleaner.kt | 18 +++++++++--------- .../mtransit/commons/StringsCleanerTests.kt | 11 ++++++++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 725761e..1927260 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,11 +7,11 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("(^|\\s*)line\\s+(\\w+)") - private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$2" + private val LINE_AND_SHORT_NAME = Regex("(^|\\s*)line\\s+(\\w+)", RegexOption.IGNORE_CASE) + private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$1$2" - private val FR_LIGNE_AND_SHORT_NAME = Regex("(^|\\s*)ligne\\s+(\\w+)") - private const val FR_LIGNE_AND_SHORT_NAME_REPLACEMENT = "$2" + private val FR_LIGNE_AND_SHORT_NAME = Regex("(^|\\s*)ligne\\s+(\\w+)", RegexOption.IGNORE_CASE) + private const val FR_LIGNE_AND_SHORT_NAME_REPLACEMENT = "$1$2" @JvmOverloads @JvmStatic @@ -38,11 +38,11 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)") - private const val STATION_AND_NAME_REPLACEMENT = "$2" + private val STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)", RegexOption.IGNORE_CASE) + private const val STATION_AND_NAME_REPLACEMENT = "$1$2" - private val FR_STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)") - private const val FR_STATION_AND_NAME_REPLACEMENT = "$2" + private val FR_STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)", RegexOption.IGNORE_CASE) + private const val FR_STATION_AND_NAME_REPLACEMENT = "$1$2" @JvmOverloads @JvmStatic @@ -176,7 +176,7 @@ object StringsCleaner { } } languages?.forEachIndexed { index, language -> - string = CleanUtils.cleanLabel(language, string, index == 0) // lower case only applied once for the 1st language + string = CleanUtils.cleanLabel(language, string, index == 0) // capitalize only first language } return string } diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index 9216a7d..d87c91a 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -49,14 +49,19 @@ class StringsCleanerTests { @Test fun test_cleanRouteLongName() { "Tenth Line <> Place D'Orléans".let { - StringsCleaner.cleanRouteLongName(it, languages = null, routeType = 3) + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> assertEquals("Tenth Line <> Place D'Orléans", result) } "Line 10".let { - StringsCleaner.cleanRouteLongName(it, languages = null, routeType = 3) + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("10", result) + } + "The Line 10".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> - assertEquals("Line 10", result) + assertEquals("The 10", result) } "John McCrae H.S <> Half Moon Bay".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3, lowerUCWords = true) From a6d2f0fa93f0529526ed32a01148e5963dd75ef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 12:37:41 -0400 Subject: [PATCH 4/9] PR comments --- src/main/java/org/mtransit/commons/StringsCleaner.kt | 8 ++++---- .../java/org/mtransit/commons/StringsCleanerTests.kt | 10 ++++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 1927260..6bb486a 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,10 +7,10 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("(^|\\s*)line\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val LINE_AND_SHORT_NAME = Regex("(^|\\s+)line\\s+(\\w+)", RegexOption.IGNORE_CASE) private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$1$2" - private val FR_LIGNE_AND_SHORT_NAME = Regex("(^|\\s*)ligne\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val FR_LIGNE_AND_SHORT_NAME = Regex("(^|\\s+)ligne\\s+(\\w+)", RegexOption.IGNORE_CASE) private const val FR_LIGNE_AND_SHORT_NAME_REPLACEMENT = "$1$2" @JvmOverloads @@ -38,10 +38,10 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val STATION_AND_NAME = Regex("(^|\\s+)station\\s+(\\w+)", RegexOption.IGNORE_CASE) private const val STATION_AND_NAME_REPLACEMENT = "$1$2" - private val FR_STATION_AND_NAME = Regex("(^|\\s*)station\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val FR_STATION_AND_NAME = Regex("(^|\\s+)station\\s+(\\w+)", RegexOption.IGNORE_CASE) private const val FR_STATION_AND_NAME_REPLACEMENT = "$1$2" @JvmOverloads diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index d87c91a..da5bb52 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -58,11 +58,21 @@ class StringsCleanerTests { }.let { result -> assertEquals("10", result) } + " Line 10".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("10", result) + } "The Line 10".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> assertEquals("The 10", result) } + "Online 10".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Online 10", result) + } "John McCrae H.S <> Half Moon Bay".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH, Locale.FRENCH), routeType = 3, lowerUCWords = true) }.let { result -> From b397574c60d328482c00f9590a88a27ec43d8129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 12:46:34 -0400 Subject: [PATCH 5/9] clean --- src/main/java/org/mtransit/commons/StringsCleaner.kt | 4 ++-- .../java/org/mtransit/commons/StringsCleanerTests.kt | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 6bb486a..e069fbe 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -175,8 +175,8 @@ object StringsCleaner { string = CleanUtils.cleanBounds(language, string) } } - languages?.forEachIndexed { index, language -> - string = CleanUtils.cleanLabel(language, string, index == 0) // capitalize only first language + languages?.firstOrNull()?.let { language -> + string = CleanUtils.cleanLabel(language, string, true) // only 1st language } return string } diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index da5bb52..0ed366f 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -12,6 +12,15 @@ class StringsCleanerTests { CommonsApp.setup(false) } + @Test + fun test_cleanStopName() { + "Station Mont-Royal".let { stopName -> + StringsCleaner.cleanStopName(stopName, languages = listOf(Locale.FRENCH), routeType = 1) // subway + }.let { result -> + assertEquals("Mont-Royal", result) + } + } + @Test fun test_cleanTripHeadsign() { "Terrebonne / Mascouche".let { tripHeadsign -> From 7718dee960b6aa2f3f6c8d48ff2a38d152716da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 13:19:49 -0400 Subject: [PATCH 6/9] fix --- src/main/java/org/mtransit/commons/StringsCleaner.kt | 8 ++++---- .../commons/provider/OttawaOCTranspoProviderCommons.kt | 2 +- src/test/java/org/mtransit/commons/StringsCleanerTests.kt | 5 +++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index e069fbe..210ef1a 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,10 +7,10 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("(^|\\s+)line\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val LINE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)line\s+(\w+)""", RegexOption.IGNORE_CASE) private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$1$2" - private val FR_LIGNE_AND_SHORT_NAME = Regex("(^|\\s+)ligne\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val FR_LIGNE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)ligne\s+(\w+)""", RegexOption.IGNORE_CASE) private const val FR_LIGNE_AND_SHORT_NAME_REPLACEMENT = "$1$2" @JvmOverloads @@ -38,10 +38,10 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("(^|\\s+)station\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) private const val STATION_AND_NAME_REPLACEMENT = "$1$2" - private val FR_STATION_AND_NAME = Regex("(^|\\s+)station\\s+(\\w+)", RegexOption.IGNORE_CASE) + private val FR_STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) private const val FR_STATION_AND_NAME_REPLACEMENT = "$1$2" @JvmOverloads diff --git a/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt b/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt index c7666b3..5df23c3 100644 --- a/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt +++ b/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt @@ -10,7 +10,7 @@ object OttawaOCTranspoProviderCommons { private val FIX_CAIRINE_WILSON_ = CleanUtils.cleanWords("carine wilson") private val FIX_CAIRINE_WILSON_REPLACEMENT = CleanUtils.cleanWordsReplacement("Cairine Wilson") - private val REMOVE_SECOND_LANGUAGE = Pattern.compile("( ~ .*$)") // FIXME i18n head-signs + private val REMOVE_SECOND_LANGUAGE = Pattern.compile("(\\s+~\\s+[^<>]+?)(?=\\s*<>|$)") // FIXME i18n head-signs @JvmStatic fun cleanTripHeadsign(tripHeadSign: String, @Suppress("unused") vararg ignoreWords: String): String { diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index 0ed366f..2f8d745 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -19,6 +19,11 @@ class StringsCleanerTests { }.let { result -> assertEquals("Mont-Royal", result) } + "Station Édouard-Montpetit".let { stopName -> + StringsCleaner.cleanStopName(stopName, languages = listOf(Locale.FRENCH), routeType = 1) // subway + }.let { result -> + assertEquals("Édouard-Montpetit", result) + } } @Test From d64d02ad4e4c917f0316f075496c67ecc9ab6e6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 13:32:20 -0400 Subject: [PATCH 7/9] cleanup --- .../commons/provider/OttawaOCTranspoProviderCommons.kt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt b/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt index 5df23c3..6597eed 100644 --- a/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt +++ b/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt @@ -1,21 +1,20 @@ package org.mtransit.commons.provider import org.mtransit.commons.CleanUtils -import org.mtransit.commons.StringUtils.EMPTY import java.util.Locale -import java.util.regex.Pattern object OttawaOCTranspoProviderCommons { private val FIX_CAIRINE_WILSON_ = CleanUtils.cleanWords("carine wilson") private val FIX_CAIRINE_WILSON_REPLACEMENT = CleanUtils.cleanWordsReplacement("Cairine Wilson") - private val REMOVE_SECOND_LANGUAGE = Pattern.compile("(\\s+~\\s+[^<>]+?)(?=\\s*<>|$)") // FIXME i18n head-signs + private val REMOVE_SECOND_LANGUAGE = """(?U)(\s+~\s+[^<>]+?)(?=\s*<>|$)""".toRegex(RegexOption.IGNORE_CASE) // FIXME i18n head-signs + private const val REMOVE_SECOND_LANGUAGE_REPLACEMENT = "$2$3$5" @JvmStatic fun cleanTripHeadsign(tripHeadSign: String, @Suppress("unused") vararg ignoreWords: String): String { var newTripHeadSign = tripHeadSign - newTripHeadSign = REMOVE_SECOND_LANGUAGE.matcher(newTripHeadSign).replaceAll(EMPTY) + newTripHeadSign = REMOVE_SECOND_LANGUAGE.replace(newTripHeadSign, REMOVE_SECOND_LANGUAGE_REPLACEMENT) newTripHeadSign = FIX_CAIRINE_WILSON_.matcher(newTripHeadSign).replaceAll(FIX_CAIRINE_WILSON_REPLACEMENT) newTripHeadSign = CleanUtils.fixMcXCase(newTripHeadSign) newTripHeadSign = CleanUtils.cleanBounds(newTripHeadSign) @@ -23,4 +22,4 @@ object OttawaOCTranspoProviderCommons { newTripHeadSign = CleanUtils.cleanLabel(Locale.ENGLISH, newTripHeadSign) return newTripHeadSign } -} \ No newline at end of file +} From 7387490fb8976a25cf052f21df2d882d5c01d51a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 13:35:15 -0400 Subject: [PATCH 8/9] revert --- src/main/java/org/mtransit/commons/Constants.kt | 4 ++-- src/main/java/org/mtransit/scratch/RegexScratch.kt | 13 ------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/mtransit/commons/Constants.kt b/src/main/java/org/mtransit/commons/Constants.kt index d0014a9..857fc11 100644 --- a/src/main/java/org/mtransit/commons/Constants.kt +++ b/src/main/java/org/mtransit/commons/Constants.kt @@ -4,8 +4,8 @@ package org.mtransit.commons object Constants { @JvmStatic - val DEBUG = true // DEBUG - // val DEBUG = false + // val DEBUG = true // DEBUG + val DEBUG = false const val NEW_LINE = '\n' const val SPACE = ' ' diff --git a/src/main/java/org/mtransit/scratch/RegexScratch.kt b/src/main/java/org/mtransit/scratch/RegexScratch.kt index 7e3cce4..b3c8b6e 100644 --- a/src/main/java/org/mtransit/scratch/RegexScratch.kt +++ b/src/main/java/org/mtransit/scratch/RegexScratch.kt @@ -8,20 +8,11 @@ internal object RegexScratch { fun main(args: Array) { var regex: String regex = "" - // regex = "(^|\\s*)line (\\w+)" - regex = "((^|(?<=<>)\\s+)([^~]+)\\s+~\\s+([^~<]+)(\\s+(?=<>)|$))" - // regex = "((^|<>\\s+)([^~]+)\\s+~\\s+([^~<]+)(\\s+<>|$))" println("regex: '$regex'.") val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) var string: String string = "" - // string = "line ABC (b+)" - // En-en ~ Fr-fr <> - string = "Test <> Nepean South ~ Nepean Sud" - // string = "Nepean South ~ Nepean Sud <> Test" - // string = "Nepean South ~ Nepean Sud <> Nepean North ~ Nepean Nord" - // string = "Shuttle-Express Downtown ~ Navette-Express Ctr-Ville <> Shuttle-Express Blair ~ Navette-Express Blair" println("string: '$string'.") println("=======") @@ -38,10 +29,6 @@ internal object RegexScratch { println("=======") var replaceAll: String replaceAll = "" - // replaceAll = "$2" - replaceAll = "$2$3$5" - // replaceAll = "$3$5" - // replaceAll == "Shuttle-Express Downtown <> Shuttle-Express Blair" println("replaceAll: '$replaceAll'.") println("-> '" + pattern.matcher(string).replaceAll(replaceAll) + "'.") } From 95eed240f6f78a86466cbff5824064b68f2ea0c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Thu, 4 Jun 2026 13:38:30 -0400 Subject: [PATCH 9/9] wip --- .../mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt b/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt index 6597eed..38e04be 100644 --- a/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt +++ b/src/main/java/org/mtransit/commons/provider/OttawaOCTranspoProviderCommons.kt @@ -8,7 +8,7 @@ object OttawaOCTranspoProviderCommons { private val FIX_CAIRINE_WILSON_ = CleanUtils.cleanWords("carine wilson") private val FIX_CAIRINE_WILSON_REPLACEMENT = CleanUtils.cleanWordsReplacement("Cairine Wilson") - private val REMOVE_SECOND_LANGUAGE = """(?U)(\s+~\s+[^<>]+?)(?=\s*<>|$)""".toRegex(RegexOption.IGNORE_CASE) // FIXME i18n head-signs + private val REMOVE_SECOND_LANGUAGE = """((^|(?<=<>)\s+)([^~]+)\s+~\s+([^~<]+)(\s+(?=<>)|$))""".toRegex(RegexOption.IGNORE_CASE) // FIXME i18n head-signs private const val REMOVE_SECOND_LANGUAGE_REPLACEMENT = "$2$3$5" @JvmStatic