From 7d73b7c9a1bceef26c8b6a7402c775fb6ccf6ac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Wed, 17 Jun 2026 09:41:51 -0400 Subject: [PATCH 1/5] String cleaners > line, station... --- .../org/mtransit/commons/StringsCleaner.kt | 4 +-- .../java/org/mtransit/scratch/RegexScratch.kt | 16 +++++----- .../mtransit/commons/StringsCleanerTests.kt | 30 +++++++++++++++++++ 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 194958e..7e2a0ce 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,8 +7,8 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)line\s+(\w+)""", RegexOption.IGNORE_CASE) - private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$1$2" + private val LINE_AND_SHORT_NAME = Regex("""(?U)((^|\s+)line\s+(\w+))|(^((\w+(-|\s+|'))*)(\w+)\s+line(\s*$))""", RegexOption.IGNORE_CASE) + private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$2$3$5$8" private val FR_LIGNE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)ligne\s+(\w+)""", RegexOption.IGNORE_CASE) private const val FR_LIGNE_AND_SHORT_NAME_REPLACEMENT = "$1$2" diff --git a/src/main/java/org/mtransit/scratch/RegexScratch.kt b/src/main/java/org/mtransit/scratch/RegexScratch.kt index b3c8b6e..efc4996 100644 --- a/src/main/java/org/mtransit/scratch/RegexScratch.kt +++ b/src/main/java/org/mtransit/scratch/RegexScratch.kt @@ -1,27 +1,27 @@ package org.mtransit.scratch -import java.util.regex.Pattern +import org.intellij.lang.annotations.Language @Suppress("JoinDeclarationAndAssignment", "CanBeVal", "UNUSED_VALUE", "KotlinRedundantDiagnosticSuppress") internal object RegexScratch { @JvmStatic fun main(args: Array) { + @Language("RegExp") var regex: String regex = "" println("regex: '$regex'.") - val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) + val pattern = Regex(regex, RegexOption.IGNORE_CASE) var string: String string = "" println("string: '$string'.") println("=======") - val matcher = pattern.matcher(string) - while (matcher.find()) { + pattern.findAll(string).forEach { matchResult -> println("-------") - println("- Found group: '" + matcher.group() + "' (count: '" + matcher.groupCount() + "'):") - for (g in 0..matcher.groupCount()) { - println(" - group[" + g + "]: '" + matcher.group(g) + "'.") + println("- Found group: '${matchResult.value}' (count: '${matchResult.groups.size}'):") + matchResult.groupValues.forEachIndexed { g, groupValue -> + println(" - group[$g]: '$groupValue'.") } println("-------") } @@ -30,6 +30,6 @@ internal object RegexScratch { var replaceAll: String replaceAll = "" println("replaceAll: '$replaceAll'.") - println("-> '" + pattern.matcher(string).replaceAll(replaceAll) + "'.") + println("-> '${pattern.replace(string, replaceAll)}'.") } } \ No newline at end of file diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index 2f8d745..d99c552 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -62,11 +62,36 @@ class StringsCleanerTests { @Test fun test_cleanRouteLongName() { + "Yonge-University Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 1) + }.let { result -> + assertEquals("Yonge-University", result) + } "Tenth Line <> Place D'Orléans".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> assertEquals("Tenth Line <> Place D'Orléans", result) } + "Place D'Orléans <> Tenth Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Place D'Orléans <> Tenth Line", result) + } + "Tenth Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Tenth", result) + } + "Place Orléans Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Place Orléans", result) + } + "Place D'Orléans Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Place D'Orléans", result) + } "Line 10".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> @@ -92,6 +117,11 @@ class StringsCleanerTests { }.let { result -> assertEquals("John McCrae HS <> Half Moon Bay", result) } + "Ligne Bleue".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.FRENCH), routeType = 1) + }.let { result -> + assertEquals("Bleue", result) + } } @Test From 493240d58e5c8265c29985d67ed7f3a2ab6a74f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Wed, 17 Jun 2026 10:12:07 -0400 Subject: [PATCH 2/5] wip --- src/main/java/org/mtransit/commons/StringsCleaner.kt | 4 ++-- src/test/java/org/mtransit/commons/StringsCleanerTests.kt | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 7e2a0ce..4fd2893 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -38,8 +38,8 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) - private const val STATION_AND_NAME_REPLACEMENT = "$1$2" + private val STATION_AND_NAME = Regex("""(?U)((^|\s+)station\s+(\w+))|(^((\w+(-|\s+|'))*)(\w+)\s+station(\s*$))""", RegexOption.IGNORE_CASE) + private const val STATION_AND_NAME_REPLACEMENT = "$2$3$5$8" private val FR_STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) private const val FR_STATION_AND_NAME_REPLACEMENT = "$1$2" diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index d99c552..fdc6809 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -24,6 +24,11 @@ class StringsCleanerTests { }.let { result -> assertEquals("Édouard-Montpetit", result) } + "Union Station".let { stopName -> + StringsCleaner.cleanStopName(stopName, languages = listOf(Locale.ENGLISH), routeType = 1) // subway + }.let { result -> + assertEquals("Union", result) + } } @Test From 030e2b97b47d9e306d09ef32124d2adcff52ff64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Wed, 17 Jun 2026 10:32:10 -0400 Subject: [PATCH 3/5] PR comments --- .../java/org/mtransit/commons/StringsCleaner.kt | 4 ++-- .../org/mtransit/commons/StringsCleanerTests.kt | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 4fd2893..c66f95b 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,7 +7,7 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("""(?U)((^|\s+)line\s+(\w+))|(^((\w+(-|\s+|'))*)(\w+)\s+line(\s*$))""", RegexOption.IGNORE_CASE) + private val LINE_AND_SHORT_NAME = Regex("""(?U)((^|\s+)line\s+(\w+))|(^((\w+(-|\s+|'|.\s+))*)(\w+)\s+line(\s*$))""", RegexOption.IGNORE_CASE) private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$2$3$5$8" private val FR_LIGNE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)ligne\s+(\w+)""", RegexOption.IGNORE_CASE) @@ -38,7 +38,7 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("""(?U)((^|\s+)station\s+(\w+))|(^((\w+(-|\s+|'))*)(\w+)\s+station(\s*$))""", RegexOption.IGNORE_CASE) + private val STATION_AND_NAME = Regex("""(?U)((^|\s+)station\s+(\w+))|(^((\w+(-|\s+|'|.\s+))*)(\w+)\s+station(\s*$))""", RegexOption.IGNORE_CASE) private const val STATION_AND_NAME_REPLACEMENT = "$2$3$5$8" private val FR_STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index fdc6809..e0abf51 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -29,6 +29,11 @@ class StringsCleanerTests { }.let { result -> assertEquals("Union", result) } + "St. George Station".let { stopName -> + StringsCleaner.cleanStopName(stopName, languages = listOf(Locale.ENGLISH), routeType = 1) // subway + }.let { result -> + assertEquals("St George", result) + } } @Test @@ -72,6 +77,16 @@ class StringsCleanerTests { }.let { result -> assertEquals("Yonge-University", result) } + "Yonge - University Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 1) + }.let { result -> + assertEquals("Yonge - University Line", result) + } + "St. Clair Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 1) + }.let { result -> + assertEquals("St Clair", result) + } "Tenth Line <> Place D'Orléans".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> From 90188e11dda2bbd75b84860d250b46088537d50a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Wed, 17 Jun 2026 13:06:11 -0400 Subject: [PATCH 4/5] PR comments --- src/main/java/org/mtransit/commons/StringsCleaner.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index c66f95b..fe53c32 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,7 +7,7 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("""(?U)((^|\s+)line\s+(\w+))|(^((\w+(-|\s+|'|.\s+))*)(\w+)\s+line(\s*$))""", RegexOption.IGNORE_CASE) + private val LINE_AND_SHORT_NAME = Regex("""(?U)((^|\s+)line\s+(\w+))|(^((\w+(-|\s+(?!line\b)|'|.\s+(?!line\b)))*+)(\w+)\s+line(\s*$))""", RegexOption.IGNORE_CASE) private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$2$3$5$8" private val FR_LIGNE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)ligne\s+(\w+)""", RegexOption.IGNORE_CASE) @@ -38,7 +38,7 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("""(?U)((^|\s+)station\s+(\w+))|(^((\w+(-|\s+|'|.\s+))*)(\w+)\s+station(\s*$))""", RegexOption.IGNORE_CASE) + private val STATION_AND_NAME = Regex("""(?U)((^|\s+)station\s+(\w+))|(^((\w+(-|\s+(?!station\b)|'|.\s+(?!station\b)))*+)(\w+)\s+station(\s*$))""", RegexOption.IGNORE_CASE) private const val STATION_AND_NAME_REPLACEMENT = "$2$3$5$8" private val FR_STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) From a133676d9cbf451e6e9638eed8298c3ebd0062a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20M=C3=A9a?= Date: Wed, 17 Jun 2026 13:13:37 -0400 Subject: [PATCH 5/5] PR comments --- .../org/mtransit/commons/StringsCleaner.kt | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index fe53c32..044dc1c 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,7 +7,20 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("""(?U)((^|\s+)line\s+(\w+))|(^((\w+(-|\s+(?!line\b)|'|.\s+(?!line\b)))*+)(\w+)\s+line(\s*$))""", RegexOption.IGNORE_CASE) + private val LINE_AND_SHORT_NAME = Regex( + """(?Ux) + # Alternative 1: "line " at start or after space + ( (^|\s+) line \s+ (\w+) ) + | + # Alternative 2: " line" at the end of the string + ( + ^ + ( ( \w+ ( - | \s+(?!line\b) | ' | \.\s+(?!line\b) ) )*+ ) + ( \w+ ) \s+ line ( \s* $ ) + ) + """.trimIndent(), + RegexOption.IGNORE_CASE + ) private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$2$3$5$8" private val FR_LIGNE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)ligne\s+(\w+)""", RegexOption.IGNORE_CASE) @@ -38,7 +51,20 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("""(?U)((^|\s+)station\s+(\w+))|(^((\w+(-|\s+(?!station\b)|'|.\s+(?!station\b)))*+)(\w+)\s+station(\s*$))""", RegexOption.IGNORE_CASE) + private val STATION_AND_NAME = Regex( + """(?Ux) + # Alternative 1: "station " at start or after space + ( (^|\s+) station \s+ (\w+) ) + | + # Alternative 2: " station" at the end of the string + ( + ^ + ( ( \w+ ( - | \s+(?!station\b) | ' | \.\s+(?!station\b) ) )*+ ) + ( \w+ ) \s+ station ( \s* $ ) + ) + """.trimIndent(), + RegexOption.IGNORE_CASE + ) private const val STATION_AND_NAME_REPLACEMENT = "$2$3$5$8" private val FR_STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE)