diff --git a/src/main/java/org/mtransit/commons/StringsCleaner.kt b/src/main/java/org/mtransit/commons/StringsCleaner.kt index 194958e..044dc1c 100644 --- a/src/main/java/org/mtransit/commons/StringsCleaner.kt +++ b/src/main/java/org/mtransit/commons/StringsCleaner.kt @@ -7,8 +7,21 @@ object StringsCleaner { private const val ROUTE_LONG_NAME_SHORT_MAX_LENGTH = 33 - private val LINE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)line\s+(\w+)""", RegexOption.IGNORE_CASE) - private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$1$2" + private val LINE_AND_SHORT_NAME = Regex( + """(?Ux) + # Alternative 1: "line " at start or after space + ( (^|\s+) line \s+ (\w+) ) + | + # Alternative 2: " line" at the end of the string + ( + ^ + ( ( \w+ ( - | \s+(?!line\b) | ' | \.\s+(?!line\b) ) )*+ ) + ( \w+ ) \s+ line ( \s* $ ) + ) + """.trimIndent(), + RegexOption.IGNORE_CASE + ) + private const val LINE_AND_SHORT_NAME_REPLACEMENT = "$2$3$5$8" private val FR_LIGNE_AND_SHORT_NAME = Regex("""(?U)(^|\s+)ligne\s+(\w+)""", RegexOption.IGNORE_CASE) private const val FR_LIGNE_AND_SHORT_NAME_REPLACEMENT = "$1$2" @@ -38,8 +51,21 @@ object StringsCleaner { @VisibleForTesting internal const val TRIP_HEADSIGN_SHORT_MAX_LENGTH = 13 - private val STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) - private const val STATION_AND_NAME_REPLACEMENT = "$1$2" + private val STATION_AND_NAME = Regex( + """(?Ux) + # Alternative 1: "station " at start or after space + ( (^|\s+) station \s+ (\w+) ) + | + # Alternative 2: " station" at the end of the string + ( + ^ + ( ( \w+ ( - | \s+(?!station\b) | ' | \.\s+(?!station\b) ) )*+ ) + ( \w+ ) \s+ station ( \s* $ ) + ) + """.trimIndent(), + RegexOption.IGNORE_CASE + ) + private const val STATION_AND_NAME_REPLACEMENT = "$2$3$5$8" private val FR_STATION_AND_NAME = Regex("""(?U)(^|\s+)station\s+(\w+)""", RegexOption.IGNORE_CASE) private const val FR_STATION_AND_NAME_REPLACEMENT = "$1$2" diff --git 
a/src/main/java/org/mtransit/scratch/RegexScratch.kt b/src/main/java/org/mtransit/scratch/RegexScratch.kt index b3c8b6e..efc4996 100644 --- a/src/main/java/org/mtransit/scratch/RegexScratch.kt +++ b/src/main/java/org/mtransit/scratch/RegexScratch.kt @@ -1,27 +1,27 @@ package org.mtransit.scratch -import java.util.regex.Pattern +import org.intellij.lang.annotations.Language @Suppress("JoinDeclarationAndAssignment", "CanBeVal", "UNUSED_VALUE", "KotlinRedundantDiagnosticSuppress") internal object RegexScratch { @JvmStatic fun main(args: Array<String>) { + @Language("RegExp") var regex: String regex = "" println("regex: '$regex'.") - val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) + val pattern = Regex(regex, RegexOption.IGNORE_CASE) var string: String string = "" println("string: '$string'.") println("=======") - val matcher = pattern.matcher(string) - while (matcher.find()) { + pattern.findAll(string).forEach { matchResult -> println("-------") - println("- Found group: '" + matcher.group() + "' (count: '" + matcher.groupCount() + "'):") - for (g in 0..matcher.groupCount()) { - println(" - group[" + g + "]: '" + matcher.group(g) + "'.") + println("- Found group: '${matchResult.value}' (count: '${matchResult.groups.size}'):") + matchResult.groupValues.forEachIndexed { g, groupValue -> + println(" - group[$g]: '$groupValue'.") } println("-------") } @@ -30,6 +30,6 @@ internal object RegexScratch { var replaceAll: String replaceAll = "" println("replaceAll: '$replaceAll'.") - println("-> '" + pattern.matcher(string).replaceAll(replaceAll) + "'.") + println("-> '${pattern.replace(string, replaceAll)}'.") } } \ No newline at end of file diff --git a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt index 2f8d745..e0abf51 100644 --- a/src/test/java/org/mtransit/commons/StringsCleanerTests.kt +++ b/src/test/java/org/mtransit/commons/StringsCleanerTests.kt @@ -24,6 +24,16 @@ class StringsCleanerTests
{ }.let { result -> assertEquals("Édouard-Montpetit", result) } + "Union Station".let { stopName -> + StringsCleaner.cleanStopName(stopName, languages = listOf(Locale.ENGLISH), routeType = 1) // subway + }.let { result -> + assertEquals("Union", result) + } + "St. George Station".let { stopName -> + StringsCleaner.cleanStopName(stopName, languages = listOf(Locale.ENGLISH), routeType = 1) // subway + }.let { result -> + assertEquals("St George", result) + } } @Test @@ -62,11 +72,46 @@ class StringsCleanerTests { @Test fun test_cleanRouteLongName() { + "Yonge-University Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 1) + }.let { result -> + assertEquals("Yonge-University", result) + } + "Yonge - University Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 1) + }.let { result -> + assertEquals("Yonge - University Line", result) + } + "St. Clair Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 1) + }.let { result -> + assertEquals("St Clair", result) + } "Tenth Line <> Place D'Orléans".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> assertEquals("Tenth Line <> Place D'Orléans", result) } + "Place D'Orléans <> Tenth Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Place D'Orléans <> Tenth Line", result) + } + "Tenth Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Tenth", result) + } + "Place Orléans Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) + }.let { result -> + assertEquals("Place Orléans", result) + } + "Place D'Orléans Line".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType 
= 3) + }.let { result -> + assertEquals("Place D'Orléans", result) + } "Line 10".let { StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.ENGLISH), routeType = 3) }.let { result -> @@ -92,6 +137,11 @@ class StringsCleanerTests { }.let { result -> assertEquals("John McCrae HS <> Half Moon Bay", result) } + "Ligne Bleue".let { + StringsCleaner.cleanRouteLongName(it, languages = listOf(Locale.FRENCH), routeType = 1) + }.let { result -> + assertEquals("Bleue", result) + } } @Test