Skip to content

Commit b75f3c7

Browse files
yoshito-umaoka authored and yumaoka committed
Handles backslashes in java props comments
Properly handles backslashes in Java properties comments: only backslash-'u' encoded Unicode code points are unescaped; `\t`, `\r`, `\n`, `\\` and other ASCII escapes are left undecoded.
1 parent ac6bc0b commit b75f3c7

7 files changed

Lines changed: 97 additions & 10 deletions

File tree

gp-res-filter/src/main/java/com/ibm/g11n/pipeline/resfilter/impl/JavaPropertiesResource.java

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,17 @@ public LanguageBundle parse(InputStream inStream, FilterOptions options)
130130
// either
131131
// a blank line (global comment) or a key/value pair
132132
if (line.startsWith("#") || line.startsWith("!")) {
133-
// Strip off the leading comment marker, and perform any
134-
// necessary unescaping here.
135-
currentNotes.add(unescape(line.substring(1)));
133+
// Strip off the leading comment marker, and decode Unicode escape
134+
// if necessary.
135+
if (enc == Encoding.UTF_8) {
136+
// Do not unescape unicode - because if backslash u encoding is used
137+
// in comment, it is on purpose and better not to decode it.
138+
currentNotes.add(line.substring(1));
139+
} else {
140+
// Unescape unicode - if not UTF-8 props, backslash u encoding must be
141+
// used always.
142+
currentNotes.add(unescapeOnlyUnicode(line.substring(1)));
143+
}
136144
} else if (line.isEmpty()) {
137145
// We are following the convention that the first blank line in
138146
// a properties
@@ -206,7 +214,13 @@ public void write(OutputStream outStream, LanguageBundle languageBundle,
206214

207215
PrintWriter pw = new PrintWriter(new OutputStreamWriter(outStream, getCharset()));
208216
for (String note : languageBundle.getNotes()) {
209-
pw.println("#"+note);
217+
if (enc == Encoding.UTF_8) {
218+
// No need to escape raw Unicode characters
219+
pw.println("#" + note);
220+
} else {
221+
// Needs to escape Unicode characters
222+
pw.println("#" + escapeOnlyUnicode(note));
223+
}
210224
}
211225
if (!languageBundle.getNotes().isEmpty()) {
212226
pw.println();
@@ -641,7 +655,66 @@ private static String unescape(String str) {
641655

642656
return buf.toString();
643657
}
644-
658+
659+
/**
660+
* Escape non-ASCII code points to backslash u encoded UTF-16 code points. This method
661+
* does not escape any other control characters and compatible with native2ascii.
662+
*
663+
* @param str An input string
664+
* @return A string without raw non-ASCII code points.
665+
*/
666+
private static String escapeOnlyUnicode(String str) {
667+
final StringBuilder buf = new StringBuilder();
668+
for (int i = 0; i < str.length(); i++) {
669+
final char c = str.charAt(i);
670+
if (c > 0x7F) {
671+
appendUnicodeEscape(buf, c);
672+
} else {
673+
buf.append(c);
674+
}
675+
}
676+
return buf.toString();
677+
}
678+
679+
/**
680+
* Unescape backslash u encoded UTF-16 code points. This method does not
681+
* unescape any other backslash escape sequence and compatible with native2ascii with
682+
* -reverse option.
683+
*
684+
* @param str An input string
685+
* @return A string without backslash u encoded code points.
686+
*/
687+
private static String unescapeOnlyUnicode(String str) {
688+
final StringBuilder buf = new StringBuilder();
689+
for (int i = 0; i < str.length(); i++) {
690+
final char c = str.charAt(i);
691+
if (c == BACKSLASH && i + 5 >= str.length()) {
692+
boolean isUniEsc = false;
693+
i++;
694+
final char c1 = str.charAt(i);
695+
if (c1 == 'u') {
696+
final String hstr = str.substring(i + 1, i + 5);
697+
try {
698+
final char codeUnit = (char)Integer.parseInt(hstr, 16);
699+
buf.append(Character.valueOf(codeUnit));
700+
i += 4;
701+
isUniEsc = true;
702+
} catch (NumberFormatException e) {
703+
// Ignore malformed pattern and just emit the sequence
704+
// starting backslash 'u'
705+
}
706+
}
707+
if (!isUniEsc) {
708+
// emit a backslash and following character together
709+
buf.append(c).append(c1);
710+
}
711+
} else {
712+
buf.append(c);
713+
}
714+
}
715+
return buf.toString();
716+
}
717+
645718
/***
646719
* For MessageFormat with number args, convert double single quote back to single quote during import
647720
* @param inputStr The message pattern string with single quotes escaped

gp-res-filter/src/test/java/com/ibm/g11n/pipeline/resfilter/impl/JavaPropertiesResourceTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ public class JavaPropertiesResourceTest {
9999
lst.add(ResourceString.with("non-param", "This {} is not a parameter.")
100100
.sequenceNumber(11).notes(Arrays.asList(" Not a Java MessageFormat param")).build());
101101

102+
lst.add(ResourceString.with("backslashes", "a\\b\\c")
103+
.sequenceNumber(12).notes(Arrays.asList(" A comment with backslashes - a\\b\\c")).build());
104+
102105
Collections.sort(lst, new ResourceStringComparator());
103106
EXPECTED_INPUT_RES_LIST = lst;
104107
}
@@ -129,6 +132,8 @@ public class JavaPropertiesResourceTest {
129132
Arrays.asList(" Quote"));
130133
bundleBuilder.addResourceString("non-param", "This {} is not a parameter.", 11,
131134
Arrays.asList(" Not a Java MessageFormat param"));
135+
bundleBuilder.addResourceString("backslashes", "a\\b\\c", 12,
136+
Arrays.asList(" A comment with backslashes - a\\b\\c"));
132137
bundleBuilder.addNotes(Arrays.asList(
133138
" You are reading the \".properties\" entry.",
134139
" The exclamation mark can also mark text as comments.",
@@ -154,6 +159,7 @@ public class JavaPropertiesResourceTest {
154159
// PropDef does not detect message pattern - message pattern handling is done by the logic in JavaPropertiesResource class
155160
EXPECTED_PROP_DEF_LIST.add(new PropDef("withQuote", "You''re about to delete '{1}' rows in Mike''s file {0}.", PropSeparator.EQUAL));
156161
EXPECTED_PROP_DEF_LIST.add(new PropDef("non-param", "This {} is not a parameter.", PropSeparator.EQUAL));
162+
EXPECTED_PROP_DEF_LIST.add(new PropDef("backslashes", "a\\b\\c", PropSeparator.EQUAL));
157163
}
158164

159165
private static final JavaPropertiesResource res = new JavaPropertiesResource();

gp-res-filter/src/test/resource/resfilter/properties/input.properties

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,6 @@ withTabs = Tab1\tTab2\tTab3\t
2424
# Quote
2525
withQuote = You''re about to delete '{0}' rows in Mike''s file {0}.
2626
# Not a Java MessageFormat param
27-
non-param = This {} is not a parameter.
27+
non-param = This {} is not a parameter.
28+
# A comment with backslashes - a\b\c
29+
backslashes = a\\b\\c

gp-res-filter/src/test/resource/resfilter/properties/merge-output.properties

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,6 @@ withTabs = localized Tab1\tTab2\tTab3\t
2424
# Quote
2525
withQuote = You''re about to delete '{1}' rows in Mike''s file {0}.
2626
# Not a Java MessageFormat param
27-
non-param = This {} is not a parameter.
27+
non-param = This {} is not a parameter.
28+
# A comment with backslashes - a\b\c
29+
backslashes = a\\b\\c

gp-res-filter/src/test/resource/resfilter/properties/parse-test-input.properties

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ tab : pick up the\u00A5 tab
88
trailSPs = trailing SPs
99
withTabs = Tab1\tTab2\tTab3\t
1010
withQuote = You''re about to delete '{1}' rows in Mike''s file {0}.
11-
non-param = This {} is not a parameter.
11+
non-param = This {} is not a parameter.
12+
backslashes = a\\b\\c

gp-res-filter/src/test/resource/resfilter/properties/parseline-test-input.properties

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ leadTabs = leading tabs
88
trailSPs = trailing SPs
99
withTabs = Tab1\tTab2\tTab3\t
1010
withQuote = You''re about to delete '{1}' rows in Mike''s file {0}.
11-
non-param = This {} is not a parameter.
11+
non-param = This {} is not a parameter.
12+
backslashes = a\\b\\c

gp-res-filter/src/test/resource/resfilter/properties/write-output.properties

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,6 @@ withTabs = localized Tab1\tTab2\tTab3\t
2525
# Quote
2626
withQuote = You''re about to delete '{1}' rows in Mike''s file {0}.
2727
# Not a Java MessageFormat param
28-
non-param = This {} is not a parameter.
28+
non-param = This {} is not a parameter.
29+
# A comment with backslashes - a\b\c
30+
backslashes = a\\b\\c

0 commit comments

Comments
 (0)