Skip to content

Commit ea5dbb4

Browse files
committed
Merge branch 'fix-unicode-replacements'
2 parents: 1dc3d4d + f954742 | commit ea5dbb4

4 files changed

Lines changed: 64 additions & 23 deletions

File tree

PythonScript.Tests/tests/TestReplacer.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,5 +106,20 @@ TEST_F(ReplacerTest, ReplaceSimpleAnsi) {
106106
for_each(entries.begin(), entries.end(), deleteEntry);
107107
}
108108

109+
/** Tests UTF8 replacement with a UTF-8 string replacement
110+
*/
111+
TEST_F(ReplacerTest, ReplaceUtf8ReplacementString) {
109112

113+
NppPythonScript::Replacer<NppPythonScript::Utf8CharTraits> replacer;
114+
std::list<NppPythonScript::ReplaceEntry* > entries;
115+
bool moreEntries = replacer.startReplace("aaa\xC3\xB4" "ZZZ" , 8, 0, 0, "aaa\xC3\xB4", "x" "\xC3\xB5" "z", NppPythonScript::python_re_flag_normal,entries);
116+
ASSERT_EQ(1, entries.size());
117+
std::list<NppPythonScript::ReplaceEntry*>::const_iterator it = entries.begin();
118+
ASSERT_EQ(0, (*it)->getStart());
119+
ASSERT_STREQ("x" "\xC3\xB5" "z", (*it)->getReplacement());
120+
ASSERT_EQ(4, (*it)->getReplacementLength());
121+
122+
123+
for_each(entries.begin(), entries.end(), deleteEntry);
124+
}
110125
}

PythonScript/python_tests/tests/ReplaceAnsiTestCase.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,33 @@
33
from Npp import *
44

55
class ReplaceAnsiTestCase(unittest.TestCase):
6-
def setUp(self):
7-
notepad.new()
8-
notepad.runMenuCommand("Encoding", "Encode in ANSI")
9-
editor.write(u'Here is some text\r\nWith some umlauts XäXüXö\r\n'.encode('windows-1252'));
10-
11-
def tearDown(self):
12-
editor.setSavePoint()
13-
notepad.close()
14-
15-
def test_simple_replace(self):
16-
editor.rereplace(r'some\s([a-z]+)', 'TEST');
17-
text = editor.getText()
18-
self.assertEqual(text, u'Here is TEST\r\nWith TEST XäXüXö\r\n'.encode('windows-1252'));
19-
20-
def test_ansi_replace(self):
21-
editor.rereplace(u'X[äö]'.encode('windows-1252'), 'YY');
22-
text = editor.getText()
23-
self.assertEqual(text, u'Here is some text\r\nWith some umlauts YYXüYY\r\n'.encode('windows-1252'));
6+
def setUp(self):
7+
notepad.new()
8+
notepad.runMenuCommand("Encoding", "Encode in ANSI")
9+
editor.write(u'Here is some text\r\nWith some umlauts XäXüXö\r\n'.encode('windows-1252'));
10+
11+
def tearDown(self):
12+
editor.setSavePoint()
13+
notepad.close()
14+
15+
def test_simple_replace(self):
16+
editor.rereplace(r'some\s([a-z]+)', 'TEST');
17+
text = editor.getText()
18+
self.assertEqual(text, u'Here is TEST\r\nWith TEST XäXüXö\r\n'.encode('windows-1252'));
19+
20+
def test_ansi_replace(self):
21+
editor.rereplace(u'X[äö]'.encode('windows-1252'), 'YY');
22+
text = editor.getText()
23+
self.assertEqual(text, u'Here is some text\r\nWith some umlauts YYXüYY\r\n'.encode('windows-1252'));
2424

25-
def test_unicode_replace(self):
26-
editor.rereplace(u'X[äö]', 'PP');
27-
text = editor.getText()
28-
self.assertEqual(text, u'Here is some text\r\nWith some umlauts PPXüPP\r\n'.encode('windows-1252'));
25+
def test_unicode_replace(self):
26+
editor.rereplace(u'X[äö]', 'PP');
27+
text = editor.getText()
28+
self.assertEqual(text, u'Here is some text\r\nWith some umlauts PPXüPP\r\n'.encode('windows-1252'));
29+
30+
def test_replace_with_unicode(self):
31+
editor.rereplace('Here|With', u'XäöüY')
32+
text = editor.getText()
33+
self.assertEqual(text, u'XäöüY is some text\r\nXäöüY some umlauts XäXüXö\r\n'.encode('windows-1252'))
2934

3035
suite = unittest.TestLoader().loadTestsFromTestCase(ReplaceAnsiTestCase)

PythonScript/python_tests/tests/ReplaceUTF8TestCase.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,16 @@ def test_replace_condition(self):
2626
editor.rereplace('(Here)|(Xä)', '(?1Cheese)(?2Y)')
2727
text = editor.getText()
2828
self.assertEqual(text, 'Cheese is some text\r\nWith some umlauts YXüXö\r\n')
29+
30+
31+
def test_replace_with_unicode(self):
32+
editor.rereplace('Here|With', u'XäöüY')
33+
text = editor.getText()
34+
self.assertEqual(text, 'XäöüY is some text\r\nXäöüY some umlauts XäXüXö\r\n')
35+
36+
def test_replace_with_unicode_using_standard_string(self):
37+
editor.rereplace('Here|With', 'XäöüY') # note the standard replacement string here
38+
text = editor.getText()
39+
self.assertEqual(text, 'XäöüY is some text\r\nXäöüY some umlauts XäXüXö\r\n')
40+
2941
suite = unittest.TestLoader().loadTestsFromTestCase(ReplaceUTF8TestCase)

PythonScript/src/Replacer.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ int BoostRegexMatch<CharTraitsT>::groupIndexFromName(const char *groupName)
138138
template <class CharTraitsT>
139139
void BoostRegexMatch<CharTraitsT>::expand(const char *format, char **result, int *resultLength)
140140
{
141-
CharTraitsT::string_type resultString = m_match->format(format, boost::regex_constants::format_all);
141+
CharTraitsT::string_type formatString = CharTraitsT::fromChars(format);
142+
CharTraitsT::string_type resultString = m_match->format(formatString, boost::regex_constants::format_all);
142143

143144
std::string charResult(CharTraitsT::toCharString(resultString));
144145

@@ -166,6 +167,10 @@ typename std::string BoostRegexMatch<CharTraitsT>::getTextForGroup(GroupDetail*
166167
// We just want a u32string to utf8 char*
167168
return std::basic_string<char>(UtfConversion::toUtf8(ConstString<U32>(source)));
168169
}
170+
171+
static string_type fromChars(const char *source) {
172+
return string_type(UtfConversion::toUtf32(ConstString<U8>(source)));
173+
}
169174
};
170175

171176
class AnsiCharTraits {
@@ -178,6 +183,10 @@ typename std::string BoostRegexMatch<CharTraitsT>::getTextForGroup(GroupDetail*
178183
static std::basic_string<char> toCharString(const string_type& source) {
179184
return source;
180185
}
186+
187+
static string_type fromChars(const char *source) {
188+
return string_type(source);
189+
}
181190
};
182191

183192
template <class CharTraitsT>

0 commit comments

Comments
 (0)