Skip to content

Commit cc1bc4c

Browse files
Silarn and Holt59 authored
Fix string conversion issues with emoji characters (#120)
* Fix load/cast of QString to avoid issues with surrogate pairs (emoji characters, ...).

---------

Co-authored-by: Mikaël Capelle <capelle.mikael@gmail.com>
1 parent 7a885ad commit cc1bc4c

3 files changed

Lines changed: 44 additions & 18 deletions

File tree

src/pybind11-qt/pybind11_qt_basic.cpp

Lines changed: 27 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -35,26 +35,36 @@ namespace pybind11::detail {
3535
*/
3636
bool type_caster<QString>::load(handle src, bool)
3737
{
38-
3938
PyObject* objPtr = src.ptr();
4039

41-
if (!PyBytes_Check(objPtr) && !PyUnicode_Check(objPtr)) {
42-
return false;
40+
if (PyBytes_Check(objPtr)) {
41+
value = QString::fromUtf8(PyBytes_AsString(objPtr));
42+
return true;
4343
}
44+
else if (PyUnicode_Check(objPtr)) {
45+
switch (PyUnicode_KIND(objPtr)) {
46+
case PyUnicode_1BYTE_KIND:
47+
value = QString::fromUtf8(PyUnicode_AsUTF8(objPtr));
48+
break;
49+
case PyUnicode_2BYTE_KIND:
50+
value = QString::fromUtf16(
51+
reinterpret_cast<char16_t*>(PyUnicode_2BYTE_DATA(objPtr)),
52+
PyUnicode_GET_LENGTH(objPtr));
53+
break;
54+
case PyUnicode_4BYTE_KIND:
55+
value = QString::fromUcs4(
56+
reinterpret_cast<char32_t*>(PyUnicode_4BYTE_DATA(objPtr)),
57+
PyUnicode_GET_LENGTH(objPtr));
58+
break;
59+
default:
60+
return false;
61+
}
4462

45-
// Ensure the string uses 8-bit characters
46-
PyObject* strPtr =
47-
PyUnicode_Check(objPtr) ? PyUnicode_AsUTF8String(objPtr) : objPtr;
48-
49-
// Extract the character data from the python string
50-
value = QString::fromUtf8(PyBytes_AsString(strPtr));
51-
52-
// Deallocate local copy if one was made
53-
if (strPtr != objPtr) {
54-
Py_DecRef(strPtr);
63+
return true;
64+
}
65+
else {
66+
return false;
5567
}
56-
57-
return true;
5868
}
5969

6070
/**
@@ -67,9 +77,8 @@ namespace pybind11::detail {
6777
handle type_caster<QString>::cast(QString src, return_value_policy /* policy */,
6878
handle /* parent */)
6979
{
70-
static_assert(sizeof(QChar) == 2);
71-
return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, src.constData(),
72-
src.length());
80+
return PyUnicode_DecodeUTF16(reinterpret_cast<const char*>(src.utf16()),
81+
2 * src.length(), nullptr, 0);
7382
}
7483

7584
bool type_caster<QVariant>::load(handle src, bool)

tests/python/test_qt.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,14 @@ PYBIND11_MODULE(qt, m)
1111
{
1212
// QString
1313

14+
m.def("create_qstring_with_emoji", []() {
15+
return QString::fromUtf16(u"\U0001F600");
16+
});
17+
18+
m.def("consume_qstring_with_emoji", [](QString const& qstring) {
19+
return qstring.length();
20+
});
21+
1422
m.def("qstring_to_stdstring", [](QString const& qstring) {
1523
return qstring.toStdString();
1624
});

tests/python/test_qt.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,19 @@ def test_qstring():
1010

1111
assert m.qstring_to_stdstring("éàüö") == "éàüö"
1212
assert m.stdstring_to_qstring("éàüö") == "éàüö"
13+
assert m.qstring_to_stdstring("خالد") == "خالد"
14+
assert m.qstring_to_stdstring("🌎") == "🌎"
1315

1416
assert m.qstring_to_int("2") == 2
1517
assert m.int_to_qstring(2) == "2"
1618

19+
emoji = m.create_qstring_with_emoji()
20+
21+
assert emoji.encode("utf-16be", "surrogatepass") == b"\xd8\x3d\xde\x00"
22+
assert m.consume_qstring_with_emoji(emoji) == 2
23+
24+
assert m.consume_qstring_with_emoji("🌎") == 2
25+
1726

1827
def test_qstringlist():
1928
assert m.qstringlist_join([""], "--") == ""

0 commit comments

Comments
 (0)