Skip to content

Commit cbb1442

Browse files
Fixed bug when using direct path load with a single-byte character set (#567)
and when encoding/decoding strings in database objects (#371).
1 parent e127923 commit cbb1442

10 files changed

Lines changed: 170 additions & 45 deletions

File tree

doc/src/release_notes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ oracledb `4.0.0 <https://github.com/oracle/python-oracledb/compare/v3.4.2...v4.0
1919
Thin Mode Changes
2020
+++++++++++++++++
2121

22+
#) Fixed bug when using direct path load with a single-byte database character
23+
set
24+
(`issue 567 <https://github.com/oracle/python-oracledb/issues/567>`__).
25+
#) Fixed bug when decoding/encoding strings found within database objects
26+
(`issue 371 <https://github.com/oracle/python-oracledb/issues/371>`__).
2227
#) Fixed bug where an unexpected error is thrown during authentication when using
2328
tokens
2429
(`issue 542 <https://github.com/oracle/python-oracledb/issues/542>`__).

src/oracledb/base_impl.pxd

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1053,11 +1053,13 @@ cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata,
10531053
cdef object convert_oracle_data_to_python(OracleMetadata from_metadata,
10541054
OracleMetadata to_metadatda,
10551055
OracleData* data,
1056+
const char* encoding,
10561057
const char* encoding_errors,
10571058
bint from_dbobject)
10581059
cdef object convert_python_to_oracle_data(OracleMetadata metadata,
10591060
OracleData* data,
1060-
object value)
1061+
object value,
1062+
const char* encoding)
10611063
cdef int convert_vector_to_arrow(ArrowArrayImpl array_impl,
10621064
object vector) except -1
10631065
cdef cydatetime.datetime convert_date_to_python(OracleDataBuffer *buffer)

src/oracledb/errors.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ def _raise_not_supported(feature: str) -> None:
335335
ERR_UNSUPPORTED_ARROW_TYPE = 3037
336336
ERR_CANNOT_CONVERT_TO_ARROW_TYPE = 3038
337337
ERR_CANNOT_CONVERT_FROM_ARROW_TYPE = 3039
338+
ERR_DB_CS_NOT_SUPPORTED = 3040
338339

339340
# error numbers that result in DatabaseError
340341
ERR_TNS_ENTRY_NOT_FOUND = 4000
@@ -641,6 +642,10 @@ def _raise_not_supported(feature: str) -> None:
641642
),
642643
ERR_CURSOR_HAS_BEEN_CLOSED: "cursor has been closed by the database",
643644
ERR_CURSOR_NOT_OPEN: "cursor is not open",
645+
ERR_DB_CS_NOT_SUPPORTED: (
646+
"database character set id {charset_id} is not supported by "
647+
"python-oracledb in thin mode"
648+
),
644649
ERR_DBOBJECT_ATTR_MAX_SIZE_VIOLATED: (
645650
"attribute {attr_name} of type {type_name} exceeds its maximum size "
646651
"(actual: {actual_size}, maximum: {max_size})"

src/oracledb/impl/base/converters.pyx

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -418,16 +418,15 @@ cdef int convert_str_to_arrow(ArrowArrayImpl array_impl,
418418
array_impl.append_bytes(<void*> rb.ptr, rb.num_bytes)
419419

420420

421-
cdef object convert_str_to_python(OracleDataBuffer *buffer, uint8_t csfrm,
421+
cdef object convert_str_to_python(OracleDataBuffer *buffer,
422+
const char* encoding,
422423
const char* encoding_errors):
423424
"""
424425
Converts a CHAR, NCHAR, LONG, VARCHAR, or NVARCHAR value stored in the
425426
buffer to Python string.
426427
"""
427428
cdef OracleRawBytes *rb = &buffer.as_raw_bytes
428-
if csfrm == CS_FORM_IMPLICIT:
429-
return rb.ptr[:rb.num_bytes].decode(ENCODING_UTF8, encoding_errors)
430-
return rb.ptr[:rb.num_bytes].decode(ENCODING_UTF16, encoding_errors)
429+
return rb.ptr[:rb.num_bytes].decode(encoding, encoding_errors)
431430

432431

433432
cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata,
@@ -499,21 +498,20 @@ cdef int convert_oracle_data_to_arrow(OracleMetadata from_metadata,
499498
cdef object convert_oracle_data_to_python(OracleMetadata from_metadata,
500499
OracleMetadata to_metadata,
501500
OracleData* data,
501+
const char* encoding,
502502
const char* encoding_errors,
503503
bint from_dbobject):
504504
"""
505505
Converts the value stored in OracleData to a Python object.
506506
"""
507-
cdef:
508-
uint8_t py_type_num, ora_type_num, csfrm
507+
cdef uint8_t py_type_num, ora_type_num
509508

510509
# NULL values
511510
if data.is_null:
512511
return None
513512

514513
# reduce typing
515514
ora_type_num = from_metadata.dbtype._ora_type_num
516-
csfrm = from_metadata.dbtype._csfrm
517515
py_type_num = to_metadata._py_type_num
518516

519517
# Python bytes
@@ -539,7 +537,8 @@ cdef object convert_oracle_data_to_python(OracleMetadata from_metadata,
539537
ORA_TYPE_NUM_LONG,
540538
ORA_TYPE_NUM_VARCHAR
541539
):
542-
return convert_str_to_python(&data.buffer, csfrm, encoding_errors)
540+
return convert_str_to_python(&data.buffer, encoding,
541+
encoding_errors)
543542

544543
# Oracle NUMBER
545544
elif ora_type_num == ORA_TYPE_NUM_NUMBER:
@@ -591,7 +590,8 @@ cdef object convert_oracle_data_to_python(OracleMetadata from_metadata,
591590
ORA_TYPE_NUM_LONG,
592591
ORA_TYPE_NUM_VARCHAR
593592
):
594-
value = convert_str_to_python(&data.buffer, csfrm, encoding_errors)
593+
value = convert_str_to_python(&data.buffer, encoding,
594+
encoding_errors)
595595
return int(PY_TYPE_DECIMAL(value))
596596

597597
# Oracle BINARY_DOUBLE
@@ -674,7 +674,8 @@ cdef object convert_oracle_data_to_python(OracleMetadata from_metadata,
674674

675675
cdef object convert_python_to_oracle_data(OracleMetadata metadata,
676676
OracleData* data,
677-
object value):
677+
object value,
678+
const char* encoding):
678679
"""
679680
Converts a Python value to the OracleData structure. The object returned is
680681
any temporary object that is required to be retained (if any).
@@ -688,10 +689,7 @@ cdef object convert_python_to_oracle_data(OracleMetadata metadata,
688689
elif ora_type_num in (ORA_TYPE_NUM_VARCHAR,
689690
ORA_TYPE_NUM_CHAR,
690691
ORA_TYPE_NUM_LONG):
691-
if metadata.dbtype._csfrm == CS_FORM_IMPLICIT:
692-
temp_bytes = (<str> value).encode()
693-
else:
694-
temp_bytes = (<str> value).encode(ENCODING_UTF16)
692+
temp_bytes = (<str> value).encode(encoding)
695693
convert_bytes_to_oracle_data(&data.buffer, temp_bytes)
696694
if data.buffer.as_raw_bytes.num_bytes == 0:
697695
data.is_null = True

src/oracledb/impl/thin/capabilities.pyx

Lines changed: 85 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#------------------------------------------------------------------------------
2-
# Copyright (c) 2021, 2025, Oracle and/or its affiliates.
2+
# Copyright (c) 2021, 2026, Oracle and/or its affiliates.
33
#
44
# This software is dual-licensed to you under the Universal Permissive License
55
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
@@ -30,12 +30,70 @@
3030
# thin_impl.pyx).
3131
#------------------------------------------------------------------------------
3232

33+
# defines the mapping between Oracle Database character set ids and the codec
34+
# names used by Python
35+
cdef dict ORACLE_CHARSET_TO_PYTHON_ENCODING = {
36+
# ASCII
37+
1: "ascii", # US7ASCII
38+
39+
# ISO 8859 series
40+
31: "iso_8859_1", # WE8ISO8859P1
41+
32: "iso_8859_2", # EE8ISO8859P2
42+
33: "iso_8859_3", # SE8ISO8859P3
43+
34: "iso_8859_4", # NEE8ISO8859P4
44+
35: "iso_8859_5", # CL8ISO8859P5
45+
36: "iso_8859_6", # AR8ISO8859P6
46+
37: "iso_8859_7", # EL8ISO8859P7
47+
38: "iso_8859_8", # IW8ISO8859P8
48+
39: "iso_8859_9", # WE8ISO8859P9
49+
40: "iso_8859_10", # NE8ISO8859P10
50+
41: "tis_620", # TH8TISASCII
51+
46: "iso_8859_15", # WE8ISO8859P15
52+
47: "iso_8859_13", # BLT8ISO8859P13
53+
54+
# Windows code pages
55+
170: "cp1250", # EE8MSWIN1250
56+
171: "cp1251", # CL8MSWIN1251
57+
172: "cp1253", # EL8MSWIN1253
58+
173: "cp1254", # TR8MSWIN1254
59+
174: "cp1255", # IW8MSWIN1255
60+
175: "cp1256", # AR8MSWIN1256
61+
176: "cp1257", # BLT8MSWIN1257
62+
177: "cp1258", # VN8MSWIN1258
63+
178: "cp1252", # WE8MSWIN1252
64+
65+
# DOS / PC code pages
66+
351: "cp850", # WE8PC850
67+
354: "cp437", # US8PC437
68+
368: "cp866", # RU8PC866
69+
382: "cp852", # EE8PC852
70+
71+
# East Asian multi-byte
72+
829: "big5", # ZHT16BIG5
73+
830: "euc_kr", # KO16KSC5601
74+
831: "euc_jp", # JA16EUC
75+
832: "cp932", # JA16SJIS
76+
833: "cp932", # JA16SJISTILDE
77+
834: "euc_jp", # JA16EUCTILDE
78+
846: "gbk", # ZHS16GBK
79+
850: "big5hkscs", # ZHT16HKSCS
80+
852: "euc_kr", # KO16MSWIN949
81+
854: "big5", # ZHT16MSWIN950
82+
870: "gb18030", # ZHS32GB18030
83+
84+
# universal encodings
85+
873: "utf_8", # AL32UTF8
86+
2000: "utf_16_be", # AL16UTF16
87+
}
88+
3389
cdef class Capabilities:
3490
cdef:
3591
uint16_t protocol_version
3692
uint8_t ttc_field_version
3793
uint16_t charset_id
94+
const char* encoding
3895
uint16_t ncharset_id
96+
const char* nencoding
3997
bytearray compile_caps
4098
bytearray runtime_caps
4199
uint32_t max_string_size
@@ -87,14 +145,36 @@ cdef class Capabilities:
87145
if not (server_caps[TNS_RCAP_TTC] & TNS_RCAP_TTC_SESSION_STATE_OPS):
88146
self.supports_request_boundaries = False
89147

90-
cdef int _check_ncharset_id(self) except -1:
148+
cdef const char* _get_encoding(self) except NULL:
149+
"""
150+
Returns the encoding to use for encoding or decoding data that is
151+
stored in the database character set. If no encoding is found, an
152+
exception is raised. This is only required for direct path load and for
153+
strings found within Oracle database objects.
154+
"""
155+
cdef str encoding
156+
if self.encoding != NULL:
157+
return self.encoding
158+
encoding = ORACLE_CHARSET_TO_PYTHON_ENCODING.get(self.charset_id)
159+
if encoding is None:
160+
errors._raise_err(errors.ERR_DB_CS_NOT_SUPPORTED,
161+
charset_id=self.charset_id)
162+
return encoding.encode()
163+
164+
cdef const char* _get_nencoding(self) except NULL:
91165
"""
92-
Checks that the national character set id is AL16UTF16, which is the
93-
only id that is currently supported.
166+
Returns the encoding to use for encoding or decoding data that is
167+
stored in the database national character set. If no encoding is found,
168+
an exception is raised. This is required for handling NCHAR data.
94169
"""
95-
if self.ncharset_id != TNS_CHARSET_UTF16:
170+
cdef str encoding
171+
if self.nencoding != NULL:
172+
return self.nencoding
173+
encoding = ORACLE_CHARSET_TO_PYTHON_ENCODING.get(self.ncharset_id)
174+
if encoding is None:
96175
errors._raise_err(errors.ERR_NCHAR_CS_NOT_SUPPORTED,
97176
charset_id=self.ncharset_id)
177+
return encoding.encode()
98178

99179
@cython.boundscheck(False)
100180
cdef void _init_compile_caps(self):

src/oracledb/impl/thin/dbobject.pyx

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#------------------------------------------------------------------------------
2-
# Copyright (c) 2022, 2025, Oracle and/or its affiliates.
2+
# Copyright (c) 2022, 2026, Oracle and/or its affiliates.
33
#
44
# This software is dual-licensed to you under the Universal Permissive License
55
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
@@ -106,6 +106,7 @@ cdef class DbObjectPickleBuffer(GrowableBuffer):
106106
cdef:
107107
uint8_t image_flags, image_version
108108
BaseThinLobImpl lob_impl
109+
const char* encoding
109110
const char_type *ptr
110111
ssize_t bytes_left
111112
uint32_t xml_flag
@@ -118,7 +119,8 @@ cdef class DbObjectPickleBuffer(GrowableBuffer):
118119
bytes_left = self.bytes_left()
119120
ptr = self.read_raw_bytes(bytes_left)
120121
if xml_flag & TNS_XML_TYPE_STRING:
121-
return ptr[:bytes_left].decode()
122+
encoding = conn_impl._protocol._caps._get_encoding()
123+
return ptr[:bytes_left].decode(encoding)
122124
elif xml_flag & TNS_XML_TYPE_LOB:
123125
lob_impl = conn_impl._create_lob_impl(DB_TYPE_CLOB,
124126
ptr[:bytes_left])
@@ -250,8 +252,10 @@ cdef class ThinDbObjectImpl(BaseDbObjectImpl):
250252
"""
251253
cdef:
252254
uint8_t ora_type_num = metadata.dbtype._ora_type_num
255+
BaseThinConnImpl conn_impl
253256
ThinDbObjectImpl obj_impl
254257
BaseThinLobImpl lob_impl
258+
const char* encoding
255259
bytes temp_bytes
256260
if value is None:
257261
if metadata.objtype is not None \
@@ -260,10 +264,12 @@ cdef class ThinDbObjectImpl(BaseDbObjectImpl):
260264
else:
261265
buf.write_uint8(TNS_NULL_LENGTH_INDICATOR)
262266
elif ora_type_num in (ORA_TYPE_NUM_CHAR, ORA_TYPE_NUM_VARCHAR):
267+
conn_impl = self.type._conn_impl
263268
if metadata.dbtype._csfrm == CS_FORM_IMPLICIT:
264-
temp_bytes = (<str> value).encode()
269+
encoding = conn_impl._protocol._caps._get_encoding()
265270
else:
266-
temp_bytes = (<str> value).encode(ENCODING_UTF16)
271+
encoding = conn_impl._protocol._caps._get_nencoding()
272+
temp_bytes = (<str> value).encode(encoding)
267273
buf.write_bytes_with_length(temp_bytes)
268274
elif ora_type_num == ORA_TYPE_NUM_NUMBER:
269275
temp_bytes = (<str> cpython.PyObject_Str(value)).encode()
@@ -353,19 +359,19 @@ cdef class ThinDbObjectImpl(BaseDbObjectImpl):
353359
"""
354360
cdef:
355361
uint8_t ora_type_num = metadata.dbtype._ora_type_num
362+
BaseThinConnImpl conn_impl = self.type._conn_impl
356363
uint8_t csfrm = metadata.dbtype._csfrm
357364
DbObjectPickleBuffer xml_buf
358365
bint is_null, is_collection
359-
BaseThinConnImpl conn_impl
360366
ThinDbObjectImpl obj_impl
361367
BaseThinLobImpl lob_impl
368+
const char* encoding
362369
OracleData data
363370
bytes locator
364371
type cls
365372
if ora_type_num in (ORA_TYPE_NUM_CLOB,
366-
ORA_TYPE_NUM_BLOB,
367-
ORA_TYPE_NUM_BFILE):
368-
conn_impl = self.type._conn_impl
373+
ORA_TYPE_NUM_BLOB,
374+
ORA_TYPE_NUM_BFILE):
369375
locator = buf.read_bytes()
370376
if locator is None:
371377
return None
@@ -381,7 +387,7 @@ cdef class ThinDbObjectImpl(BaseDbObjectImpl):
381387
return None
382388
xml_buf = DbObjectPickleBuffer.__new__(DbObjectPickleBuffer)
383389
xml_buf._populate_from_bytes(xml_bytes)
384-
return xml_buf.read_xmltype(self.type._conn_impl)
390+
return xml_buf.read_xmltype(conn_impl)
385391
is_collection = \
386392
metadata.objtype.is_collection or self.type.is_collection
387393
buf.get_is_atomic_null(is_collection, &is_null)
@@ -396,11 +402,12 @@ cdef class ThinDbObjectImpl(BaseDbObjectImpl):
396402
return PY_TYPE_DB_OBJECT._from_impl(obj_impl)
397403
buf.read_oracle_data(metadata, &data, from_dbobject=True,
398404
decode_str=False)
399-
if metadata.dbtype._csfrm == CS_FORM_NCHAR:
400-
conn_impl = self.type._conn_impl
401-
conn_impl._protocol._caps._check_ncharset_id()
405+
if metadata.dbtype._csfrm == CS_FORM_IMPLICIT:
406+
encoding = conn_impl._protocol._caps._get_encoding()
407+
else:
408+
encoding = conn_impl._protocol._caps._get_nencoding()
402409
return convert_oracle_data_to_python(metadata, metadata, &data,
403-
encoding_errors=NULL,
410+
encoding, encoding_errors=NULL,
404411
from_dbobject=True)
405412

406413
def append_checked(self, object value):

src/oracledb/impl/thin/messages/base.pyx

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,7 @@ cdef class MessageWithData(Message):
996996
uint8_t num_bytes, ora_type_num, csfrm
997997
ThinDbObjectTypeImpl typ_impl
998998
BaseThinCursorImpl cursor_impl
999+
const char *encoding = NULL
9991000
object column_value = None
10001001
ThinDbObjectImpl obj_impl
10011002
int32_t actual_num_bytes
@@ -1074,14 +1075,14 @@ cdef class MessageWithData(Message):
10741075
decode_str=self.cursor_impl.fetching_arrow
10751076
)
10761077
if metadata.dbtype._csfrm == CS_FORM_NCHAR:
1077-
buf._caps._check_ncharset_id()
1078+
encoding = buf._caps._get_nencoding()
10781079
if self.cursor_impl.fetching_arrow:
10791080
convert_oracle_data_to_arrow(
10801081
metadata, var_impl.metadata, &data, var_impl._arrow_array
10811082
)
10821083
else:
10831084
column_value = convert_oracle_data_to_python(
1084-
metadata, var_impl.metadata, &data,
1085+
metadata, var_impl.metadata, &data, encoding,
10851086
var_impl._encoding_errors, from_dbobject=False
10861087
)
10871088
if not self.in_fetch:
@@ -1396,6 +1397,7 @@ cdef class MessageWithData(Message):
13961397
cdef:
13971398
ThinDbObjectTypeImpl typ_impl
13981399
BaseThinCursorImpl cursor_impl
1400+
const char* encoding = NULL
13991401
BaseThinLobImpl lob_impl
14001402
OracleMetadata metadata
14011403
uint8_t ora_type_num
@@ -1409,8 +1411,11 @@ cdef class MessageWithData(Message):
14091411
value = convert_arrow_to_oracle_data(metadata, &data,
14101412
var_impl._arrow_array, offset)
14111413
else:
1414+
if metadata.dbtype._csfrm == CS_FORM_NCHAR:
1415+
encoding = ENCODING_UTF16
14121416
value = convert_python_to_oracle_data(metadata, &data,
1413-
var_impl._values[offset])
1417+
var_impl._values[offset],
1418+
encoding)
14141419
ora_type_num = metadata.dbtype._ora_type_num
14151420
if data.is_null:
14161421
if ora_type_num == ORA_TYPE_NUM_BOOLEAN:

0 commit comments

Comments
 (0)