|
1 | 1 | #------------------------------------------------------------------------------ |
2 | | -# Copyright (c) 2021, 2025, Oracle and/or its affiliates. |
| 2 | +# Copyright (c) 2021, 2026, Oracle and/or its affiliates. |
3 | 3 | # |
4 | 4 | # This software is dual-licensed to you under the Universal Permissive License |
5 | 5 | # (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License |
|
30 | 30 | # thin_impl.pyx). |
31 | 31 | #------------------------------------------------------------------------------ |
32 | 32 |
|
# defines the mapping between Oracle Database character set ids (the numbers
# reported by the SQL function NLS_CHARSET_ID()) and the names of the Python
# codecs used to encode and decode data stored in those character sets; each
# entry is annotated with the Oracle character set name that owns the id
cdef dict ORACLE_CHARSET_TO_PYTHON_ENCODING = {
    # ASCII
    1: "ascii",             # US7ASCII

    # ISO 8859 series
    31: "iso_8859_1",       # WE8ISO8859P1
    32: "iso_8859_2",       # EE8ISO8859P2
    33: "iso_8859_3",       # SE8ISO8859P3
    34: "iso_8859_4",       # NEE8ISO8859P4
    35: "iso_8859_5",       # CL8ISO8859P5
    36: "iso_8859_6",       # AR8ISO8859P6
    37: "iso_8859_7",       # EL8ISO8859P7
    38: "iso_8859_8",       # IW8ISO8859P8
    39: "iso_8859_9",       # WE8ISO8859P9
    40: "iso_8859_10",      # NE8ISO8859P10
    41: "tis_620",          # TH8TISASCII
    46: "iso_8859_15",      # WE8ISO8859P15
    47: "iso_8859_13",      # BLT8ISO8859P13

    # Windows code pages (ids 172, 173 and 176 belong to ET8MSWIN923,
    # BG8MSWIN and LT8MSWIN921 and are deliberately not mapped here)
    45: "cp1258",           # VN8MSWIN1258
    170: "cp1250",          # EE8MSWIN1250
    171: "cp1251",          # CL8MSWIN1251
    174: "cp1253",          # EL8MSWIN1253
    175: "cp1255",          # IW8MSWIN1255
    177: "cp1254",          # TR8MSWIN1254
    178: "cp1252",          # WE8MSWIN1252
    179: "cp1257",          # BLT8MSWIN1257
    560: "cp1256",          # AR8MSWIN1256

    # DOS / PC code pages
    4: "cp437",             # US8PC437
    10: "cp850",            # WE8PC850
    150: "cp852",           # EE8PC852
    152: "cp866",           # RU8PC866

    # East Asian multi-byte; the MS Windows code page character sets use the
    # matching "cpNNN" codecs because those include the vendor extensions
    # that the plain euc_kr and big5 codecs reject
    830: "euc_jp",          # JA16EUC
    832: "cp932",           # JA16SJIS
    837: "euc_jp",          # JA16EUCTILDE
    838: "cp932",           # JA16SJISTILDE
    840: "euc_kr",          # KO16KSC5601
    846: "cp949",           # KO16MSWIN949
    852: "gbk",             # ZHS16GBK
    854: "gb18030",         # ZHS32GB18030
    865: "big5",            # ZHT16BIG5
    867: "cp950",           # ZHT16MSWIN950
    868: "big5hkscs",       # ZHT16HKSCS

    # universal encodings
    873: "utf_8",           # AL32UTF8
    2000: "utf_16_be",      # AL16UTF16
}
| 88 | + |
33 | 89 | cdef class Capabilities: |
34 | 90 | cdef: |
35 | 91 | uint16_t protocol_version |
36 | 92 | uint8_t ttc_field_version |
37 | 93 | uint16_t charset_id |
| 94 | + const char* encoding |
38 | 95 | uint16_t ncharset_id |
| 96 | + const char* nencoding |
39 | 97 | bytearray compile_caps |
40 | 98 | bytearray runtime_caps |
41 | 99 | uint32_t max_string_size |
@@ -87,14 +145,36 @@ cdef class Capabilities: |
87 | 145 | if not (server_caps[TNS_RCAP_TTC] & TNS_RCAP_TTC_SESSION_STATE_OPS): |
88 | 146 | self.supports_request_boundaries = False |
89 | 147 |
|
90 | | - cdef int _check_ncharset_id(self) except -1: |
cdef const char* _get_encoding(self) except NULL:
    """
    Returns the name of the Python encoding to use for encoding or decoding
    data that is stored in the database character set, as a NUL-terminated
    C string. If self.encoding has already been set, that value is returned
    unchanged; otherwise the name is looked up from self.charset_id. If no
    encoding is found, an exception is raised. This is only required for
    direct path load and for strings found within Oracle database objects.
    """
    cdef:
        str encoding
        bytes encoded
        dict keep_alive
    if self.encoding != NULL:
        return self.encoding
    encoding = ORACLE_CHARSET_TO_PYTHON_ENCODING.get(self.charset_id)
    if encoding is None:
        errors._raise_err(errors.ERR_DB_CS_NOT_SUPPORTED,
                          charset_id=self.charset_id)

    # a "char*" obtained from a bytes object is only valid while that
    # object is alive; returning a pointer into the temporary created by
    # encoding.encode() would leave the caller with a dangling pointer, so
    # the encoded name is parked in a module-level dictionary (created on
    # first use) which owns it for the lifetime of the module; setdefault()
    # always hands back the object that is actually stored, so the pointer
    # returned for a given name is stable across calls
    keep_alive = globals().setdefault("_ENCODING_NAME_BYTES", {})
    encoded = keep_alive.setdefault(encoding, encoding.encode())
    return encoded
| 163 | + |
cdef const char* _get_nencoding(self) except NULL:
    """
    Returns the name of the Python encoding to use for encoding or decoding
    data that is stored in the database national character set, as a
    NUL-terminated C string. If self.nencoding has already been set, that
    value is returned unchanged; otherwise the name is looked up from
    self.ncharset_id. If no encoding is found, an exception is raised. This
    is required for handling NCHAR data.
    """
    cdef:
        str encoding
        bytes encoded
        dict keep_alive
    if self.nencoding != NULL:
        return self.nencoding
    encoding = ORACLE_CHARSET_TO_PYTHON_ENCODING.get(self.ncharset_id)
    if encoding is None:
        errors._raise_err(errors.ERR_NCHAR_CS_NOT_SUPPORTED,
                          charset_id=self.ncharset_id)

    # a "char*" obtained from a bytes object is only valid while that
    # object is alive; returning a pointer into the temporary created by
    # encoding.encode() would leave the caller with a dangling pointer, so
    # the encoded name is parked in a module-level dictionary (created on
    # first use) which owns it for the lifetime of the module; setdefault()
    # always hands back the object that is actually stored, so the pointer
    # returned for a given name is stable across calls
    keep_alive = globals().setdefault("_ENCODING_NAME_BYTES", {})
    encoded = keep_alive.setdefault(encoding, encoding.encode())
    return encoded
98 | 178 |
|
99 | 179 | @cython.boundscheck(False) |
100 | 180 | cdef void _init_compile_caps(self): |
|
0 commit comments