@@ -3,87 +3,106 @@ local validation = require("validation")
33
44local validateString = validation .validateString
55
6- local ffi_string = ffi .string
76local tonumber = tonumber
87local tostring = tostring
98
10- local iconv = {
11- errorMessages = {
12- INVALID_CONVERSION_HANDLE = " Cannot close an invalid iconv_t descriptor" ,
13- },
14- }
9+ local UTF_MAX_BYTES_PER_CODEPOINT = 4
10+
11+ local iconv = {}
1512
1613iconv .cdefs = [[
1714typedef void* iconv_t;
18- typedef struct iconv_result_t {
19- uint8_t status_code;
20- size_t num_bytes_written;
21- const char* message;
15+ typedef enum iconv_result_t {
16+ ICONV_RESULT_OK,
17+ ICONV_INVALID_REQUEST,
18+ ICONV_INVALID_DESCRIPTOR,
19+ ICONV_INVALID_INPUT,
20+ ICONV_INVALID_OUTPUT,
21+ ICONV_CONVERSION_FAILED,
22+ ICONV_INCOMPLETE_INPUT,
23+ ICONV_WRITEBUFFER_FULL,
24+ ICONV_RESULT_LAST,
2225} iconv_result_t;
2326
27+ typedef char* iconv_cursor_t;
28+ typedef const char* iconv_encoding_t; // Aliased for now, replace with enum later
29+
30+ typedef struct iconv_memory_t {
31+ iconv_encoding_t charset;
32+ iconv_cursor_t buffer;
33+ size_t length;
34+ size_t remaining;
35+ } iconv_memory_t;
36+
37+ typedef struct iconv_request_t {
38+ iconv_memory_t input;
39+ iconv_memory_t output;
40+ iconv_t handle;
41+ } iconv_request_t;
42+
2443struct static_iconv_exports_table {
25- iconv_result_t (*iconv_convert)(char* input, size_t input_size, const char* input_encoding, const char* output_encoding, char* output, size_t output_size);
44+
45+ // Exports from iconv.h
2646 iconv_t (*iconv_open)(const char* input_encoding, const char* output_encoding);
2747 int (*iconv_close)(iconv_t conversion_descriptor);
2848 size_t (*iconv)(iconv_t conversion_descriptor, char** input, size_t* input_size, char** output, size_t* output_size);
2949
30- // Shared constants
31- size_t CHARSET_CONVERSION_FAILED;
50+ // Charset conversion API
51+ iconv_result_t (*iconv_convert)(iconv_request_t* conversion_details);
52+ iconv_result_t (*iconv_try_close)(iconv_request_t* request);
53+
54+ // Utility methods
55+ const char* (*iconv_strerror)(iconv_result_t status);
56+ bool (*iconv_check_result)(iconv_t handle);
3257};
3358
3459]]
3560
36- -- Should probably move this elsewhere?
37- local function ffi_strerror (errno )
38- return ffi .string (ffi .C .strerror (errno ))
39- end
40-
--- Registers this module's C declarations (iconv.cdefs) with the LuaJIT FFI.
-- Takes no arguments and returns nothing.
function iconv.initialize()
	local declarations = iconv.cdefs
	ffi.cdef(declarations)
end
4964
50- local UTF_BYTES_PER_CODEPOINT = 4
51-
-- Scratch state shared by every conversion. The request struct and both string
-- buffers are created on first use and then recycled, since allocating them is somewhat costly
local request, readBuffer, writeBuffer

--- Converts text from one character encoding to another via the native iconv bindings.
-- All three arguments are checked with validateString before any work is done.
-- @tparam string input Bytes to convert
-- @tparam string inputEncoding Charset name describing how input is encoded
-- @tparam string outputEncoding Charset name to convert to
-- @treturn[1] string Converted bytes
-- @treturn[1] string Status message for the result code (also returned on success)
-- @treturn[2] nil
-- @treturn[2] string Status message for the result code that signalled the failure
function iconv.convert(input, inputEncoding, outputEncoding)
	validateString(input, "input")
	validateString(inputEncoding, "inputEncoding")
	validateString(outputEncoding, "outputEncoding")

	-- Preallocate resources only when needed; it's somewhat costly otherwise
	request = request or ffi.new("iconv_request_t")
	readBuffer = readBuffer or buffer.new(256)
	writeBuffer = writeBuffer or buffer.new(256)

	-- The read buffer is recycled between calls: without a reset, put() appends to the
	-- previous input and ref() keeps pointing at the stale bytes from the first call
	readBuffer:reset()
	readBuffer:put(input)
	local readCursor = readBuffer:ref()
	-- Use the caller's encodings instead of a fixed charset pair. The struct only stores
	-- pointers into these Lua strings; both stay referenced as arguments for the whole call
	request.input.charset = inputEncoding
	request.input.buffer = readCursor
	request.input.length = #input
	request.input.remaining = #input

	-- If the input is empty, reserving a zero-length write buffer may lead to misleading errors
	-- NOTE(review): the 4 bytes/input byte estimate presumably doesn't cover encodings that
	-- prepend a byte order mark (e.g. UTF-32) - confirm whether extra headroom is needed
	local maxRequiredWriteBufferSize = math.max(1, #input * UTF_MAX_BYTES_PER_CODEPOINT)

	writeBuffer:reset()
	local writeCursor, writeBufferCapacity = writeBuffer:reserve(maxRequiredWriteBufferSize)
	request.output.charset = outputEncoding
	request.output.buffer = writeCursor
	request.output.length = writeBufferCapacity
	request.output.remaining = writeBufferCapacity

	local result = iconv.bindings.iconv_convert(request)

	-- Whatever is no longer "remaining" was written by the native side; commit exactly that much
	local numBytesWritten = tonumber(request.output.length - request.output.remaining)
	writeBuffer:commit(numBytesWritten)

	if result ~= ffi.C.ICONV_RESULT_OK then
		return nil, iconv.strerror(result)
	end

	return tostring(writeBuffer), iconv.strerror(result)
end
79103
80- function iconv .try_close (descriptor )
81- if ffi .cast (" size_t" , descriptor ) ~= iconv .bindings .CHARSET_CONVERSION_FAILED then
82- -- Guard this because MINGW64's iconv can't handle closing invalid descriptors
83- return iconv .bindings .iconv_close (descriptor )
84- end
85-
86- return nil , iconv .errorMessages .INVALID_CONVERSION_HANDLE
--- Looks up the message text for a conversion result code.
-- @param result Status value (iconv_result_t) as produced by the conversion API
-- @treturn string The message returned by the native iconv_strerror, copied into a Lua string
function iconv.strerror(result)
	local message = iconv.bindings.iconv_strerror(result)
	return ffi.string(message)
end
88107
89108return iconv
0 commit comments