Skip to content

Commit d790270

Browse files
authored
Merge pull request #662 from evo-lua/iconv-interface-rework
Rework the interface for the iconv.convert FFI bindings
2 parents 0b4c727 + 891e9e2 commit d790270

7 files changed

Lines changed: 435 additions & 213 deletions

File tree

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
local console = require("console")
22
local iconv = require("iconv")
33
local ffi = require("ffi")
4+
local jit = require("jit")
45

56
local SAMPLE_SIZE = 500000
67

7-
local function iconv_lowlevel()
8-
local input = "\192\175\192\250\192\206\197\205\198\228\192\204\189\186"
8+
local UTF_MAX_BYTES_PER_CODEPOINT = 4
9+
local CP949_INPUT_STRING = "\192\175\192\250\192\206\197\205\198\228\192\204\189\186"
10+
local UTF8_OUTPUT_STRING = "유저인터페이스"
11+
local function iconv_lowlevel(input)
912
local descriptor = iconv.bindings.iconv_open("UTF-8", "CP949")
1013

1114
local inputSize = ffi.new("size_t[1]", #input)
@@ -22,53 +25,85 @@ local function iconv_lowlevel()
2225
local converted = ffi.string(outputBuffer, numConversionsPerformed)
2326

2427
iconv.bindings.iconv_close(descriptor)
25-
return converted, result
28+
return result, converted
2629
end
2730

28-
local function iconv_lua()
29-
local input = "\192\175\192\250\192\206\197\205\198\228\192\204\189\186"
31+
local function iconv_lua(input)
3032
local output, message = iconv.convert(input, "CP949", "UTF-8")
3133
return output, message
3234
end
3335

34-
local function iconv_cpp()
35-
local inputBuffer = buffer.new()
36-
local outputBuffer = buffer.new(1024)
37-
local ptr, len = outputBuffer:reserve(1024)
38-
local result = iconv.bindings.iconv_convert(inputBuffer, #inputBuffer, "CP949", "UTF-8", ptr, len)
39-
return result
36+
-- This should more or less match what iconv.convert is doing, minus validation/error handling
37+
local readBuffer = buffer.new()
38+
local writeBuffer = buffer.new()
39+
local request = ffi.new("iconv_request_t")
40+
local function iconv_cpp(input)
41+
readBuffer:put(input)
42+
local readCursor = readBuffer:ref()
43+
44+
writeBuffer:reset()
45+
local writeCursor, writeBufferSize = writeBuffer:reserve(#input * UTF_MAX_BYTES_PER_CODEPOINT)
46+
47+
request.input.charset = "CP949"
48+
request.input.buffer = readCursor
49+
request.input.length = #input
50+
request.input.remaining = #input
51+
52+
request.output.charset = "UTF-8"
53+
request.output.buffer = writeCursor
54+
request.output.length = writeBufferSize
55+
request.output.remaining = writeBufferSize
56+
57+
local result = iconv.bindings.iconv_convert(request)
58+
local numBytesWritten = tonumber(request.output.length - request.output.remaining)
59+
writeBuffer:commit(numBytesWritten)
60+
61+
return result, tostring(writeBuffer)
4062
end
4163

4264
math.randomseed(os.clock())
4365
local availableBenchmarks = {
4466
function()
45-
local label = "[FFI] Low-level API (tedious and slow, but the most flexible)"
67+
local label = "[FFI] Multi-step conversions using the libiconv API directly (manual descriptor management)"
4668
console.startTimer(label)
4769
for i = 1, SAMPLE_SIZE, 1 do
48-
iconv_lowlevel()
70+
local result, output = iconv_lowlevel(CP949_INPUT_STRING)
71+
assert(result == ffi.C.ICONV_RESULT_OK, iconv.strerror(result))
72+
assert(output == UTF8_OUTPUT_STRING, output)
4973
end
5074
console.stopTimer(label)
5175
end,
5276
function()
53-
local label = "[FFI] One-shot C++ conversion (fast but less flexible)"
77+
local label = "[FFI] Immediate conversion using iconv.bindings.iconv_convert (manual buffer management)"
5478
console.startTimer(label)
5579
for i = 1, SAMPLE_SIZE, 1 do
56-
iconv_cpp()
80+
local result, output = iconv_cpp(CP949_INPUT_STRING)
81+
assert(result == ffi.C.ICONV_RESULT_OK, iconv.strerror(result))
82+
assert(output == UTF8_OUTPUT_STRING, output)
5783
end
5884
console.stopTimer(label)
5985
end,
6086
function()
61-
local label = "[FFI] Lua-friendly wrapper (safer, but slower)"
87+
local label = "[FFI] Immediate conversion using iconv.convert (idiomatic Lua wrapper, with preallocation)"
6288
console.startTimer(label)
6389
for i = 1, SAMPLE_SIZE, 1 do
64-
iconv_lua()
90+
local output, message = iconv_lua(CP949_INPUT_STRING)
91+
assert(output == UTF8_OUTPUT_STRING, message)
6592
end
6693
console.stopTimer(label)
6794
end,
6895
}
6996

7097
table.shuffle(availableBenchmarks)
98+
jit.off()
99+
print("Running benchmarks with JIT=OFF")
100+
for _, benchmark in ipairs(availableBenchmarks) do
101+
benchmark()
102+
end
71103

104+
table.shuffle(availableBenchmarks)
105+
print("Running benchmarks with JIT=ON")
106+
jit.on()
72107
for _, benchmark in ipairs(availableBenchmarks) do
73108
benchmark()
74109
end

Runtime/Bindings/FFI/iconv/iconv.lua

Lines changed: 70 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,87 +3,106 @@ local validation = require("validation")
33

44
local validateString = validation.validateString
55

6-
local ffi_string = ffi.string
76
local tonumber = tonumber
87
local tostring = tostring
98

10-
local iconv = {
11-
errorMessages = {
12-
INVALID_CONVERSION_HANDLE = "Cannot close an invalid iconv_t descriptor",
13-
},
14-
}
9+
local UTF_MAX_BYTES_PER_CODEPOINT = 4
10+
11+
local iconv = {}
1512

1613
iconv.cdefs = [[
1714
typedef void* iconv_t;
18-
typedef struct iconv_result_t {
19-
uint8_t status_code;
20-
size_t num_bytes_written;
21-
const char* message;
15+
typedef enum iconv_result_t {
16+
ICONV_RESULT_OK,
17+
ICONV_INVALID_REQUEST,
18+
ICONV_INVALID_DESCRIPTOR,
19+
ICONV_INVALID_INPUT,
20+
ICONV_INVALID_OUTPUT,
21+
ICONV_CONVERSION_FAILED,
22+
ICONV_INCOMPLETE_INPUT,
23+
ICONV_WRITEBUFFER_FULL,
24+
ICONV_RESULT_LAST,
2225
} iconv_result_t;
2326
27+
typedef char* iconv_cursor_t;
28+
typedef const char* iconv_encoding_t; // Aliased for now, replace with enum later
29+
30+
typedef struct iconv_memory_t {
31+
iconv_encoding_t charset;
32+
iconv_cursor_t buffer;
33+
size_t length;
34+
size_t remaining;
35+
} iconv_memory_t;
36+
37+
typedef struct iconv_request_t {
38+
iconv_memory_t input;
39+
iconv_memory_t output;
40+
iconv_t handle;
41+
} iconv_request_t;
42+
2443
struct static_iconv_exports_table {
25-
iconv_result_t (*iconv_convert)(char* input, size_t input_size, const char* input_encoding, const char* output_encoding, char* output, size_t output_size);
44+
45+
// Exports from iconv.h
2646
iconv_t (*iconv_open)(const char* input_encoding, const char* output_encoding);
2747
int (*iconv_close)(iconv_t conversion_descriptor);
2848
size_t (*iconv)(iconv_t conversion_descriptor, char** input, size_t* input_size, char** output, size_t* output_size);
2949
30-
// Shared constants
31-
size_t CHARSET_CONVERSION_FAILED;
50+
// Charset conversion API
51+
iconv_result_t (*iconv_convert)(iconv_request_t* conversion_details);
52+
iconv_result_t (*iconv_try_close)(iconv_request_t* request);
53+
54+
// Utility methods
55+
const char* (*iconv_strerror)(iconv_result_t status);
56+
bool (*iconv_check_result)(iconv_t handle);
3257
};
3358
3459
]]
3560

36-
-- Should probably move this elsewhere?
37-
local function ffi_strerror(errno)
38-
return ffi.string(ffi.C.strerror(errno))
39-
end
40-
4161
function iconv.initialize()
42-
ffi.cdef([[
43-
// Should probably move this elsewhere?
44-
char *strerror(int errnum);
45-
]])
46-
4762
ffi.cdef(iconv.cdefs)
4863
end
4964

50-
local UTF_BYTES_PER_CODEPOINT = 4
51-
65+
local request, readBuffer, writeBuffer
5266
function iconv.convert(input, inputEncoding, outputEncoding)
5367
validateString(input, "input")
5468
validateString(inputEncoding, "inputEncoding")
5569
validateString(outputEncoding, "outputEncoding")
5670

57-
if #input == 0 then
58-
-- Prevents LuaJIT from trying to collect a NULL buffer (= crash)
59-
return nil, ffi_strerror(22) -- EINVAL
60-
end
61-
62-
local inputBuffer = ffi.new("char[?]", #input + 1, input) -- Wasteful, but iconv modifies the input buffer
63-
local maxOutputBufferSize = #input * UTF_BYTES_PER_CODEPOINT -- Worst case scenario (also wasteful)
64-
local outputBuffer = buffer.new(maxOutputBufferSize)
65-
local ptr, len = outputBuffer:reserve(maxOutputBufferSize)
66-
67-
local result = iconv.bindings.iconv_convert(inputBuffer, #input, inputEncoding, outputEncoding, ptr, len)
68-
69-
local numBytesWritten = tonumber(result.num_bytes_written)
70-
outputBuffer:commit(numBytesWritten)
71-
72-
if tonumber(result.status_code) ~= 0 then
73-
local errorMessage = ffi_string(result.message)
74-
return nil, errorMessage
71+
-- Preallocate resources only when needed; it's somewhat costly otherwise
72+
request = request or ffi.new("iconv_request_t")
73+
readBuffer = readBuffer or buffer.new(256)
74+
writeBuffer = writeBuffer or buffer.new(256)
75+
76+
readBuffer:put(input)
77+
local readCursor = readBuffer:ref()
78+
request.input.charset = "CP949"
79+
request.input.buffer = readCursor
80+
request.input.length = #input
81+
request.input.remaining = #input
82+
83+
-- If the input is empty, reserving a zero-length write buffer may lead to misleading errors
84+
local maxRequiredWriteBufferSize = math.max(1, #input * UTF_MAX_BYTES_PER_CODEPOINT)
85+
86+
writeBuffer:reset()
87+
local writeCursor, writeBufferCapacity = writeBuffer:reserve(maxRequiredWriteBufferSize)
88+
request.output.charset = "UTF-8"
89+
request.output.buffer = writeCursor
90+
request.output.length = writeBufferCapacity
91+
request.output.remaining = writeBufferCapacity
92+
93+
local result = iconv.bindings.iconv_convert(request)
94+
local numBytesWritten = tonumber(request.output.length - request.output.remaining)
95+
writeBuffer:commit(numBytesWritten)
96+
97+
if result ~= ffi.C.ICONV_RESULT_OK then
98+
return nil, iconv.strerror(result)
7599
end
76100

77-
return tostring(outputBuffer), ffi_strerror(0)
101+
return tostring(writeBuffer), iconv.strerror(result)
78102
end
79103

80-
function iconv.try_close(descriptor)
81-
if ffi.cast("size_t", descriptor) ~= iconv.bindings.CHARSET_CONVERSION_FAILED then
82-
-- Guard this because MINGW64's iconv can't handle closing invalid descriptors
83-
return iconv.bindings.iconv_close(descriptor)
84-
end
85-
86-
return nil, iconv.errorMessages.INVALID_CONVERSION_HANDLE
104+
function iconv.strerror(result)
105+
return ffi.string(iconv.bindings.iconv_strerror(result))
87106
end
88107

89108
return iconv
Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,43 @@
1-
typedef struct iconv_result_t {
2-
uint8_t status_code;
3-
size_t num_bytes_written;
4-
const char* message;
1+
typedef enum iconv_result_t {
2+
ICONV_RESULT_OK,
3+
ICONV_INVALID_REQUEST,
4+
ICONV_INVALID_DESCRIPTOR,
5+
ICONV_INVALID_INPUT,
6+
ICONV_INVALID_OUTPUT,
7+
ICONV_CONVERSION_FAILED,
8+
ICONV_INCOMPLETE_INPUT,
9+
ICONV_WRITEBUFFER_FULL,
10+
ICONV_RESULT_LAST,
511
} iconv_result_t;
612

13+
typedef char* iconv_cursor_t;
14+
typedef const char* iconv_encoding_t; // Aliased for now, replace with enum later
15+
16+
typedef struct iconv_memory_t {
17+
iconv_encoding_t charset;
18+
iconv_cursor_t buffer;
19+
size_t length;
20+
size_t remaining;
21+
} iconv_memory_t;
22+
23+
typedef struct iconv_request_t {
24+
iconv_memory_t input;
25+
iconv_memory_t output;
26+
iconv_t handle;
27+
} iconv_request_t;
28+
729
struct static_iconv_exports_table {
8-
iconv_result_t (*iconv_convert)(char* input, size_t input_size, const char* input_encoding, const char* output_encoding, char* output, size_t output_size);
30+
31+
// Exports from iconv.h
932
iconv_t (*iconv_open)(const char* input_encoding, const char* output_encoding);
1033
int (*iconv_close)(iconv_t conversion_descriptor);
1134
size_t (*iconv)(iconv_t conversion_descriptor, char** input, size_t* input_size, char** output, size_t* output_size);
1235

13-
// Shared constants
14-
size_t CHARSET_CONVERSION_FAILED;
36+
// Charset conversion API
37+
iconv_result_t (*iconv_convert)(iconv_request_t* conversion_details);
38+
iconv_result_t (*iconv_try_close)(iconv_request_t* request);
39+
40+
// Utility methods
41+
const char* (*iconv_strerror)(iconv_result_t status);
42+
bool (*iconv_check_result)(iconv_t handle);
1543
};

0 commit comments

Comments
 (0)