Skip to content

Commit 83fa85b

Browse files
authored
Merge pull request #653 from evo-lua/libcurl-url-parsing
Add FFI bindings for the libcurl URL parsing APIs
2 parents d790270 + 7b87b8e commit 83fa85b

8 files changed

Lines changed: 705 additions & 8 deletions

File tree

Benchmarks/libcurl-url-parsing.lua

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
local console = require("console")
2+
local curl = require("curl")
3+
local ffi = require("ffi")
4+
local openssl = require("openssl")
5+
6+
local tinsert = table.insert
7+
8+
local assertions = require("assertions")
9+
local assertEquals = assertions.assertEquals
10+
11+
-- Number of synthetic URLs to generate; every benchmark below walks the full list once
local SAMPLE_SIZE = 100000
printf("Generating %d randomized samples of varying lengths", SAMPLE_SIZE)
console.startTimer("Generate random samples")

-- Amount of random data requested from openssl.random for each token
local MAX_EXPECTED_TOKEN_LENGTH = 256

-- Produces one random token by passing fresh random data through openssl.hex
local function generateRandomToken()
	return openssl.hex(openssl.random(MAX_EXPECTED_TOKEN_LENGTH))
end

-- Each entry stores the full URL alongside the components the parsers are expected to extract
local inputs = {}

for i = 1, SAMPLE_SIZE, 1 do
	local isEvenIndex = (i % 2 == 0)
	local scheme = isEvenIndex and "https" or "http"
	local extension = isEvenIndex and ".html" or ".htm"

	-- The tokens are drawn in the order host, first path segment, second path segment
	local hostName = generateRandomToken() .. ".com"
	local firstSegment = generateRandomToken()
	local secondSegment = generateRandomToken()
	local fullPath = "/" .. firstSegment .. "/" .. secondSegment .. extension

	tinsert(inputs, {
		url = scheme .. "://" .. hostName .. fullPath,
		protocol = scheme,
		host = hostName,
		path = fullPath,
	})
end

console.stopTimer("Generate random samples")
39+
40+
-- Parses fixture.url through the raw libcurl URL API and checks the extracted host and path
-- against the fixture. Every string handed back by curl_url_get is copied into a Lua string
-- and then released with curl_free; the URL handle itself is released with curl_url_cleanup.
-- @param fixture table with the fields url, host and path (as generated for this benchmark)
local function libcurl_lowlevel(fixture)
	local bindings = curl.bindings

	local handle = bindings.curl_url()
	-- A NULL pointer cdata is truthy, so it has to be compared against nil explicitly
	assert(handle ~= nil, "curl_url() returned NULL")

	local status = bindings.curl_url_set(handle, ffi.C.CURLUPART_URL, fixture.url, 0)
	assertEquals(tonumber(status), ffi.C.CURLUE_OK)

	-- Out-parameter that receives the pointer to the string allocated by libcurl
	local hostPtr = ffi.new("char*[1]")
	status = bindings.curl_url_get(handle, ffi.C.CURLUPART_HOST, hostPtr, 0)
	assertEquals(tonumber(status), ffi.C.CURLUE_OK)

	-- Copy first, then free the pointer that libcurl actually wrote into the slot.
	-- (A finalizer on a separate char* cdata would never see this pointer.)
	local host = ffi.string(hostPtr[0])
	bindings.curl_free(hostPtr[0])
	assertEquals(host, fixture.host)

	local pathPtr = ffi.new("char*[1]")
	status = bindings.curl_url_get(handle, ffi.C.CURLUPART_PATH, pathPtr, 0)
	assertEquals(tonumber(status), ffi.C.CURLUE_OK)

	local path = ffi.string(pathPtr[0])
	bindings.curl_free(pathPtr[0])
	assertEquals(path, fixture.path)

	bindings.curl_url_cleanup(handle)
end
67+
68+
-- Performs the same host/path round-trip as the low-level variant, but goes through the
-- object returned by curl.url() and its set/get methods instead of the raw bindings.
-- @param fixture table with the fields url, host and path (as generated for this benchmark)
local function libcurl_lua(fixture)
	local parsedURL = assert(curl.url())

	local wasAccepted = parsedURL:set("url", fixture.url)
	assert(wasAccepted)

	local host = parsedURL:get("host")
	assertEquals(host, fixture.host)

	local path = parsedURL:get("path")
	assertEquals(path, fixture.path)
end
75+
76+
math.randomseed(os.clock())

-- Wraps a parsing routine in a closure that times one full pass over all generated samples
local function createBenchmark(label, parseSample)
	return function()
		console.startTimer(label)
		for i = 1, SAMPLE_SIZE, 1 do
			parseSample(inputs[i])
		end
		console.stopTimer(label)
	end
end

local availableBenchmarks = {
	createBenchmark("[FFI] URL parsing using the curl APIs directly (manual GC handling)", libcurl_lowlevel),
	createBenchmark("[FFI] URL parsing using the high-level URL interface (Lua wrapper)", libcurl_lua),
}

-- The execution order is randomized on every run
table.shuffle(availableBenchmarks)

for index = 1, #availableBenchmarks do
	local runBenchmark = availableBenchmarks[index]
	runBenchmark()
end

Runtime/Bindings/FFI/curl/curl.lua

Lines changed: 181 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
local ffi = require("ffi")
2-
local ffi_string = ffi.string
2+
3+
local cast = ffi.cast
34

45
-- Module table for the libcurl FFI bindings
local curl = {
	-- Filled in by curl.initialize() with the ctypes that had a Lua metatable attached
	metatypes = {},
	-- NOTE(review): presumably the upper bound for unpacking C string lists - confirm against curl.unpack
	MAX_CSTRING_LIST_SIZE = 256,
}
79

810
curl.cdefs = [[
@@ -86,16 +88,135 @@ struct curl_version_info_data {
8688
};
8789
typedef struct curl_version_info_data curl_version_info_data;
8890
91+
typedef enum {
92+
CURLUE_OK,
93+
CURLUE_BAD_HANDLE, /* 1 */
94+
CURLUE_BAD_PARTPOINTER, /* 2 */
95+
CURLUE_MALFORMED_INPUT, /* 3 */
96+
CURLUE_BAD_PORT_NUMBER, /* 4 */
97+
CURLUE_UNSUPPORTED_SCHEME, /* 5 */
98+
CURLUE_URLDECODE, /* 6 */
99+
CURLUE_OUT_OF_MEMORY, /* 7 */
100+
CURLUE_USER_NOT_ALLOWED, /* 8 */
101+
CURLUE_UNKNOWN_PART, /* 9 */
102+
CURLUE_NO_SCHEME, /* 10 */
103+
CURLUE_NO_USER, /* 11 */
104+
CURLUE_NO_PASSWORD, /* 12 */
105+
CURLUE_NO_OPTIONS, /* 13 */
106+
CURLUE_NO_HOST, /* 14 */
107+
CURLUE_NO_PORT, /* 15 */
108+
CURLUE_NO_QUERY, /* 16 */
109+
CURLUE_NO_FRAGMENT, /* 17 */
110+
CURLUE_NO_ZONEID, /* 18 */
111+
CURLUE_BAD_FILE_URL, /* 19 */
112+
CURLUE_BAD_FRAGMENT, /* 20 */
113+
CURLUE_BAD_HOSTNAME, /* 21 */
114+
CURLUE_BAD_IPV6, /* 22 */
115+
CURLUE_BAD_LOGIN, /* 23 */
116+
CURLUE_BAD_PASSWORD, /* 24 */
117+
CURLUE_BAD_PATH, /* 25 */
118+
CURLUE_BAD_QUERY, /* 26 */
119+
CURLUE_BAD_SCHEME, /* 27 */
120+
CURLUE_BAD_SLASHES, /* 28 */
121+
CURLUE_BAD_USER, /* 29 */
122+
CURLUE_LACKS_IDN, /* 30 */
123+
CURLUE_TOO_LARGE, /* 31 */
124+
CURLUE_LAST
125+
} CURLUcode;
126+
127+
typedef enum {
128+
CURLUPART_URL,
129+
CURLUPART_SCHEME,
130+
CURLUPART_USER,
131+
CURLUPART_PASSWORD,
132+
CURLUPART_OPTIONS,
133+
CURLUPART_HOST,
134+
CURLUPART_PORT,
135+
CURLUPART_PATH,
136+
CURLUPART_QUERY,
137+
CURLUPART_FRAGMENT,
138+
CURLUPART_ZONEID /* added in 7.65.0 */
139+
} CURLUPart;
140+
141+
/* Handle types for the URL API: struct CURLU is only ever used through pointers here */
typedef struct CURLU* url_ptr_t;
/* The qualifier goes in front of the struct keyword; "struct const CURLU" is not standard C */
typedef const struct CURLU* url_cptr_t;
143+
144+
typedef enum {
145+
CURLU_DEFAULT_FEATURES = 0 << 0,
146+
CURLU_DEFAULT_PORT = (1 << 0),
147+
CURLU_NO_DEFAULT_PORT = (1 << 1),
148+
CURLU_DEFAULT_SCHEME = (1 << 2),
149+
CURLU_NON_SUPPORT_SCHEME = (1 << 3),
150+
CURLU_PATH_AS_IS = (1 << 4),
151+
CURLU_DISALLOW_USER = (1 << 5),
152+
CURLU_URLDECODE = (1 << 6),
153+
CURLU_URLENCODE = (1 << 7),
154+
CURLU_APPENDQUERY = (1 << 8),
155+
CURLU_GUESS_SCHEME = (1 << 9),
156+
CURLU_NO_AUTHORITY = (1 << 10),
157+
CURLU_ALLOW_SPACE = (1 << 11),
158+
CURLU_PUNYCODE = (1 << 12),
159+
CURLU_PUNY2IDN = (1 << 13),
160+
CURLU_GET_EMPTY = (1 << 14),
161+
CURLU_NO_GUESS_SCHEME = (1 << 15),
162+
} CURLUFeatureFlags;
163+
89164
struct static_curl_exports_table {
90-
// curl.h
165+
// Exports from curl.h
91166
CURLversion CURLVERSION_NOW;
92167
curl_version_info_data* (*curl_version_info)(CURLversion);
168+
void (*curl_free)(void*);
169+
170+
// Exports from urlapi.h
171+
url_ptr_t (*curl_url)(void);
172+
void (*curl_url_cleanup)(url_ptr_t handle);
173+
url_ptr_t (*curl_url_dup)(url_cptr_t handle);
174+
CURLUcode (*curl_url_get)(url_cptr_t handle,
175+
CURLUPart what,
176+
char** part,
177+
unsigned int flags);
178+
CURLUcode (*curl_url_set)(url_ptr_t handle,
179+
CURLUPart what,
180+
const char* part,
181+
unsigned int flags);
182+
const char* (*curl_url_strerror)(CURLUcode errno);
93183
};
94184
95185
]]
96186

97187
-- Registers the C declarations with the FFI and then sets up everything that depends on them:
-- curl.parts (part name -> CURLUPart constant) and the method table attached to struct CURLU.
function curl.initialize()
	ffi.cdef(curl.cdefs)

	local C = ffi.C
	curl.parts = {
		url = C.CURLUPART_URL,
		scheme = C.CURLUPART_SCHEME,
		user = C.CURLUPART_USER,
		password = C.CURLUPART_PASSWORD,
		options = C.CURLUPART_OPTIONS,
		host = C.CURLUPART_HOST,
		port = C.CURLUPART_PORT,
		path = C.CURLUPART_PATH,
		query = C.CURLUPART_QUERY,
		fragment = C.CURLUPART_FRAGMENT,
		-- The only entry whose name differs from the suffix of the C constant
		zone = C.CURLUPART_ZONEID,
	}

	-- Each method forwards to the module-level function of the same purpose,
	-- passing the handle it was invoked on as the first argument
	local methods = {
		set = function(self, ...)
			return curl.url_set(self, ...)
		end,
		get = function(self, ...)
			return curl.url_get(self, ...)
		end,
		dup = function(self, ...)
			return curl.url_dup(self, ...)
		end,
	}
	methods.__index = methods

	curl.metatypes.CURLU = ffi.metatype("struct CURLU", methods)
end
100221

101222
function curl.unpack(cstrings)
@@ -107,7 +228,7 @@ function curl.unpack(cstrings)
107228
break
108229
end
109230

110-
local key = ffi_string(cstring)
231+
local key = ffi.string(cstring)
111232
entries[key] = true
112233

113234
index = index + 1
@@ -121,7 +242,62 @@ local function cstring_unwrap(cstring)
121242
return tostring(ffi.NULL)
122243
end
123244

124-
return ffi_string(cstring)
245+
return ffi.string(cstring)
246+
end
247+
248+
-- Forwards the given pointer to the curl_free export of the bindings table
-- @param pointer the cdata pointer to release
function curl.free(pointer)
	local curl_free = curl.bindings.curl_free
	curl_free(pointer)
end
251+
252+
-- Creates a new URL handle that is released via curl_url_cleanup once it is garbage-collected.
-- @param href optional string; if given, it is parsed into the new handle right away. The outcome
--   of that parse is not reported here - call handle:set("url", href) directly to see the error.
-- @return the handle, or nil plus an error message if libcurl could not allocate one
function curl.url(href)
	local rawHandle = curl.bindings.curl_url()
	-- A NULL pointer cdata is truthy, so it has to be compared against nil explicitly
	if rawHandle == nil then
		return nil, "Failed to create URL handle (curl_url returned NULL)"
	end

	-- ffi.gc binds the finalizer to one specific cdata object and ffi.cast creates a new one.
	-- The finalizer therefore has to go onto the object that is actually returned; otherwise
	-- the handle could be cleaned up while the caller is still using it.
	local handle = ffi.gc(cast("struct CURLU*", rawHandle), curl.bindings.curl_url_cleanup)

	if type(href) == "string" then
		handle:set("url", href)
	end

	return handle
end
263+
264+
-- Duplicates a URL handle; the copy is released via curl_url_cleanup once it is garbage-collected.
-- @param handle the URL handle to copy
-- @return the new handle, or nil plus an error message if libcurl could not create the copy
function curl.url_dup(handle)
	local rawCopy = curl.bindings.curl_url_dup(handle)
	-- A NULL pointer cdata is truthy, so it has to be compared against nil explicitly
	if rawCopy == nil then
		return nil, "Failed to duplicate URL handle (curl_url_dup returned NULL)"
	end

	-- Attach the finalizer to the cdata object that is returned: ffi.cast creates a new
	-- object, so a finalizer set on the pre-cast value would not keep the copy alive.
	return ffi.gc(cast("struct CURLU*", rawCopy), curl.bindings.curl_url_cleanup)
end
269+
270+
-- Out-parameter slot that is allocated once and reused by every curl.url_get call
local where = ffi.new("char*[1]")

-- Reads one component of a parsed URL as a Lua string.
-- @param handle the URL handle to read from
-- @param what name of the component (a key of curl.parts); missing or unknown names select "url"
-- @param how optional flags, defaults to CURLU_DEFAULT_FEATURES
-- @return the component as a string, or nil plus the libcurl error message on failure
function curl.url_get(handle, what, how)
	local partID = curl.parts[what or "url"] or curl.parts.url
	local flags = how or ffi.C.CURLU_DEFAULT_FEATURES

	local status = curl.bindings.curl_url_get(handle, partID, where, flags)
	if status == ffi.C.CURLUE_OK then
		-- Copy the C string into Lua before handing the allocation back to libcurl
		local cstring = where[0]
		local result = ffi.string(cstring)
		curl.free(cstring)
		return result
	end

	return nil, curl.url_strerror(status)
end
285+
286+
-- Updates one component of a URL handle.
-- @param handle the URL handle to modify
-- @param what name of the component (a key of curl.parts), defaults to "url"
-- @param part the new value; converted with tostring, a nil/false value is passed as NULL
-- @param how optional flags, defaults to CURLU_DEFAULT_FEATURES
-- @return true on success, or nil plus the libcurl error message on failure
function curl.url_set(handle, what, part, how)
	local partID = curl.parts[what or "url"]
	if partID == nil then
		-- Report unknown names through the regular failure path instead of letting the
		-- nil value reach the FFI call, where it would raise a type conversion error
		return nil, curl.url_strerror(ffi.C.CURLUE_UNKNOWN_PART)
	end

	local value = ffi.NULL
	if part then
		value = tostring(part)
	end

	local flags = how or ffi.C.CURLU_DEFAULT_FEATURES

	local status = curl.bindings.curl_url_set(handle, partID, value, flags)
	if status ~= ffi.C.CURLUE_OK then
		return nil, curl.url_strerror(status)
	end

	return true
end
298+
299+
-- Converts a CURLUcode into a Lua string by way of the curl_url_strerror export
-- @param errorCode the status code to describe
-- @return the message as a Lua string
function curl.url_strerror(errorCode)
	local message = curl.bindings.curl_url_strerror(errorCode)
	return ffi.string(message)
end
126302

127303
function curl.version_info(age)
@@ -133,7 +309,7 @@ function curl.version_info(age)
133309
version = cstring_unwrap(versionInfo.version),
134310
version_num = tonumber(versionInfo.version_num),
135311
host = cstring_unwrap(versionInfo.host),
136-
features = tonumber(versionInfo.features), -- TBD tostring?
312+
features = tonumber(versionInfo.features),
137313
ssl_version = cstring_unwrap(versionInfo.ssl_version),
138314
libz_version = cstring_unwrap(versionInfo.libz_version),
139315
protocols = curl.unpack(versionInfo.protocols),

0 commit comments

Comments
 (0)