Skip to content

Commit b2efaa9

Browse files
committed
[k2] add support multipart/form-data to HTTP server
1 parent 0bb1456 commit b2efaa9

5 files changed

Lines changed: 356 additions & 3 deletions

File tree

runtime-light/server/http/http-server-state.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ inline constexpr std::string_view CONTENT_LENGTH = "content-length";
4646
inline constexpr std::string_view AUTHORIZATION = "authorization";
4747
inline constexpr std::string_view ACCEPT_ENCODING = "accept-encoding";
4848
inline constexpr std::string_view CONTENT_ENCODING = "content-encoding";
49+
inline constexpr std::string_view CONTENT_DISPOSITION = "content-disposition";
4950

5051
} // namespace headers
5152

runtime-light/server/http/init-functions.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@
2626
#include "runtime-light/core/globals/php-script-globals.h"
2727
#include "runtime-light/k2-platform/k2-api.h"
2828
#include "runtime-light/server/http/http-server-state.h"
29+
#include "runtime-light/server/http/multipart.h"
2930
#include "runtime-light/state/instance-state.h"
3031
#include "runtime-light/stdlib/component/component-api.h"
3132
#include "runtime-light/stdlib/diagnostics/logs.h"
3233
#include "runtime-light/stdlib/output/output-state.h"
3334
#include "runtime-light/stdlib/server/http-functions.h"
3435
#include "runtime-light/stdlib/zlib/zlib-functions.h"
36+
#include "runtime-light/stdlib/file/file-system-functions.h"
3537
#include "runtime-light/streams/stream.h"
3638
#include "runtime-light/tl/tl-core.h"
3739
#include "runtime-light/tl/tl-functions.h"
@@ -319,14 +321,16 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector<std
319321
break;
320322
}
321323
case kphp::http::method::post: {
324+
string body{reinterpret_cast<const char*>(invoke_http.body.data()), static_cast<string::size_type>(invoke_http.body.size())};
322325
if (!std::ranges::search(content_type, CONTENT_TYPE_APP_FORM_URLENCODED).empty()) {
323-
string body{reinterpret_cast<const char*>(invoke_http.body.data()), static_cast<string::size_type>(invoke_http.body.size())};
324326
f$parse_str(body, superglobals.v$_POST);
325327
http_server_instance_st.opt_raw_post_data.emplace(std::move(body));
326328
} else if (!std::ranges::search(content_type, CONTENT_TYPE_MULTIPART_FORM_DATA).empty()) {
327-
kphp::log::error("unsupported content-type: {}", CONTENT_TYPE_MULTIPART_FORM_DATA);
329+
// multipart/form-data: the part delimiter is announced in the Content-Type
// header, so the body can only be parsed when a non-empty boundary is present.
// (The previous check was inverted and dereferenced an empty optional.)
const std::optional<std::string_view> boundary{parse_boundary(content_type)};
if (boundary.has_value() && !boundary->empty()) {
  kphp::http::parse_multipart({body.c_str(), body.size()}, *boundary, superglobals.v$_POST, superglobals.v$_FILES);
} else {
  kphp::log::warning("multipart/form-data request without boundary parameter");
}
328333
} else {
329-
string body{reinterpret_cast<const char*>(invoke_http.body.data()), static_cast<string::size_type>(invoke_http.body.size())};
330334
http_server_instance_st.opt_raw_post_data.emplace(std::move(body));
331335
}
332336

@@ -378,6 +382,7 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector<std
378382

379383
kphp::coro::task<> finalize_server() noexcept {
380384
auto& http_server_instance_st{HttpServerInstanceState::get()};
385+
auto& superglobals{InstanceState::get().php_script_mutable_globals_singleton.get_superglobals()};
381386

382387
string response_body{};
383388
tl::HttpResponse http_response{};
@@ -431,6 +436,18 @@ kphp::coro::task<> finalize_server() noexcept {
431436
[[fallthrough]];
432437
}
433438
case kphp::http::response_state::completed:
439+
const array<mixed> files = superglobals.v$_FILES.to_array();
440+
for (array<mixed>::const_iterator it = files.begin(); it != files.end(); ++it) {
441+
const mixed& file = it.get_value();
442+
443+
if (!file.is_array()) {
444+
kphp::log::error("$_FILES contains a value that is not an array");
445+
continue;
446+
}
447+
448+
const mixed tmp_filename = file.get_value(string("tmp_name"));
449+
f$unlink(tmp_filename.to_string());
450+
}
434451
co_return;
435452
}
436453
}
Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
// Compiler for PHP (aka KPHP)
2+
// Copyright (c) 2024 LLC «V Kontakte»
3+
// Distributed under the GPL v3 License, see LICENSE.notice.txt
4+
5+
#include "runtime-light/server/http/multipart.h"
6+
#include "runtime-light/stdlib/file/resource.h"
7+
#include "runtime-light/stdlib/diagnostics/logs.h"
8+
#include "runtime-light/server/http/http-server-state.h"
9+
#include "runtime-light/stdlib/math/random-functions.h"
10+
11+
#include <string_view>
12+
#include <cstdio>
13+
14+
#include "runtime-common/core/runtime-core.h"
15+
#include "common/algorithms/string-algorithms.h"
16+
17+
namespace {
18+
19+
constexpr int8_t TMP_FILENAME_LENGTH = 10;
20+
constexpr std::string_view TMP_DIR = "/tmp/";
21+
22+
constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA = "form-data;";
23+
constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary=";
24+
25+
struct header {
26+
header() = delete;
27+
28+
private:
29+
header(std::string_view name_view, std::string_view value_view) : name{name_view}, value{value_view} {}
30+
31+
public:
32+
std::string_view name;
33+
std::string_view value;
34+
35+
static std::optional<header> create(std::string_view header_string) noexcept {
36+
auto [name_view, value_view]{vk::split_string_view(header_string, ':')};
37+
if (name_view.size() + value_view.size() + 1 != header_string.size()) [[unlikely]] {
38+
return std::nullopt;
39+
}
40+
if (name_view.empty() || value_view.empty()) {
41+
return std::nullopt;
42+
}
43+
return header(name_view, value_view.substr(1));
44+
}
45+
46+
bool name_is(const std::string_view s) {
47+
const auto lower_name{name | std::views::take(s.size()) |
48+
std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })};
49+
return std::ranges::equal(lower_name, s);
50+
}
51+
};
52+
53+
// Represents one attribute from Content-Disposition header.
54+
// For example, a typically file field will have two attributes:
55+
// 1) attr = "name", value = "avatar"
56+
// 2) attr = "filename", value = "my_avatar.png"
57+
struct partAttr {
58+
std::string_view attr;
59+
std::string_view value;
60+
61+
partAttr() = delete;
62+
partAttr(const std::string_view attr_, const std::string_view value_) : attr{attr_}, value{value_} {};
63+
};
64+
65+
// Represents one part of multipart content
66+
struct part {
67+
std::string_view name;
68+
std::string_view filename;
69+
std::string_view content_type;
70+
std::string_view data;
71+
};
72+
73+
class MultipartBody {
74+
private:
75+
76+
std::string_view body;
77+
std::string_view boundary;
78+
size_t pos;
79+
80+
std::optional<part> next_part();
81+
void addPost(const part &part, mixed &v$_POST);
82+
void addFile(const part &part, mixed &v$_FILES);
83+
84+
std::optional<header> next_header();
85+
std::string_view parse_data();
86+
87+
// Returns true if current pos refers to one of \r or \n
88+
bool is_crlf() {
89+
return body[pos] == '\r' || body[pos] == '\n';
90+
}
91+
92+
void skip_crlf() {
93+
if (body[pos] == '\r') {
94+
pos++;
95+
}
96+
if (body[pos] == '\n') {
97+
pos++;
98+
}
99+
}
100+
101+
void skip_boundary() {
102+
if (pos == 0) {
103+
pos += 2;
104+
}
105+
pos += boundary.size();
106+
if (body[pos] == '-' && body[pos+1] == '-') {
107+
pos += 2;
108+
}
109+
}
110+
111+
public:
112+
113+
MultipartBody(const std::string_view body_, const std::string_view boundary_)
114+
: body{body_}, boundary{boundary_}, pos{0} {}
115+
116+
void parse_into(mixed &v$_POST, mixed &v$_FILES);
117+
};
118+
119+
std::optional<part> MultipartBody::next_part() {
120+
part part;
121+
122+
if (pos == 0) {
123+
skip_boundary();
124+
skip_crlf();
125+
}
126+
127+
do {
128+
std::optional<header> maybe_header{next_header()};
129+
if (!maybe_header) {
130+
return std::nullopt;
131+
}
132+
header header{maybe_header.value()};
133+
134+
if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) {
135+
if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) {
136+
return std::nullopt;
137+
}
138+
auto attrs = std::views::split(header.value, ";") | std::views::transform([](auto part) {
139+
std::string_view part_view{vk::trim(std::string_view(part))};
140+
auto [name_view, value_view]{vk::split_string_view(part_view, '=')};
141+
if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) {
142+
value_view = value_view.substr(1, value_view.size()-2);
143+
}
144+
return partAttr{name_view, value_view};
145+
});
146+
147+
for (partAttr a : attrs) {
148+
if (a.attr.empty()) {
149+
return std::nullopt;
150+
}
151+
if (a.attr == "name") {
152+
part.name = a.value;
153+
} else if (a.attr == "filename") {
154+
part.filename = a.value;
155+
}
156+
}
157+
} else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) {
158+
part.content_type = header.value;
159+
}
160+
} while (!is_crlf());
161+
162+
skip_crlf();
163+
part.data = parse_data();
164+
skip_boundary();
165+
skip_crlf();
166+
return part;
167+
}
168+
169+
std::optional<header> MultipartBody::next_header() {
170+
size_t lf{body.find('\n', pos)};
171+
size_t header_end{lf-1};
172+
173+
if (lf == std::string_view::npos) {
174+
return std::nullopt;
175+
}
176+
177+
if (body[header_end] == '\r') {
178+
header_end--;
179+
}
180+
181+
auto res{header::create(body.substr(pos, header_end-pos+1))};
182+
pos = lf + 1;
183+
return res;
184+
}
185+
186+
std::string_view MultipartBody::parse_data() {
187+
size_t data_start{pos};
188+
size_t data_end{body.find(boundary, data_start)};
189+
pos = data_end;
190+
191+
if (pos == std::string_view::npos) {
192+
return {};
193+
}
194+
195+
if (body[data_end-1] != '-' || body[data_end-2] != '-') {
196+
return {};
197+
}
198+
data_end -= 2;
199+
if (body[data_end] == '\n') {
200+
data_end--;
201+
}
202+
if (body[data_end] == '\r') {
203+
data_end--;
204+
}
205+
206+
if (data_end > data_start) {
207+
return body.substr(data_start, data_end-data_start-1);
208+
}
209+
210+
return {};
211+
212+
}
213+
214+
// Consumes parts one by one and dispatches each of them: parts that carry a
// filename go to addFile, all others to addPost. Parsing stops at the first
// part that cannot be read or that has an empty name.
void MultipartBody::parse_into(mixed &v$_POST, mixed &v$_FILES) {
  for (std::optional<part> current{next_part()}; current.has_value(); current = next_part()) {
    const part item{*current};
    if (item.name.empty()) {
      return;
    }
    if (item.filename.empty()) {
      addPost(item, v$_POST);
    } else {
      addFile(item, v$_FILES);
    }
  }
}
228+
229+
void MultipartBody::addPost(const part &part, mixed &v$_POST) {
230+
string name{part.name.data(), static_cast<string::size_type>(part.name.size())};
231+
v$_POST.set_value(name, string(part.data.data(), part.data.size()));
232+
}
233+
234+
void MultipartBody::addFile(const part &part, mixed &v$_FILES) {
235+
//TODO: replace f$random_bytes to avoid string allocation
236+
Optional<string> rand_str{f$random_bytes(TMP_FILENAME_LENGTH)};
237+
238+
if (!rand_str.has_value()) {
239+
kphp::log::warning("error generating random_bytes for tmp file");
240+
return;
241+
}
242+
243+
string tmp_name_str{TMP_DIR.data(), TMP_DIR.size()};
244+
tmp_name_str.append(rand_str.val());
245+
std::string_view tmp_name{tmp_name_str.c_str(), tmp_name_str.size()};
246+
247+
auto file{kphp::fs::file::open(tmp_name, "w")};
248+
if (!file) [[unlikely]] {
249+
kphp::log::warning("error opening tmp file {}: error code -> {}", tmp_name, file.error());
250+
return;
251+
}
252+
253+
int file_size{0};
254+
auto file_size_result = (*file).write({reinterpret_cast<const std::byte*>(part.data.data()), part.data.size()});
255+
if (file_size_result.has_value()) {
256+
file_size = file_size_result.value();
257+
if (file_size < part.data.size()) {
258+
kphp::log::warning("error write to tmp file: wrote {} bytes insted of {}", file_size, part.data.size());
259+
return;
260+
}
261+
} else {
262+
kphp::log::warning("error write to tmp file: errcode {}", file_size_result.error());
263+
}
264+
265+
string name{part.name.data(), static_cast<string::size_type>(part.name.size())};
266+
267+
if (part.name.ends_with("[]")) {
268+
mixed& file = v$_FILES[name.substr(0, name.size() - 2)];
269+
if (file_size == part.data.size()) {
270+
file[string("name")].push_back(string(part.filename.data(), part.filename.size()));
271+
file[string("type")].push_back(string(part.content_type.data(), part.content_type.size()));
272+
file[string("size")].push_back(file_size);
273+
file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size()));
274+
file[string("error")].push_back(0);
275+
} else {
276+
file[string("name")].push_back(string());
277+
file[string("type")].push_back(string());
278+
file[string("size")].push_back(0);
279+
file[string("tmp_name")].push_back(string());
280+
file[string("error")].push_back(-file_size);
281+
}
282+
} else {
283+
mixed& file = v$_FILES[name];
284+
if (file_size == part.data.size()) {
285+
file.set_value(string("name"), string(part.filename.data(), part.filename.size()));
286+
file.set_value(string("type"), string(part.content_type.data(), part.content_type.size()));
287+
file.set_value(string("size"), file_size);
288+
file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size()));
289+
file.set_value(string("error"), 0);
290+
} else {
291+
file.set_value(string("size"), 0);
292+
file.set_value(string("tmp_name"), string());
293+
file.set_value(string("error"), -file_size);
294+
}
295+
}
296+
}
297+
298+
} // namespace
299+
300+
namespace kphp::http {
301+
302+
void parse_multipart(const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES) {
303+
MultipartBody mb{body, boundary};
304+
mb.parse_into(v$_POST, v$_FILES);
305+
}
306+
307+
std::optional<std::string_view> parse_boundary(const std::string_view content_type) {
308+
size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)};
309+
if (pos == std::string_view::npos) {
310+
return std::nullopt;
311+
}
312+
std::string_view res{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())};
313+
if (res.size() >= 2 && res.starts_with('"') && res.ends_with('"')) {
314+
res = res.substr(1, res.size()-2);
315+
}
316+
return res;
317+
}
318+
319+
} // namespace kphp::http

0 commit comments

Comments
 (0)