Skip to content

Commit 2cb3982

Browse files
authored
Merge pull request #67 from TysonAndre/depth-ignore
Ignore pathologically large depths longer than strlen
2 parents eb8ee62 + 9535f04 commit 2cb3982

4 files changed

Lines changed: 63 additions & 4 deletions

File tree

package.xml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
<active>yes</active>
1919
</lead>
2020
-->
21-
<date>2022-08-30</date>
21+
<date>2022-09-30</date>
2222
<version>
23-
<release>2.0.4dev</release>
24-
<api>2.0.4dev</api>
23+
<release>2.0.4</release>
24+
<api>2.0.4</api>
2525
</version>
2626
<stability>
2727
<release>stable</release>
@@ -30,6 +30,8 @@
3030
<license uri="https://www.apache.org/licenses/LICENSE-2.0.html">Apache 2.0</license>
3131
<notes>
3232
* Add `-fvisibility=hidden` to compiler options to reduce compiled extension size by avoiding exporting symbols by default.
33+
* If the requested JSON parsing $depth is excessively large when reallocating larger buffers for the C++ simdjson parser,
34+
then internally use a smaller $depth that would behave identically with lower memory usage. (#66)
3335
</notes>
3436
<contents>
3537
<dir name="/">
@@ -55,6 +57,7 @@
5557
<file name="decode_integer_overflow.phpt" role="test"/>
5658
<file name="decode_invalid_property.phpt" role="test"/>
5759
<file name="decode_max_depth.phpt" role="test"/>
60+
<file name="decode_max_depth_memory_reduction.phpt" role="test"/>
5861
<file name="decode_result.phpt" role="test"/>
5962
<file name="decode_strict_types.phpt" role="test"/>
6063
<file name="decode_types.phpt" role="test"/>

php_simdjson.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
extern zend_module_entry simdjson_module_entry;
1818
#define phpext_simdjson_ptr &simdjson_module_entry
1919

20-
#define PHP_SIMDJSON_VERSION "2.0.4dev"
20+
#define PHP_SIMDJSON_VERSION "2.0.4"
2121
#define SIMDJSON_SUPPORT_URL "https://github.com/crazyxman/simdjson_php"
2222
#define SIMDJSON_PARSE_FAIL 0
2323
#define SIMDJSON_PARSE_SUCCESS 1

src/bindings.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ extern "C" {
2626
#define zend_string_release_ex(s, persistent) zend_string_release((s))
2727
#endif
2828

29+
#define SIMDJSON_DEPTH_CHECK_THRESHOLD 100000
30+
2931
static inline simdjson::simdjson_result<simdjson::dom::element>
3032
get_key_with_optional_prefix(simdjson::dom::element &doc, std::string_view json_pointer)
3133
{
@@ -36,6 +38,25 @@ get_key_with_optional_prefix(simdjson::dom::element &doc, std::string_view json_
3638
static simdjson::error_code
3739
build_parsed_json_cust(simdjson::dom::parser& parser, simdjson::dom::element &doc, const char *buf, size_t len, bool realloc_if_needed,
3840
size_t depth = simdjson::DEFAULT_MAX_DEPTH) {
41+
if (UNEXPECTED(depth > SIMDJSON_DEPTH_CHECK_THRESHOLD) && depth > len && depth > parser.max_depth()) {
42+
/*
43+
* Choose the depth in a way that both avoids frequent reallocations
44+
* and avoids excessive amounts of wasted memory beyond multiples of the largest string ever decoded.
45+
*
46+
* If the depth is already sufficient to parse a string of length `len`,
47+
* then use the parser's previous depth.
48+
*
49+
* Precondition: depth > len
50+
* Postcondition: depth <= original_depth && depth > len
51+
*/
52+
if (len < SIMDJSON_DEPTH_CHECK_THRESHOLD) {
53+
depth = SIMDJSON_DEPTH_CHECK_THRESHOLD;
54+
} else if (depth > len * 2) {
55+
// In callers, simdjson_validate_depth ensures depth <= SIMDJSON_MAX_DEPTH (which is <= SIZE_MAX/8).
56+
// Since len < depth in this block, len < SIZE_MAX/8 as well, so len * 2 cannot overflow; this branch only ever lowers depth.
57+
depth = len * 2;
58+
}
59+
}
3960
auto error = parser.allocate(len, depth);
4061

4162
if (error) {
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
--TEST--
2+
simdjson_decode uses smaller depth than max_depth when safe to do so
3+
--FILE--
4+
<?php
5+
declare(strict_types=1);
6+
ini_set('error_reporting', (string)E_ALL);
7+
ini_set('display_errors', 'stderr');
8+
9+
// This should only be allocating a few megabytes of memory, not gigabytes,
10+
// due to internally choosing a smaller depth that behaves equivalently for excessively large requested depths.
11+
foreach ([1024, 1 << 27] as $depth) {
12+
echo "Test depth=$depth:\n";
13+
$value = simdjson_decode('[]', true, $depth);
14+
var_dump($value);
15+
$value = simdjson_key_count('{"a":"b"}', 'a', $depth);
16+
var_dump($value);
17+
try {
18+
simdjson_decode(str_repeat('[', 200000) . str_repeat(']', 199999), true, $depth);
19+
echo "should be invalid\n";
20+
} catch (Exception $e) {
21+
printf("Caught %s: %s\n", get_class($e), $e->getMessage());
22+
}
23+
}
24+
?>
25+
--EXPECT--
26+
Test depth=1024:
27+
array(0) {
28+
}
29+
int(0)
30+
Caught RuntimeException: The JSON document was too deep (too many nested objects and arrays)
31+
Test depth=134217728:
32+
array(0) {
33+
}
34+
int(0)
35+
Caught RuntimeException: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc.

0 commit comments

Comments
 (0)