Skip to content

Commit 3a8d229

Browse files
committed
Update simdjson single header files
Generated from simdjson 5809e51ae405d763700ec19083009a2a1cdbfdbc
1 parent 52b5f10 commit 3a8d229

2 files changed

Lines changed: 302 additions & 197 deletions

File tree

src/simdjson.cpp

Lines changed: 111 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2022-07-28 21:45:54 -0400. Do not edit! */
1+
/* auto-generated on 2022-09-30 12:13:16 -0400. Do not edit! */
22
/* begin file src/simdjson.cpp */
33
#include "simdjson.h"
44

@@ -1589,7 +1589,8 @@ namespace internal {
15891589
{ INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." },
15901590
{ INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." },
15911591
{ SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "},
1592-
{ OUT_OF_BOUNDS, "Attempted to access location outside of document."}
1592+
{ OUT_OF_BOUNDS, "Attempted to access location outside of document."},
1593+
{ TRAILING_CONTENT, "Unexpected trailing content in the JSON input."}
15931594
}; // error_messages[]
15941595

15951596
} // namespace internal
@@ -3105,6 +3106,14 @@ using namespace simd;
31053106
this->error |= this->prev_incomplete;
31063107
}
31073108

3109+
#ifndef SIMDJSON_IF_CONSTEXPR
3110+
#if SIMDJSON_CPLUSPLUS17
3111+
#define SIMDJSON_IF_CONSTEXPR if constexpr
3112+
#else
3113+
#define SIMDJSON_IF_CONSTEXPR if
3114+
#endif
3115+
#endif
3116+
31083117
simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
31093118
if(simdjson_likely(is_ascii(input))) {
31103119
this->error |= this->prev_incomplete;
@@ -3114,12 +3123,12 @@ using namespace simd;
31143123
||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
31153124
|| (simd8x64<uint8_t>::NUM_CHUNKS == 4),
31163125
"We support one, two or four chunks per 64-byte block.");
3117-
if(simd8x64<uint8_t>::NUM_CHUNKS == 1) {
3126+
SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
31183127
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
3119-
} if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
3128+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
31203129
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
31213130
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
3122-
} else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
3131+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
31233132
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
31243133
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
31253134
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -3497,7 +3506,7 @@ struct json_block {
34973506
*/
34983507
class json_scanner {
34993508
public:
3500-
json_scanner() {}
3509+
json_scanner() = default;
35013510
simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
35023511
// Returns either UNCLOSED_STRING or SUCCESS
35033512
simdjson_inline error_code finish();
@@ -4194,17 +4203,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
41944203
}
41954204
uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
41964205

4197-
// if the first code point is invalid we will get here, as we will go past
4198-
// the check for being outside the Basic Multilingual plane. If we don't
4199-
// find a \u immediately afterwards we fail out anyhow, but if we do,
4200-
// this check catches both the case of the first code point being invalid
4201-
// or the second code point being invalid.
4202-
if ((code_point | code_point_2) >> 16) {
4206+
// We have already checked that the high surrogate is valid and
4207+
// (code_point - 0xd800) < 1024.
4208+
//
4209+
// Check that code_point_2 is in the range 0xdc00..0xdfff
4210+
// and that code_point_2 was parsed from valid hex.
4211+
uint32_t low_bit = code_point_2 - 0xdc00;
4212+
if (low_bit >> 10) {
42034213
return false;
42044214
}
42054215

42064216
code_point =
4207-
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
4217+
(((code_point - 0xd800) << 10) | low_bit) + 0x10000;
42084218
*src_ptr += 6;
42094219
} else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
42104220
// If we encounter a low surrogate (not preceded by a high surrogate)
@@ -5668,17 +5678,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
56685678
}
56695679
uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
56705680

5671-
// if the first code point is invalid we will get here, as we will go past
5672-
// the check for being outside the Basic Multilingual plane. If we don't
5673-
// find a \u immediately afterwards we fail out anyhow, but if we do,
5674-
// this check catches both the case of the first code point being invalid
5675-
// or the second code point being invalid.
5676-
if ((code_point | code_point_2) >> 16) {
5681+
// We have already checked that the high surrogate is valid and
5682+
// (code_point - 0xd800) < 1024.
5683+
//
5684+
// Check that code_point_2 is in the range 0xdc00..0xdfff
5685+
// and that code_point_2 was parsed from valid hex.
5686+
uint32_t low_bit = code_point_2 - 0xdc00;
5687+
if (low_bit >> 10) {
56775688
return false;
56785689
}
56795690

56805691
code_point =
5681-
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
5692+
(((code_point - 0xd800) << 10) | low_bit) + 0x10000;
56825693
*src_ptr += 6;
56835694
} else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
56845695
// If we encounter a low surrogate (not preceded by a high surrogate)
@@ -6883,6 +6894,14 @@ using namespace simd;
68836894
this->error |= this->prev_incomplete;
68846895
}
68856896

6897+
#ifndef SIMDJSON_IF_CONSTEXPR
6898+
#if SIMDJSON_CPLUSPLUS17
6899+
#define SIMDJSON_IF_CONSTEXPR if constexpr
6900+
#else
6901+
#define SIMDJSON_IF_CONSTEXPR if
6902+
#endif
6903+
#endif
6904+
68866905
simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
68876906
if(simdjson_likely(is_ascii(input))) {
68886907
this->error |= this->prev_incomplete;
@@ -6892,12 +6911,12 @@ using namespace simd;
68926911
||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
68936912
|| (simd8x64<uint8_t>::NUM_CHUNKS == 4),
68946913
"We support one, two or four chunks per 64-byte block.");
6895-
if(simd8x64<uint8_t>::NUM_CHUNKS == 1) {
6914+
SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
68966915
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
6897-
} if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
6916+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
68986917
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
68996918
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
6900-
} else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
6919+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
69016920
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
69026921
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
69036922
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -7277,7 +7296,7 @@ struct json_block {
72777296
*/
72787297
class json_scanner {
72797298
public:
7280-
json_scanner() {}
7299+
json_scanner() = default;
72817300
simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
72827301
// Returns either UNCLOSED_STRING or SUCCESS
72837302
simdjson_inline error_code finish();
@@ -8020,17 +8039,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
80208039
}
80218040
uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
80228041

8023-
// if the first code point is invalid we will get here, as we will go past
8024-
// the check for being outside the Basic Multilingual plane. If we don't
8025-
// find a \u immediately afterwards we fail out anyhow, but if we do,
8026-
// this check catches both the case of the first code point being invalid
8027-
// or the second code point being invalid.
8028-
if ((code_point | code_point_2) >> 16) {
8042+
// We have already checked that the high surrogate is valid and
8043+
// (code_point - 0xd800) < 1024.
8044+
//
8045+
// Check that code_point_2 is in the range 0xdc00..0xdfff
8046+
// and that code_point_2 was parsed from valid hex.
8047+
uint32_t low_bit = code_point_2 - 0xdc00;
8048+
if (low_bit >> 10) {
80298049
return false;
80308050
}
80318051

80328052
code_point =
8033-
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
8053+
(((code_point - 0xd800) << 10) | low_bit) + 0x10000;
80348054
*src_ptr += 6;
80358055
} else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
80368056
// If we encounter a low surrogate (not preceded by a high surrogate)
@@ -9266,6 +9286,14 @@ using namespace simd;
92669286
this->error |= this->prev_incomplete;
92679287
}
92689288

9289+
#ifndef SIMDJSON_IF_CONSTEXPR
9290+
#if SIMDJSON_CPLUSPLUS17
9291+
#define SIMDJSON_IF_CONSTEXPR if constexpr
9292+
#else
9293+
#define SIMDJSON_IF_CONSTEXPR if
9294+
#endif
9295+
#endif
9296+
92699297
simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
92709298
if(simdjson_likely(is_ascii(input))) {
92719299
this->error |= this->prev_incomplete;
@@ -9275,12 +9303,12 @@ using namespace simd;
92759303
||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
92769304
|| (simd8x64<uint8_t>::NUM_CHUNKS == 4),
92779305
"We support one, two or four chunks per 64-byte block.");
9278-
if(simd8x64<uint8_t>::NUM_CHUNKS == 1) {
9306+
SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
92799307
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
9280-
} if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
9308+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
92819309
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
92829310
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
9283-
} else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
9311+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
92849312
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
92859313
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
92869314
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -9658,7 +9686,7 @@ struct json_block {
96589686
*/
96599687
class json_scanner {
96609688
public:
9661-
json_scanner() {}
9689+
json_scanner() = default;
96629690
simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
96639691
// Returns either UNCLOSED_STRING or SUCCESS
96649692
simdjson_inline error_code finish();
@@ -10354,17 +10382,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
1035410382
}
1035510383
uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
1035610384

10357-
// if the first code point is invalid we will get here, as we will go past
10358-
// the check for being outside the Basic Multilingual plane. If we don't
10359-
// find a \u immediately afterwards we fail out anyhow, but if we do,
10360-
// this check catches both the case of the first code point being invalid
10361-
// or the second code point being invalid.
10362-
if ((code_point | code_point_2) >> 16) {
10385+
// We have already checked that the high surrogate is valid and
10386+
// (code_point - 0xd800) < 1024.
10387+
//
10388+
// Check that code_point_2 is in the range 0xdc00..0xdfff
10389+
// and that code_point_2 was parsed from valid hex.
10390+
uint32_t low_bit = code_point_2 - 0xdc00;
10391+
if (low_bit >> 10) {
1036310392
return false;
1036410393
}
1036510394

1036610395
code_point =
10367-
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
10396+
(((code_point - 0xd800) << 10) | low_bit) + 0x10000;
1036810397
*src_ptr += 6;
1036910398
} else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
1037010399
// If we encounter a low surrogate (not preceded by a high surrogate)
@@ -11563,6 +11592,14 @@ using namespace simd;
1156311592
this->error |= this->prev_incomplete;
1156411593
}
1156511594

11595+
#ifndef SIMDJSON_IF_CONSTEXPR
11596+
#if SIMDJSON_CPLUSPLUS17
11597+
#define SIMDJSON_IF_CONSTEXPR if constexpr
11598+
#else
11599+
#define SIMDJSON_IF_CONSTEXPR if
11600+
#endif
11601+
#endif
11602+
1156611603
simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
1156711604
if(simdjson_likely(is_ascii(input))) {
1156811605
this->error |= this->prev_incomplete;
@@ -11572,12 +11609,12 @@ using namespace simd;
1157211609
||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
1157311610
|| (simd8x64<uint8_t>::NUM_CHUNKS == 4),
1157411611
"We support one, two or four chunks per 64-byte block.");
11575-
if(simd8x64<uint8_t>::NUM_CHUNKS == 1) {
11612+
SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
1157611613
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
11577-
} if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
11614+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
1157811615
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1157911616
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
11580-
} else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
11617+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
1158111618
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1158211619
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
1158311620
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -11955,7 +11992,7 @@ struct json_block {
1195511992
*/
1195611993
class json_scanner {
1195711994
public:
11958-
json_scanner() {}
11995+
json_scanner() = default;
1195911996
simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
1196011997
// Returns either UNCLOSED_STRING or SUCCESS
1196111998
simdjson_inline error_code finish();
@@ -12651,17 +12688,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
1265112688
}
1265212689
uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
1265312690

12654-
// if the first code point is invalid we will get here, as we will go past
12655-
// the check for being outside the Basic Multilingual plane. If we don't
12656-
// find a \u immediately afterwards we fail out anyhow, but if we do,
12657-
// this check catches both the case of the first code point being invalid
12658-
// or the second code point being invalid.
12659-
if ((code_point | code_point_2) >> 16) {
12691+
// We have already checked that the high surrogate is valid and
12692+
// (code_point - 0xd800) < 1024.
12693+
//
12694+
// Check that code_point_2 is in the range 0xdc00..0xdfff
12695+
// and that code_point_2 was parsed from valid hex.
12696+
uint32_t low_bit = code_point_2 - 0xdc00;
12697+
if (low_bit >> 10) {
1266012698
return false;
1266112699
}
1266212700

1266312701
code_point =
12664-
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
12702+
(((code_point - 0xd800) << 10) | low_bit) + 0x10000;
1266512703
*src_ptr += 6;
1266612704
} else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
1266712705
// If we encounter a low surrogate (not preceded by a high surrogate)
@@ -13895,6 +13933,14 @@ using namespace simd;
1389513933
this->error |= this->prev_incomplete;
1389613934
}
1389713935

13936+
#ifndef SIMDJSON_IF_CONSTEXPR
13937+
#if SIMDJSON_CPLUSPLUS17
13938+
#define SIMDJSON_IF_CONSTEXPR if constexpr
13939+
#else
13940+
#define SIMDJSON_IF_CONSTEXPR if
13941+
#endif
13942+
#endif
13943+
1389813944
simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
1389913945
if(simdjson_likely(is_ascii(input))) {
1390013946
this->error |= this->prev_incomplete;
@@ -13904,12 +13950,12 @@ using namespace simd;
1390413950
||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
1390513951
|| (simd8x64<uint8_t>::NUM_CHUNKS == 4),
1390613952
"We support one, two or four chunks per 64-byte block.");
13907-
if(simd8x64<uint8_t>::NUM_CHUNKS == 1) {
13953+
SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
1390813954
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
13909-
} if(simd8x64<uint8_t>::NUM_CHUNKS == 2) {
13955+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
1391013956
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1391113957
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
13912-
} else if(simd8x64<uint8_t>::NUM_CHUNKS == 4) {
13958+
} else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
1391313959
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1391413960
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
1391513961
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -14287,7 +14333,7 @@ struct json_block {
1428714333
*/
1428814334
class json_scanner {
1428914335
public:
14290-
json_scanner() {}
14336+
json_scanner() = default;
1429114337
simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
1429214338
// Returns either UNCLOSED_STRING or SUCCESS
1429314339
simdjson_inline error_code finish();
@@ -14983,17 +15029,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
1498315029
}
1498415030
uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
1498515031

14986-
// if the first code point is invalid we will get here, as we will go past
14987-
// the check for being outside the Basic Multilingual plane. If we don't
14988-
// find a \u immediately afterwards we fail out anyhow, but if we do,
14989-
// this check catches both the case of the first code point being invalid
14990-
// or the second code point being invalid.
14991-
if ((code_point | code_point_2) >> 16) {
15032+
// We have already checked that the high surrogate is valid and
15033+
// (code_point - 0xd800) < 1024.
15034+
//
15035+
// Check that code_point_2 is in the range 0xdc00..0xdfff
15036+
// and that code_point_2 was parsed from valid hex.
15037+
uint32_t low_bit = code_point_2 - 0xdc00;
15038+
if (low_bit >> 10) {
1499215039
return false;
1499315040
}
1499415041

1499515042
code_point =
14996-
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
15043+
(((code_point - 0xd800) << 10) | low_bit) + 0x10000;
1499715044
*src_ptr += 6;
1499815045
} else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
1499915046
// If we encounter a low surrogate (not preceded by a high surrogate)

0 commit comments

Comments
 (0)