1- /* auto-generated on 2022-07-28 21:45:54 -0400. Do not edit! */
1+ /* auto-generated on 2022-09-30 12:13:16 -0400. Do not edit! */
22/* begin file src/simdjson.cpp */
33#include "simdjson.h"
44
@@ -1589,7 +1589,8 @@ namespace internal {
15891589 { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." },
15901590 { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." },
15911591 { SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "},
1592- { OUT_OF_BOUNDS, "Attempted to access location outside of document."}
1592+ { OUT_OF_BOUNDS, "Attempted to access location outside of document."},
1593+ { TRAILING_CONTENT, "Unexpected trailing content in the JSON input."}
15931594 }; // error_messages[]
15941595
15951596} // namespace internal
@@ -3105,6 +3106,14 @@ using namespace simd;
31053106 this->error |= this->prev_incomplete;
31063107 }
31073108
3109+ #ifndef SIMDJSON_IF_CONSTEXPR
3110+ #if SIMDJSON_CPLUSPLUS17
3111+ #define SIMDJSON_IF_CONSTEXPR if constexpr
3112+ #else
3113+ #define SIMDJSON_IF_CONSTEXPR if
3114+ #endif
3115+ #endif
3116+
31083117 simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
31093118 if(simdjson_likely(is_ascii(input))) {
31103119 this->error |= this->prev_incomplete;
@@ -3114,12 +3123,12 @@ using namespace simd;
31143123 ||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
31153124 || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
31163125 "We support one, two or four chunks per 64-byte block.");
3117- if (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
3126+ SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
31183127 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
3119- } if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
3128+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
31203129 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
31213130 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
3122- } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
3131+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
31233132 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
31243133 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
31253134 this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -3497,7 +3506,7 @@ struct json_block {
34973506 */
34983507class json_scanner {
34993508public:
3500- json_scanner() {}
3509+ json_scanner() = default;
35013510 simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
35023511 // Returns either UNCLOSED_STRING or SUCCESS
35033512 simdjson_inline error_code finish();
@@ -4194,17 +4203,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
41944203 }
41954204 uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
41964205
4197- // if the first code point is invalid we will get here, as we will go past
4198- // the check for being outside the Basic Multilingual plane. If we don't
4199- // find a \u immediately afterwards we fail out anyhow, but if we do,
4200- // this check catches both the case of the first code point being invalid
4201- // or the second code point being invalid.
4202- if ((code_point | code_point_2) >> 16) {
4206+ // We have already checked that the high surrogate is valid and
4207+ // (code_point - 0xd800) < 1024.
4208+ //
4209+ // Check that code_point_2 is in the range 0xdc00..0xdfff
4210+ // and that code_point_2 was parsed from valid hex.
4211+ uint32_t low_bit = code_point_2 - 0xdc00;
4212+ if (low_bit >> 10) {
42034213 return false;
42044214 }
42054215
42064216 code_point =
4207- (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00) ) + 0x10000;
4217+ (((code_point - 0xd800) << 10) | low_bit ) + 0x10000;
42084218 *src_ptr += 6;
42094219 } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
42104220 // If we encounter a low surrogate (not preceded by a high surrogate)
@@ -5668,17 +5678,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
56685678 }
56695679 uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
56705680
5671- // if the first code point is invalid we will get here, as we will go past
5672- // the check for being outside the Basic Multilingual plane. If we don't
5673- // find a \u immediately afterwards we fail out anyhow, but if we do,
5674- // this check catches both the case of the first code point being invalid
5675- // or the second code point being invalid.
5676- if ((code_point | code_point_2) >> 16) {
5681+ // We have already checked that the high surrogate is valid and
5682+ // (code_point - 0xd800) < 1024.
5683+ //
5684+ // Check that code_point_2 is in the range 0xdc00..0xdfff
5685+ // and that code_point_2 was parsed from valid hex.
5686+ uint32_t low_bit = code_point_2 - 0xdc00;
5687+ if (low_bit >> 10) {
56775688 return false;
56785689 }
56795690
56805691 code_point =
5681- (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00) ) + 0x10000;
5692+ (((code_point - 0xd800) << 10) | low_bit ) + 0x10000;
56825693 *src_ptr += 6;
56835694 } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
56845695 // If we encounter a low surrogate (not preceded by a high surrogate)
@@ -6883,6 +6894,14 @@ using namespace simd;
68836894 this->error |= this->prev_incomplete;
68846895 }
68856896
6897+ #ifndef SIMDJSON_IF_CONSTEXPR
6898+ #if SIMDJSON_CPLUSPLUS17
6899+ #define SIMDJSON_IF_CONSTEXPR if constexpr
6900+ #else
6901+ #define SIMDJSON_IF_CONSTEXPR if
6902+ #endif
6903+ #endif
6904+
68866905 simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
68876906 if(simdjson_likely(is_ascii(input))) {
68886907 this->error |= this->prev_incomplete;
@@ -6892,12 +6911,12 @@ using namespace simd;
68926911 ||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
68936912 || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
68946913 "We support one, two or four chunks per 64-byte block.");
6895- if (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
6914+ SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
68966915 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
6897- } if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
6916+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
68986917 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
68996918 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
6900- } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
6919+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
69016920 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
69026921 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
69036922 this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -7277,7 +7296,7 @@ struct json_block {
72777296 */
72787297class json_scanner {
72797298public:
7280- json_scanner() {}
7299+ json_scanner() = default;
72817300 simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
72827301 // Returns either UNCLOSED_STRING or SUCCESS
72837302 simdjson_inline error_code finish();
@@ -8020,17 +8039,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
80208039 }
80218040 uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
80228041
8023- // if the first code point is invalid we will get here, as we will go past
8024- // the check for being outside the Basic Multilingual plane. If we don't
8025- // find a \u immediately afterwards we fail out anyhow, but if we do,
8026- // this check catches both the case of the first code point being invalid
8027- // or the second code point being invalid.
8028- if ((code_point | code_point_2) >> 16) {
8042+ // We have already checked that the high surrogate is valid and
8043+ // (code_point - 0xd800) < 1024.
8044+ //
8045+ // Check that code_point_2 is in the range 0xdc00..0xdfff
8046+ // and that code_point_2 was parsed from valid hex.
8047+ uint32_t low_bit = code_point_2 - 0xdc00;
8048+ if (low_bit >> 10) {
80298049 return false;
80308050 }
80318051
80328052 code_point =
8033- (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00) ) + 0x10000;
8053+ (((code_point - 0xd800) << 10) | low_bit ) + 0x10000;
80348054 *src_ptr += 6;
80358055 } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
80368056 // If we encounter a low surrogate (not preceded by a high surrogate)
@@ -9266,6 +9286,14 @@ using namespace simd;
92669286 this->error |= this->prev_incomplete;
92679287 }
92689288
9289+ #ifndef SIMDJSON_IF_CONSTEXPR
9290+ #if SIMDJSON_CPLUSPLUS17
9291+ #define SIMDJSON_IF_CONSTEXPR if constexpr
9292+ #else
9293+ #define SIMDJSON_IF_CONSTEXPR if
9294+ #endif
9295+ #endif
9296+
92699297 simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
92709298 if(simdjson_likely(is_ascii(input))) {
92719299 this->error |= this->prev_incomplete;
@@ -9275,12 +9303,12 @@ using namespace simd;
92759303 ||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
92769304 || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
92779305 "We support one, two or four chunks per 64-byte block.");
9278- if (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
9306+ SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
92799307 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
9280- } if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
9308+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
92819309 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
92829310 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
9283- } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
9311+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
92849312 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
92859313 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
92869314 this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -9658,7 +9686,7 @@ struct json_block {
96589686 */
96599687class json_scanner {
96609688public:
9661- json_scanner() {}
9689+ json_scanner() = default;
96629690 simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
96639691 // Returns either UNCLOSED_STRING or SUCCESS
96649692 simdjson_inline error_code finish();
@@ -10354,17 +10382,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
1035410382 }
1035510383 uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
1035610384
10357- // if the first code point is invalid we will get here, as we will go past
10358- // the check for being outside the Basic Multilingual plane. If we don't
10359- // find a \u immediately afterwards we fail out anyhow, but if we do,
10360- // this check catches both the case of the first code point being invalid
10361- // or the second code point being invalid.
10362- if ((code_point | code_point_2) >> 16) {
10385+ // We have already checked that the high surrogate is valid and
10386+ // (code_point - 0xd800) < 1024.
10387+ //
10388+ // Check that code_point_2 is in the range 0xdc00..0xdfff
10389+ // and that code_point_2 was parsed from valid hex.
10390+ uint32_t low_bit = code_point_2 - 0xdc00;
10391+ if (low_bit >> 10) {
1036310392 return false;
1036410393 }
1036510394
1036610395 code_point =
10367- (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00) ) + 0x10000;
10396+ (((code_point - 0xd800) << 10) | low_bit ) + 0x10000;
1036810397 *src_ptr += 6;
1036910398 } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
1037010399 // If we encounter a low surrogate (not preceded by a high surrogate)
@@ -11563,6 +11592,14 @@ using namespace simd;
1156311592 this->error |= this->prev_incomplete;
1156411593 }
1156511594
11595+ #ifndef SIMDJSON_IF_CONSTEXPR
11596+ #if SIMDJSON_CPLUSPLUS17
11597+ #define SIMDJSON_IF_CONSTEXPR if constexpr
11598+ #else
11599+ #define SIMDJSON_IF_CONSTEXPR if
11600+ #endif
11601+ #endif
11602+
1156611603 simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
1156711604 if(simdjson_likely(is_ascii(input))) {
1156811605 this->error |= this->prev_incomplete;
@@ -11572,12 +11609,12 @@ using namespace simd;
1157211609 ||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
1157311610 || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
1157411611 "We support one, two or four chunks per 64-byte block.");
11575- if (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
11612+ SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
1157611613 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
11577- } if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
11614+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
1157811615 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1157911616 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
11580- } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
11617+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
1158111618 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1158211619 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
1158311620 this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -11955,7 +11992,7 @@ struct json_block {
1195511992 */
1195611993class json_scanner {
1195711994public:
11958- json_scanner() {}
11995+ json_scanner() = default;
1195911996 simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
1196011997 // Returns either UNCLOSED_STRING or SUCCESS
1196111998 simdjson_inline error_code finish();
@@ -12651,17 +12688,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
1265112688 }
1265212689 uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
1265312690
12654- // if the first code point is invalid we will get here, as we will go past
12655- // the check for being outside the Basic Multilingual plane. If we don't
12656- // find a \u immediately afterwards we fail out anyhow, but if we do,
12657- // this check catches both the case of the first code point being invalid
12658- // or the second code point being invalid.
12659- if ((code_point | code_point_2) >> 16) {
12691+ // We have already checked that the high surrogate is valid and
12692+ // (code_point - 0xd800) < 1024.
12693+ //
12694+ // Check that code_point_2 is in the range 0xdc00..0xdfff
12695+ // and that code_point_2 was parsed from valid hex.
12696+ uint32_t low_bit = code_point_2 - 0xdc00;
12697+ if (low_bit >> 10) {
1266012698 return false;
1266112699 }
1266212700
1266312701 code_point =
12664- (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00) ) + 0x10000;
12702+ (((code_point - 0xd800) << 10) | low_bit ) + 0x10000;
1266512703 *src_ptr += 6;
1266612704 } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
1266712705 // If we encounter a low surrogate (not preceded by a high surrogate)
@@ -13895,6 +13933,14 @@ using namespace simd;
1389513933 this->error |= this->prev_incomplete;
1389613934 }
1389713935
13936+ #ifndef SIMDJSON_IF_CONSTEXPR
13937+ #if SIMDJSON_CPLUSPLUS17
13938+ #define SIMDJSON_IF_CONSTEXPR if constexpr
13939+ #else
13940+ #define SIMDJSON_IF_CONSTEXPR if
13941+ #endif
13942+ #endif
13943+
1389813944 simdjson_inline void check_next_input(const simd8x64<uint8_t>& input) {
1389913945 if(simdjson_likely(is_ascii(input))) {
1390013946 this->error |= this->prev_incomplete;
@@ -13904,12 +13950,12 @@ using namespace simd;
1390413950 ||(simd8x64<uint8_t>::NUM_CHUNKS == 2)
1390513951 || (simd8x64<uint8_t>::NUM_CHUNKS == 4),
1390613952 "We support one, two or four chunks per 64-byte block.");
13907- if (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
13953+ SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 1) {
1390813954 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
13909- } if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
13955+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
1391013956 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1391113957 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
13912- } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
13958+ } else SIMDJSON_IF_CONSTEXPR (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
1391313959 this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
1391413960 this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
1391513961 this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
@@ -14287,7 +14333,7 @@ struct json_block {
1428714333 */
1428814334class json_scanner {
1428914335public:
14290- json_scanner() {}
14336+ json_scanner() = default;
1429114337 simdjson_inline json_block next(const simd::simd8x64<uint8_t>& in);
1429214338 // Returns either UNCLOSED_STRING or SUCCESS
1429314339 simdjson_inline error_code finish();
@@ -14983,17 +15029,18 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
1498315029 }
1498415030 uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
1498515031
14986- // if the first code point is invalid we will get here, as we will go past
14987- // the check for being outside the Basic Multilingual plane. If we don't
14988- // find a \u immediately afterwards we fail out anyhow, but if we do,
14989- // this check catches both the case of the first code point being invalid
14990- // or the second code point being invalid.
14991- if ((code_point | code_point_2) >> 16) {
15032+ // We have already checked that the high surrogate is valid and
15033+ // (code_point - 0xd800) < 1024.
15034+ //
15035+ // Check that code_point_2 is in the range 0xdc00..0xdfff
15036+ // and that code_point_2 was parsed from valid hex.
15037+ uint32_t low_bit = code_point_2 - 0xdc00;
15038+ if (low_bit >> 10) {
1499215039 return false;
1499315040 }
1499415041
1499515042 code_point =
14996- (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00) ) + 0x10000;
15043+ (((code_point - 0xd800) << 10) | low_bit ) + 0x10000;
1499715044 *src_ptr += 6;
1499815045 } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
1499915046 // If we encounter a low surrogate (not preceded by a high surrogate)
0 commit comments