diff --git a/deps/ata/LICENSE b/deps/ata/LICENSE new file mode 100644 index 00000000000000..30700e7baaa251 --- /dev/null +++ b/deps/ata/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Mert Can Altin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/deps/ata/ata.cpp b/deps/ata/ata.cpp new file mode 100644 index 00000000000000..7a54db0ee517a7 --- /dev/null +++ b/deps/ata/ata.cpp @@ -0,0 +1,2406 @@ +#include "ata.h" + +// mimalloc: faster new/delete for small allocations. +#if __has_include() +#include +#endif + +#include +#include +#include +#ifndef ATA_NO_RE2 +#include +#endif +#include +#include + +#ifdef _WIN32 +#include +#include +#else +#include +#endif + +// MSVC implementation by Pavel P (https://gist.github.com/pps83/3210a2f980fd02bb2ba2e5a1fc4a2ef0) +#if defined(_MSC_VER) && !defined(__clang__) +#include +#ifndef __builtin_popcount +#define __builtin_popcount __popcnt +#endif +#endif // defined(_MSC_VER) && !defined(__clang__) + +#include "simdjson.h" + +// --- Fast format validators (no std::regex) --- + +static bool is_digit(char c) { return c >= '0' && c <= '9'; } +static bool is_alpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} +static bool is_alnum(char c) { return is_alpha(c) || is_digit(c); } +static bool is_hex(char c) { + return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); +} + +static bool fast_check_email(std::string_view s) { + auto at = s.find('@'); + if (at == std::string_view::npos || at == 0 || at == s.size() - 1) + return false; + auto dot = s.find('.', at + 1); + if (dot == std::string_view::npos || dot == at + 1 || + dot == s.size() - 1) + return false; + // Check TLD has at least 2 chars + return (s.size() - dot - 1) >= 2; +} + +static bool fast_check_date(std::string_view s) { + // YYYY-MM-DD + return s.size() == 10 && is_digit(s[0]) && is_digit(s[1]) && + is_digit(s[2]) && is_digit(s[3]) && s[4] == '-' && + is_digit(s[5]) && is_digit(s[6]) && s[7] == '-' && + is_digit(s[8]) && is_digit(s[9]); +} + +static bool fast_check_time(std::string_view s) { + // HH:MM:SS[.frac][Z|+HH:MM] + if (s.size() < 8) return false; + if (!is_digit(s[0]) || !is_digit(s[1]) || s[2] != ':' || + !is_digit(s[3]) || !is_digit(s[4]) || s[5] != ':' || + !is_digit(s[6]) || !is_digit(s[7])) + return false; + return true; +} + +static bool fast_check_datetime(std::string_view s) { + if (s.size() < 19) return false; + if (!fast_check_date(s.substr(0, 10))) return false; + if (s[10] != 'T' && s[10] != 't' && s[10] != ' ') return false; + return fast_check_time(s.substr(11)); +} + +static bool fast_check_ipv4(std::string_view s) { + int parts = 0, val = 0, digits = 0; + for (size_t i = 0; i <= s.size(); ++i) { + if (i == s.size() || s[i] == '.') { + if (digits == 0 || val > 255) return false; + ++parts; + val = 0; + digits = 0; + } else if (is_digit(s[i])) { + val = val * 10 + (s[i] - '0'); + ++digits; + if (digits > 3) return false; + } else { + return false; + } + } + return parts == 4; +} + +static bool fast_check_uri(std::string_view s) { + if (s.size() < 3) return false; + // Must start with alpha, then scheme chars, then ':' + if (!is_alpha(s[0])) return false; + size_t i = 1; + while (i < s.size() && (is_alnum(s[i]) || s[i] == '+' || s[i] == '-' || + s[i] == '.')) + ++i; + return i < s.size() && s[i] == ':' && i + 1 < s.size(); +} + +static bool fast_check_uuid(std::string_view s) { + // 8-4-4-4-12 + if (s.size() != 36) return false; + for (size_t i = 0; i < 36; ++i) { + if (i == 8 || i == 13 || i == 18 || i == 23) { + if (s[i] != '-') return false; + } else { + if (!is_hex(s[i])) return false; + } + } + return true; +} + +static bool fast_check_hostname(std::string_view s) { + if (s.empty() || s.size() > 253) return false; + size_t label_len = 0; + for (size_t i = 0; i < s.size(); ++i) { + if (s[i] == '.') { + if (label_len == 0) return false; + label_len = 0; + } else if (is_alnum(s[i]) || s[i] == '-') { + ++label_len; + if (label_len > 63) return false; + } else { + return false; + } + } + return label_len > 0; +} + +// Check format by pre-resolved numeric ID — no string comparisons. +static bool check_format_by_id(std::string_view sv, uint8_t fid) { + switch (fid) { + case 0: return fast_check_email(sv); + case 1: return fast_check_date(sv); + case 2: return fast_check_datetime(sv); + case 3: return fast_check_time(sv); + case 4: return fast_check_ipv4(sv); + case 5: return sv.find(':') != std::string_view::npos; + case 6: return fast_check_uri(sv); + case 7: return fast_check_uuid(sv); + case 8: return fast_check_hostname(sv); + default: return true; // unknown formats pass + } +} + +namespace ata { + +using namespace simdjson; + +// Canonical JSON: sort object keys for semantic equality comparison +static std::string canonical_json(dom::element el) { + switch (el.type()) { + case dom::element_type::OBJECT: { + dom::object obj; el.get(obj); + std::vector> entries; + for (auto [k, v] : obj) entries.push_back({k, v}); + std::sort(entries.begin(), entries.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); + std::string r = "{"; + for (size_t i = 0; i < entries.size(); ++i) { + if (i) r += ','; + r += '"'; + r += entries[i].first; + r += "\":"; + r += canonical_json(entries[i].second); + } + r += '}'; + return r; + } + case dom::element_type::ARRAY: { + dom::array arr; el.get(arr); + std::string r = "["; + bool first = true; + for (auto v : arr) { + if (!first) r += ','; + first = false; + r += canonical_json(v); + } + r += ']'; + return r; + } + default: + return std::string(minify(el)); + } +} + +// JSON Schema type enum — avoids string comparisons on the hot path. +enum class json_type : uint8_t { + string, number, integer, boolean, null_value, object, array +}; + +static json_type json_type_from_sv(std::string_view s) { + if (s == "string") return json_type::string; + if (s == "number") return json_type::number; + if (s == "integer") return json_type::integer; + if (s == "boolean") return json_type::boolean; + if (s == "null") return json_type::null_value; + if (s == "object") return json_type::object; + if (s == "array") return json_type::array; + return json_type::string; // fallback +} + +static const char* json_type_name(json_type t) { + switch (t) { + case json_type::string: return "string"; + case json_type::number: return "number"; + case json_type::integer: return "integer"; + case json_type::boolean: return "boolean"; + case json_type::null_value: return "null"; + case json_type::object: return "object"; + case json_type::array: return "array"; + } + return "unknown"; +} + +// Bitmask for O(1) type checking: one bit per json_type value. +static uint8_t json_type_bit(json_type t) { return 1u << static_cast(t); } + +// Map dom::element_type to a json_type bitmask (number matches integer too). +static uint8_t element_type_mask(dom::element_type t) { + switch (t) { + case dom::element_type::STRING: return json_type_bit(json_type::string); + case dom::element_type::INT64: + case dom::element_type::UINT64: return json_type_bit(json_type::integer) | json_type_bit(json_type::number); + case dom::element_type::DOUBLE: return json_type_bit(json_type::number); + case dom::element_type::BOOL: return json_type_bit(json_type::boolean); + case dom::element_type::NULL_VALUE: return json_type_bit(json_type::null_value); + case dom::element_type::ARRAY: return json_type_bit(json_type::array); + case dom::element_type::OBJECT: return json_type_bit(json_type::object); + } + return 0; +} + +// Resolve format string to numeric ID at compile time. +static uint8_t format_id_from_string(const std::string& f) { + if (f == "email") return 0; + if (f == "date") return 1; + if (f == "date-time") return 2; + if (f == "time") return 3; + if (f == "ipv4") return 4; + if (f == "ipv6") return 5; + if (f == "uri" || f == "uri-reference") return 6; + if (f == "uuid") return 7; + if (f == "hostname") return 8; + return 255; +} + +// Forward declarations +struct schema_node; +using schema_node_ptr = std::shared_ptr; + +struct schema_node { + // type constraint — bitmask for O(1) type checking + uint8_t type_mask = 0; // bit per json_type value + + // numeric + std::optional minimum; + std::optional maximum; + std::optional exclusive_minimum; + std::optional exclusive_maximum; + std::optional multiple_of; + + // string + std::optional min_length; + std::optional max_length; + std::optional pattern; +#ifndef ATA_NO_RE2 + std::shared_ptr compiled_pattern; // cached compiled regex (RE2) +#endif + + // array + std::optional min_items; + std::optional max_items; + bool unique_items = false; + schema_node_ptr items_schema; + std::vector prefix_items; + schema_node_ptr contains_schema; + std::optional min_contains; + std::optional max_contains; + + // object + std::unordered_map properties; + std::vector required; + std::optional additional_properties_bool; + schema_node_ptr additional_properties_schema; + std::optional min_properties; + std::optional max_properties; + schema_node_ptr property_names_schema; + std::unordered_map> dependent_required; + std::unordered_map dependent_schemas; + + // patternProperties — each entry: (pattern_string, schema, compiled_regex) + struct pattern_prop { + std::string pattern; + schema_node_ptr schema; +#ifndef ATA_NO_RE2 + std::shared_ptr compiled; +#endif + }; + std::vector pattern_properties; + + // enum / const + std::vector enum_values_minified; // pre-minified enum values + std::optional const_value_raw; // raw JSON value string + + // format + std::optional format; + uint8_t format_id = 255; // pre-resolved format ID (255 = unknown/pass) + + // composition + std::vector all_of; + std::vector any_of; + std::vector one_of; + schema_node_ptr not_schema; + + // conditional + schema_node_ptr if_schema; + schema_node_ptr then_schema; + schema_node_ptr else_schema; + + // $ref + std::string ref; + + // $defs — stored on node for pointer navigation + std::unordered_map defs; + + // boolean schema + std::optional boolean_schema; +}; + +// --- Codegen: flat bytecode plan --- +namespace cg { +enum class op : uint8_t { + END=0, EXPECT_OBJECT, EXPECT_ARRAY, EXPECT_STRING, EXPECT_NUMBER, + EXPECT_INTEGER, EXPECT_BOOLEAN, EXPECT_NULL, EXPECT_TYPE_MULTI, + CHECK_MINIMUM, CHECK_MAXIMUM, CHECK_EX_MINIMUM, CHECK_EX_MAXIMUM, + CHECK_MULTIPLE_OF, CHECK_MIN_LENGTH, CHECK_MAX_LENGTH, CHECK_PATTERN, + CHECK_FORMAT, CHECK_MIN_ITEMS, CHECK_MAX_ITEMS, CHECK_UNIQUE_ITEMS, + ARRAY_ITEMS, CHECK_REQUIRED, CHECK_MIN_PROPS, CHECK_MAX_PROPS, + OBJ_PROPS_START, OBJ_PROP, OBJ_PROPS_END, CHECK_NO_ADDITIONAL, + CHECK_ENUM_STR, CHECK_ENUM, CHECK_CONST, COMPOSITION, +}; +struct ins { op o; uint32_t a=0, b=0; }; +struct plan { + std::vector code; + std::vector doubles; + std::vector strings; +#ifndef ATA_NO_RE2 + std::vector> regexes; +#endif + std::vector> enum_sets; + std::vector type_masks; + std::vector format_ids; + std::vector> subs; +}; +} // namespace cg + +// --- On-Demand validation plan --- +// Grouped checks per value type. Each value consumed exactly once. +// Built from schema_node at compile time, used by od_exec_plan at runtime. +struct od_plan { + uint8_t type_mask = 0; + + // Numeric — bitmask for which checks to run + flat array of bounds + enum num_flag : uint8_t { + HAS_MIN = 1, HAS_MAX = 2, HAS_EX_MIN = 4, HAS_EX_MAX = 8, HAS_MUL = 16 + }; + uint8_t num_flags = 0; + double num_min = 0, num_max = 0, num_ex_min = 0, num_ex_max = 0, num_mul = 0; + + // String — single value.get(sv) then all checks + std::optional min_length, max_length; +#ifndef ATA_NO_RE2 + re2::RE2* pattern = nullptr; // borrowed pointer from schema_node +#endif + uint8_t format_id = 255; // 255 = no format check + + // Object — single iterate with merged required+property lookup + struct prop_entry { + std::string key; + int required_idx = -1; // bit index for required tracking, or -1 + std::shared_ptr sub; // property sub-plan, or nullptr + }; + struct obj_plan { + std::vector entries; // merged required + properties — single scan + size_t required_count = 0; + bool no_additional = false; + std::optional min_props, max_props; + }; + std::shared_ptr object; + + // Array — single iterate: items + count + struct arr_plan { + std::shared_ptr items; + std::optional min_items, max_items; + }; + std::shared_ptr array; + + // If false, schema uses unsupported features — must fall back to DOM path. + bool supported = true; +}; + +using od_plan_ptr = std::shared_ptr; + +struct compiled_schema { + schema_node_ptr root; + std::unordered_map defs; + std::string raw_schema; + std::string compile_error; // non-empty if compilation failed + dom::parser parser; // used only at compile time + cg::plan gen_plan; // codegen validation plan + bool use_ondemand = false; // true if codegen plan supports On Demand + od_plan_ptr od; // On-Demand execution plan +}; + +// Thread-local persistent parsers — reused across all validate calls on the +// same thread. Keeps internal buffers hot in cache and avoids re-allocation. +static dom::parser& tl_dom_parser() { + thread_local dom::parser p; + return p; +} +static dom::parser& tl_dom_key_parser() { + thread_local dom::parser p; + return p; +} +static simdjson::ondemand::parser& tl_od_parser() { + thread_local simdjson::ondemand::parser p; + return p; +}; + +// --- Schema compilation --- + +static schema_node_ptr compile_node(dom::element el, + compiled_schema& ctx); + +static schema_node_ptr compile_node(dom::element el, + compiled_schema& ctx) { + auto node = std::make_shared(); + + // Boolean schema + if (el.is()) { + bool bval; + el.get(bval); + node->boolean_schema = bval; + return node; + } + + if (!el.is()) { + return node; + } + + dom::object obj; + el.get(obj); + + // $ref + dom::element ref_el; + if (obj["$ref"].get(ref_el) == SUCCESS) { + std::string_view ref_sv; + if (ref_el.get(ref_sv) == SUCCESS) { + node->ref = std::string(ref_sv); + } + } + + // type + dom::element type_el; + if (obj["type"].get(type_el) == SUCCESS) { + if (type_el.is()) { + std::string_view sv; + type_el.get(sv); + node->type_mask |= json_type_bit(json_type_from_sv(sv)); + } else if (type_el.is()) { + dom::array type_arr; type_el.get(type_arr); for (auto t : type_arr) { + std::string_view sv; + if (t.get(sv) == SUCCESS) { + node->type_mask |= json_type_bit(json_type_from_sv(sv)); + } + } + } + } + + // numeric constraints + dom::element num_el; + if (obj["minimum"].get(num_el) == SUCCESS) { + double v; + if (num_el.get(v) == SUCCESS) node->minimum = v; + } + if (obj["maximum"].get(num_el) == SUCCESS) { + double v; + if (num_el.get(v) == SUCCESS) node->maximum = v; + } + if (obj["exclusiveMinimum"].get(num_el) == SUCCESS) { + double v; + if (num_el.get(v) == SUCCESS) node->exclusive_minimum = v; + } + if (obj["exclusiveMaximum"].get(num_el) == SUCCESS) { + double v; + if (num_el.get(v) == SUCCESS) node->exclusive_maximum = v; + } + if (obj["multipleOf"].get(num_el) == SUCCESS) { + double v; + if (num_el.get(v) == SUCCESS) node->multiple_of = v; + } + + // string constraints + dom::element str_el; + if (obj["minLength"].get(str_el) == SUCCESS) { + uint64_t v; + if (str_el.get(v) == SUCCESS) node->min_length = v; + } + if (obj["maxLength"].get(str_el) == SUCCESS) { + uint64_t v; + if (str_el.get(v) == SUCCESS) node->max_length = v; + } + if (obj["pattern"].get(str_el) == SUCCESS) { + std::string_view sv; + if (str_el.get(sv) == SUCCESS) { + node->pattern = std::string(sv); +#ifdef ATA_NO_RE2 + ctx.compile_error = "pattern keyword requires RE2 support (built with ATA_NO_RE2)"; + return node; +#else + auto re = std::make_shared(node->pattern.value()); + if (re->ok()) { + node->compiled_pattern = std::move(re); + } +#endif + } + } + + // array constraints + if (obj["minItems"].get(str_el) == SUCCESS) { + uint64_t v; + if (str_el.get(v) == SUCCESS) node->min_items = v; + } + if (obj["maxItems"].get(str_el) == SUCCESS) { + uint64_t v; + if (str_el.get(v) == SUCCESS) node->max_items = v; + } + dom::element ui_el; + if (obj["uniqueItems"].get(ui_el) == SUCCESS) { + bool v; + if (ui_el.get(v) == SUCCESS) node->unique_items = v; + } + // prefixItems (Draft 2020-12) + dom::element pi_el; + if (obj["prefixItems"].get(pi_el) == SUCCESS && pi_el.is()) { + dom::array pi_arr; pi_el.get(pi_arr); for (auto item : pi_arr) { + node->prefix_items.push_back(compile_node(item, ctx)); + } + } + + dom::element items_el; + if (obj["items"].get(items_el) == SUCCESS) { + node->items_schema = compile_node(items_el, ctx); + } + + // contains + dom::element contains_el; + if (obj["contains"].get(contains_el) == SUCCESS) { + node->contains_schema = compile_node(contains_el, ctx); + } + dom::element mc_el; + if (obj["minContains"].get(mc_el) == SUCCESS) { + uint64_t v; + if (mc_el.get(v) == SUCCESS) node->min_contains = v; + } + if (obj["maxContains"].get(mc_el) == SUCCESS) { + uint64_t v; + if (mc_el.get(v) == SUCCESS) node->max_contains = v; + } + + // object constraints + dom::element props_el; + if (obj["properties"].get(props_el) == SUCCESS && props_el.is()) { + dom::object props_obj; props_el.get(props_obj); for (auto [key, val] : props_obj) { + node->properties[std::string(key)] = compile_node(val, ctx); + } + } + + dom::element req_el; + if (obj["required"].get(req_el) == SUCCESS && req_el.is()) { + dom::array req_arr; req_el.get(req_arr); for (auto r : req_arr) { + std::string_view sv; + if (r.get(sv) == SUCCESS) { + node->required.emplace_back(sv); + } + } + } + + dom::element ap_el; + if (obj["additionalProperties"].get(ap_el) == SUCCESS) { + if (ap_el.is()) { + bool ap_bool; ap_el.get(ap_bool); node->additional_properties_bool = ap_bool; + } else { + node->additional_properties_schema = compile_node(ap_el, ctx); + } + } + + if (obj["minProperties"].get(str_el) == SUCCESS) { + uint64_t v; + if (str_el.get(v) == SUCCESS) node->min_properties = v; + } + if (obj["maxProperties"].get(str_el) == SUCCESS) { + uint64_t v; + if (str_el.get(v) == SUCCESS) node->max_properties = v; + } + + // propertyNames + dom::element pn_el; + if (obj["propertyNames"].get(pn_el) == SUCCESS) { + node->property_names_schema = compile_node(pn_el, ctx); + } + + // dependentRequired + dom::element dr_el; + if (obj["dependentRequired"].get(dr_el) == SUCCESS && + dr_el.is()) { + dom::object dr_obj; dr_el.get(dr_obj); for (auto [key, val] : dr_obj) { + std::vector deps; + if (val.is()) { + dom::array val_arr; val.get(val_arr); for (auto d : val_arr) { + std::string_view sv; + if (d.get(sv) == SUCCESS) deps.emplace_back(sv); + } + } + node->dependent_required[std::string(key)] = std::move(deps); + } + } + + // dependentSchemas + dom::element ds_el; + if (obj["dependentSchemas"].get(ds_el) == SUCCESS && + ds_el.is()) { + dom::object ds_obj; ds_el.get(ds_obj); for (auto [key, val] : ds_obj) { + node->dependent_schemas[std::string(key)] = compile_node(val, ctx); + } + } + + // patternProperties — compile regex at schema compile time + dom::element pp_el; + if (obj["patternProperties"].get(pp_el) == SUCCESS && + pp_el.is()) { +#ifdef ATA_NO_RE2 + ctx.compile_error = "patternProperties keyword requires RE2 support (built with ATA_NO_RE2)"; + return node; +#else + dom::object pp_obj; pp_el.get(pp_obj); + for (auto [key, val] : pp_obj) { + schema_node::pattern_prop pp; + pp.pattern = std::string(key); + pp.schema = compile_node(val, ctx); + auto re = std::make_shared(pp.pattern); + if (re->ok()) { + pp.compiled = std::move(re); + } + node->pattern_properties.push_back(std::move(pp)); + } +#endif + } + + // format + dom::element fmt_el; + if (obj["format"].get(fmt_el) == SUCCESS) { + std::string_view sv; + if (fmt_el.get(sv) == SUCCESS) { + node->format = std::string(sv); + node->format_id = format_id_from_string(node->format.value()); + } + } + + // $id (register in defs for potential resolution) + dom::element id_el; + if (obj["$id"].get(id_el) == SUCCESS) { + std::string_view sv; + if (id_el.get(sv) == SUCCESS) { + ctx.defs[std::string(sv)] = node; + } + } + + // enum — pre-minify each value at compile time + dom::element enum_el; + if (obj["enum"].get(enum_el) == SUCCESS) { + if (enum_el.is()) { + dom::array enum_arr; enum_el.get(enum_arr); for (auto e : enum_arr) { + node->enum_values_minified.push_back(canonical_json(e)); + } + } + } + + // const + dom::element const_el; + if (obj["const"].get(const_el) == SUCCESS) { + node->const_value_raw = canonical_json(const_el); + } + + // composition + dom::element comp_el; + if (obj["allOf"].get(comp_el) == SUCCESS && comp_el.is()) { + dom::array comp_arr; comp_el.get(comp_arr); + for (auto s : comp_arr) { + node->all_of.push_back(compile_node(s, ctx)); + } + } + if (obj["anyOf"].get(comp_el) == SUCCESS && comp_el.is()) { + dom::array comp_arr2; comp_el.get(comp_arr2); + for (auto s : comp_arr2) { + node->any_of.push_back(compile_node(s, ctx)); + } + } + if (obj["oneOf"].get(comp_el) == SUCCESS && comp_el.is()) { + dom::array comp_arr3; comp_el.get(comp_arr3); + for (auto s : comp_arr3) { + node->one_of.push_back(compile_node(s, ctx)); + } + } + dom::element not_el; + if (obj["not"].get(not_el) == SUCCESS) { + node->not_schema = compile_node(not_el, ctx); + } + + // conditional + dom::element if_el; + if (obj["if"].get(if_el) == SUCCESS) { + node->if_schema = compile_node(if_el, ctx); + } + dom::element then_el; + if (obj["then"].get(then_el) == SUCCESS) { + node->then_schema = compile_node(then_el, ctx); + } + dom::element else_el; + if (obj["else"].get(else_el) == SUCCESS) { + node->else_schema = compile_node(else_el, ctx); + } + + // $defs / definitions + dom::element defs_el; + if (obj["$defs"].get(defs_el) == SUCCESS && defs_el.is()) { + dom::object defs_obj; defs_el.get(defs_obj); for (auto [key, val] : defs_obj) { + std::string def_path = "#/$defs/" + std::string(key); + auto compiled = compile_node(val, ctx); + ctx.defs[def_path] = compiled; + node->defs[std::string(key)] = compiled; + } + } + if (obj["definitions"].get(defs_el) == SUCCESS && + defs_el.is()) { + dom::object defs_obj; defs_el.get(defs_obj); for (auto [key, val] : defs_obj) { + std::string def_path = "#/definitions/" + std::string(key); + auto compiled = compile_node(val, ctx); + ctx.defs[def_path] = compiled; + node->defs[std::string(key)] = compiled; + } + } + + return node; +} + +// --- Validation --- + +static void validate_node(const schema_node_ptr& node, + dom::element value, + const std::string& path, + const compiled_schema& ctx, + std::vector& errors, + bool all_errors = true); + +// Fast boolean-only tree walker — no error collection, no string allocation. +// Uses [[likely]]/[[unlikely]] hints. Returns true if valid. +static bool validate_fast(const schema_node_ptr& node, + dom::element value, + const compiled_schema& ctx); + +// Macro for early termination +#define ATA_CHECK_EARLY() if (!all_errors && !errors.empty()) return + +using et = dom::element_type; + + +// Use string_view to avoid allocations in hot path +static std::string_view type_of_sv(dom::element el) { + switch (el.type()) { + case et::STRING: return "string"; + case et::INT64: + case et::UINT64: return "integer"; + case et::DOUBLE: return "number"; + case et::BOOL: return "boolean"; + case et::NULL_VALUE:return "null"; + case et::ARRAY: return "array"; + case et::OBJECT: return "object"; + } + return "unknown"; +} + + +// O(1) type check: test element's type bits against the schema's type_mask. +static bool type_matches_mask(dom::element el, uint8_t type_mask) { + return (element_type_mask(el.type()) & type_mask) != 0; +} + +static double to_double(dom::element el) { + switch (el.type()) { + case et::DOUBLE: { double v; el.get(v); return v; } + case et::INT64: { int64_t v; el.get(v); return static_cast(v); } + case et::UINT64: { uint64_t v; el.get(v); return static_cast(v); } + default: return 0; + } +} + +// Count UTF-8 codepoints — branchless: count non-continuation bytes +static uint64_t utf8_length(std::string_view s) { + uint64_t count = 0; + for (size_t i = 0; i < s.size(); ++i) { + // Continuation bytes are 10xxxxxx (0x80-0xBF) + // Non-continuation bytes start codepoints + count += ((static_cast(s[i]) & 0xC0) != 0x80); + } + return count; +} + +static void validate_node(const schema_node_ptr& node, + dom::element value, + const std::string& path, + const compiled_schema& ctx, + std::vector& errors, + bool all_errors) { + if (!node) return; + + // Boolean schema + if (node->boolean_schema.has_value()) { + if (!node->boolean_schema.value()) { + errors.push_back({error_code::type_mismatch, path, + "schema is false, no value is valid"}); + } + return; + } + + // $ref — Draft 2020-12: $ref is not a short-circuit, sibling keywords still apply + bool ref_resolved = false; + if (!node->ref.empty()) { + // First check defs map + auto it = ctx.defs.find(node->ref); + if (it != ctx.defs.end()) { + validate_node(it->second, value, path, ctx, errors, all_errors); + ref_resolved = true; + } + // Try JSON Pointer resolution from root (e.g., "#/properties/foo") + if (node->ref.size() > 1 && node->ref[0] == '#' && + node->ref[1] == '/') { + // Decode JSON Pointer segments + auto decode_pointer_segment = [](const std::string& seg) -> std::string { + // Percent-decode first + std::string pct; + for (size_t i = 0; i < seg.size(); ++i) { + if (seg[i] == '%' && i + 2 < seg.size()) { + char h = seg[i+1], l = seg[i+2]; + auto hex = [](char c) -> int { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return 10 + c - 'a'; + if (c >= 'A' && c <= 'F') return 10 + c - 'A'; + return -1; + }; + int hv = hex(h), lv = hex(l); + if (hv >= 0 && lv >= 0) { + pct += static_cast(hv * 16 + lv); + i += 2; + } else { + pct += seg[i]; + } + } else { + pct += seg[i]; + } + } + // Then JSON Pointer unescape: ~1 -> /, ~0 -> ~ + std::string out; + for (size_t i = 0; i < pct.size(); ++i) { + if (pct[i] == '~' && i + 1 < pct.size()) { + if (pct[i + 1] == '1') { out += '/'; ++i; } + else if (pct[i + 1] == '0') { out += '~'; ++i; } + else out += pct[i]; + } else { + out += pct[i]; + } + } + return out; + }; + + // Split pointer into segments + std::string pointer = node->ref.substr(2); + std::vector segments; + size_t spos = 0; + while (spos < pointer.size()) { + size_t snext = pointer.find('/', spos); + segments.push_back(decode_pointer_segment( + pointer.substr(spos, snext == std::string::npos ? snext : snext - spos))); + spos = (snext == std::string::npos) ? pointer.size() : snext + 1; + } + + // Walk the schema tree + schema_node_ptr current = ctx.root; + bool resolved = true; + for (size_t si = 0; si < segments.size() && current; ++si) { + const auto& key = segments[si]; + + if (key == "properties" && si + 1 < segments.size()) { + const auto& prop_name = segments[++si]; + auto pit = current->properties.find(prop_name); + if (pit != current->properties.end()) { + current = pit->second; + } else { resolved = false; break; } + } else if (key == "items" && current->items_schema) { + current = current->items_schema; + } else if (key == "$defs" || key == "definitions") { + if (si + 1 < segments.size()) { + const auto& def_name = segments[++si]; + // Navigate into node's defs map + auto dit = current->defs.find(def_name); + if (dit != current->defs.end()) { + current = dit->second; + } else { + // Fallback: try ctx.defs with full path + std::string full_ref = "#/" + key + "/" + def_name; + auto cit = ctx.defs.find(full_ref); + if (cit != ctx.defs.end()) { + current = cit->second; + } else { resolved = false; break; } + } + } else { resolved = false; break; } + } else if (key == "allOf" || key == "anyOf" || key == "oneOf") { + if (si + 1 < segments.size()) { + size_t idx = std::stoul(segments[++si]); + auto& vec = (key == "allOf") ? current->all_of + : (key == "anyOf") ? current->any_of + : current->one_of; + if (idx < vec.size()) { current = vec[idx]; } + else { resolved = false; break; } + } else { resolved = false; break; } + } else if (key == "not" && current->not_schema) { + current = current->not_schema; + } else if (key == "if" && current->if_schema) { + current = current->if_schema; + } else if (key == "then" && current->then_schema) { + current = current->then_schema; + } else if (key == "else" && current->else_schema) { + current = current->else_schema; + } else if (key == "additionalProperties" && + current->additional_properties_schema) { + current = current->additional_properties_schema; + } else if (key == "prefixItems") { + if (si + 1 < segments.size()) { + size_t idx = std::stoul(segments[++si]); + if (idx < current->prefix_items.size()) { current = current->prefix_items[idx]; } + else { resolved = false; break; } + } else { resolved = false; break; } + } else { + resolved = false; break; + } + } + if (resolved && current) { + validate_node(current, value, path, ctx, errors, all_errors); + ref_resolved = true; + } + } + // Self-reference: "#" + if (!ref_resolved && node->ref == "#" && ctx.root) { + validate_node(ctx.root, value, path, ctx, errors, all_errors); + ref_resolved = true; + } + if (!ref_resolved) { + errors.push_back({error_code::ref_not_found, path, + "cannot resolve $ref: " + node->ref}); + } + } + + // type + if (node->type_mask) { + if (!type_matches_mask(value, node->type_mask)) { + std::string expected; + for (int b = 0; b < 7; ++b) { + if (node->type_mask & (1u << b)) { + if (!expected.empty()) expected += ", "; + expected += json_type_name(static_cast(b)); + } + } + errors.push_back({error_code::type_mismatch, path, + "expected type " + expected + ", got " + std::string(type_of_sv(value))}); + ATA_CHECK_EARLY(); + } + } + + // enum — use pre-minified values (no re-parsing) + if (!node->enum_values_minified.empty()) { + std::string val_str = canonical_json(value); + bool found = false; + for (const auto& ev : node->enum_values_minified) { + if (ev == val_str) { + found = true; + break; + } + } + if (!found) { + errors.push_back({error_code::enum_mismatch, path, + "value not in enum"}); + } + } + + // const + if (node->const_value_raw.has_value()) { + std::string val_str = canonical_json(value); + if (val_str != node->const_value_raw.value()) { + errors.push_back({error_code::const_mismatch, path, + "value does not match const"}); + ATA_CHECK_EARLY(); + } + } + + ATA_CHECK_EARLY(); + // Numeric validations + auto vtype = value.type(); + if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) { + double v = to_double(value); + if (node->minimum.has_value() && v < node->minimum.value()) { + errors.push_back({error_code::minimum_violation, path, + "value " + std::to_string(v) + " < minimum " + + std::to_string(node->minimum.value())}); + } + if (node->maximum.has_value() && v > node->maximum.value()) { + errors.push_back({error_code::maximum_violation, path, + "value " + std::to_string(v) + " > maximum " + + std::to_string(node->maximum.value())}); + } + if (node->exclusive_minimum.has_value() && + v <= node->exclusive_minimum.value()) { + errors.push_back({error_code::exclusive_minimum_violation, path, + "value must be > " + + std::to_string(node->exclusive_minimum.value())}); + } + if (node->exclusive_maximum.has_value() && + v >= node->exclusive_maximum.value()) { + errors.push_back({error_code::exclusive_maximum_violation, path, + "value must be < " + + std::to_string(node->exclusive_maximum.value())}); + } + if (node->multiple_of.has_value()) { + double divisor = node->multiple_of.value(); + double rem = std::fmod(v, divisor); + // Use relative tolerance for floating point comparison + if (std::abs(rem) > 1e-8 && std::abs(rem - divisor) > 1e-8) { + errors.push_back({error_code::multiple_of_violation, path, + "value not a multiple of " + + std::to_string(node->multiple_of.value())}); + } + } + } + + // String validations + if (vtype == et::STRING) { + std::string_view sv; + value.get(sv); + uint64_t len = utf8_length(sv); + + if (node->min_length.has_value() && len < node->min_length.value()) { + errors.push_back({error_code::min_length_violation, path, + "string length " + std::to_string(len) + + " < minLength " + + std::to_string(node->min_length.value())}); + } + if (node->max_length.has_value() && len > node->max_length.value()) { + errors.push_back({error_code::max_length_violation, path, + "string length " + std::to_string(len) + + " > maxLength " + + std::to_string(node->max_length.value())}); + } +#ifndef ATA_NO_RE2 + if (node->compiled_pattern) { + if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern)) { + errors.push_back({error_code::pattern_mismatch, path, + "string does not match pattern: " + + node->pattern.value()}); + } + } +#endif + + if (node->format.has_value()) { + if (!check_format_by_id(sv, node->format_id)) { + errors.push_back({error_code::format_mismatch, path, + "string does not match format: " + + node->format.value()}); + } + } + } + + // Array validations + if (vtype == et::ARRAY) { + dom::array arr; value.get(arr); + uint64_t arr_size = arr.size(); + if(arr_size == 0xFFFFFF) [[unlikely]] { + // Fallback for large arrays where size() saturates — count manually to avoid overflow + arr_size = 0; + for ([[maybe_unused]] auto _ : arr) ++arr_size; + } + + if (node->min_items.has_value() && arr_size < node->min_items.value()) { + errors.push_back({error_code::min_items_violation, path, + "array has " + std::to_string(arr_size) + + " items, minimum " + + std::to_string(node->min_items.value())}); + } + if (node->max_items.has_value() && arr_size > node->max_items.value()) { + errors.push_back({error_code::max_items_violation, path, + "array has " + std::to_string(arr_size) + + " items, maximum " + + std::to_string(node->max_items.value())}); + } + + if (node->unique_items) { + bool has_dup = false; + // Fast path: check if all items are the same simple type + auto first_it = arr.begin(); + if (first_it != arr.end()) { + auto first_type = (*first_it).type(); + bool all_same = true; + for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } } + if (all_same && first_type == et::STRING) { + std::set seen; + for (auto item : arr) { + std::string_view sv; item.get(sv); + if (!seen.insert(sv).second) { has_dup = true; break; } + } + } else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) { + std::set seen; + for (auto item : arr) { + if (!seen.insert(to_double(item)).second) { has_dup = true; break; } + } + } else { + std::set seen; + for (auto item : arr) { + if (!seen.insert(canonical_json(item)).second) { has_dup = true; break; } + } + } + } + if (has_dup) { + errors.push_back({error_code::unique_items_violation, path, + "array contains duplicate items"}); + } + } + + // prefixItems + items (Draft 2020-12 semantics) + { + uint64_t idx = 0; + for (auto item : arr) { + if (idx < node->prefix_items.size()) { + validate_node(node->prefix_items[idx], item, + path + "/" + std::to_string(idx), ctx, errors, all_errors); + } else if (node->items_schema) { + validate_node(node->items_schema, item, + path + "/" + std::to_string(idx), ctx, errors, all_errors); + } + ++idx; + } + } + + // contains / minContains / maxContains + if (node->contains_schema) { + uint64_t match_count = 0; + for (auto item : arr) { + if (validate_fast(node->contains_schema, item, ctx)) ++match_count; + } + uint64_t min_c = node->min_contains.value_or(1); + uint64_t max_c = node->max_contains.value_or(arr_size); + if (match_count < min_c) { + errors.push_back({error_code::min_items_violation, path, + "contains: " + std::to_string(match_count) + + " matches, minimum " + std::to_string(min_c)}); + } + if (match_count > max_c) { + errors.push_back({error_code::max_items_violation, path, + "contains: " + std::to_string(match_count) + + " matches, maximum " + std::to_string(max_c)}); + } + } + } + + // Object validations + if (vtype == et::OBJECT) { + dom::object obj; value.get(obj); + + if (node->min_properties.has_value() || node->max_properties.has_value()) { + uint64_t prop_count = 0; + for ([[maybe_unused]] auto _ : obj) ++prop_count; + if (node->min_properties.has_value() && + prop_count < node->min_properties.value()) { + errors.push_back({error_code::min_properties_violation, path, + "object has " + std::to_string(prop_count) + + " properties, minimum " + + std::to_string(node->min_properties.value())}); + } + if (node->max_properties.has_value() && + prop_count > node->max_properties.value()) { + errors.push_back({error_code::max_properties_violation, path, + "object has " + std::to_string(prop_count) + + " properties, maximum " + + std::to_string(node->max_properties.value())}); + } + } + + // required + for (const auto& req : node->required) { + dom::element dummy; + if (obj[req].get(dummy) != SUCCESS) { + errors.push_back({error_code::required_property_missing, path, + "missing required property: " + req}); + } + } + + // properties + patternProperties + additionalProperties + for (auto [key, val] : obj) { + std::string key_str(key); + bool matched = false; + + // Check properties + auto it = node->properties.find(key_str); + if (it != node->properties.end()) { + validate_node(it->second, val, path + "/" + key_str, ctx, errors, all_errors); + matched = true; + } + + // Check patternProperties (use cached compiled regex) + for (const auto& pp : node->pattern_properties) { +#ifndef ATA_NO_RE2 + if (pp.compiled && re2::RE2::PartialMatch(key_str, *pp.compiled)) { + validate_node(pp.schema, val, path + "/" + key_str, ctx, errors, all_errors); + matched = true; + } +#endif + } + + // additionalProperties (only if not matched by properties or patternProperties) + if (!matched) { + if (node->additional_properties_bool.has_value() && + !node->additional_properties_bool.value()) { + errors.push_back( + {error_code::additional_property_not_allowed, path, + "additional property not allowed: " + key_str}); + } else if (node->additional_properties_schema) { + validate_node(node->additional_properties_schema, val, + path + "/" + key_str, ctx, errors); + } + } + } + // propertyNames — validate key as string directly when possible + if (node->property_names_schema) { + auto pn = node->property_names_schema; + bool string_only = pn->ref.empty() && pn->all_of.empty() && + pn->any_of.empty() && pn->one_of.empty() && !pn->not_schema && + !pn->if_schema && pn->enum_values_minified.empty() && + !pn->const_value_raw.has_value(); + if (string_only) { + // Fast path: validate string constraints on key directly + for (auto [key, val] : obj) { + std::string_view key_sv(key); + if (pn->type_mask && !(pn->type_mask & json_type_bit(json_type::string))) { + errors.push_back({error_code::type_mismatch, path, + "propertyNames: key is string but schema requires different type"}); + continue; + } + uint64_t len = utf8_length(key_sv); + if (pn->min_length.has_value() && len < pn->min_length.value()) { + errors.push_back({error_code::min_length_violation, path, + "propertyNames: key too short: " + std::string(key_sv)}); + } + if (pn->max_length.has_value() && len > pn->max_length.value()) { + errors.push_back({error_code::max_length_violation, path, + "propertyNames: key too long: " + std::string(key_sv)}); + } +#ifndef ATA_NO_RE2 + if (pn->compiled_pattern) { + if (!re2::RE2::PartialMatch(re2::StringPiece(key_sv.data(), key_sv.size()), *pn->compiled_pattern)) { + errors.push_back({error_code::pattern_mismatch, path, + "propertyNames: key does not match pattern: " + std::string(key_sv)}); + } + } +#endif + if (pn->format.has_value() && !check_format_by_id(key_sv, pn->format_id)) { + errors.push_back({error_code::format_mismatch, path, + "propertyNames: key does not match format: " + std::string(key_sv)}); + } + } + } else { + // Fallback: parse key as JSON string element + for (auto [key, val] : obj) { + std::string key_json = "\"" + std::string(key) + "\""; + dom::element key_el; + if (tl_dom_key_parser().parse(key_json).get(key_el) == SUCCESS) { + validate_node(pn, key_el, path, ctx, errors, all_errors); + } + } + } + } + + // dependentRequired + for (const auto& [prop, deps] : node->dependent_required) { + dom::element dummy; + if (obj[prop].get(dummy) == SUCCESS) { + for (const auto& dep : deps) { + dom::element dep_dummy; + if (obj[dep].get(dep_dummy) != SUCCESS) { + errors.push_back({error_code::required_property_missing, path, + "property '" + prop + "' requires '" + dep + + "' to be present"}); + } + } + } + } + + // dependentSchemas + for (const auto& [prop, schema] : node->dependent_schemas) { + dom::element dummy; + if (obj[prop].get(dummy) == SUCCESS) { + validate_node(schema, value, path, ctx, errors, all_errors); + } + } + } + + // allOf + if (!node->all_of.empty()) { + for (const auto& sub : node->all_of) { + std::vector sub_errors; + validate_node(sub, value, path, ctx, sub_errors, all_errors); + if (!sub_errors.empty()) { + errors.push_back({error_code::all_of_failed, path, + "allOf subschema failed"}); + errors.insert(errors.end(), sub_errors.begin(), sub_errors.end()); + } + } + } + + // anyOf + if (!node->any_of.empty()) { + bool any_valid = false; + for (const auto& sub : node->any_of) { + std::vector sub_errors; + validate_node(sub, value, path, ctx, sub_errors, all_errors); + if (sub_errors.empty()) { + any_valid = true; + break; + } + } + if (!any_valid) { + errors.push_back({error_code::any_of_failed, path, + "no anyOf subschema matched"}); + } + } + + // oneOf + if (!node->one_of.empty()) { + int match_count = 0; + for (const auto& sub : node->one_of) { + std::vector sub_errors; + validate_node(sub, value, path, ctx, sub_errors, all_errors); + if (sub_errors.empty()) ++match_count; + } + if (match_count != 1) { + errors.push_back({error_code::one_of_failed, path, + "expected exactly one oneOf match, got " + + std::to_string(match_count)}); + } + } + + // not + if (node->not_schema) { + std::vector sub_errors; + validate_node(node->not_schema, value, path, ctx, sub_errors, all_errors); + if (sub_errors.empty()) { + errors.push_back({error_code::not_failed, path, + "value should not match 'not' schema"}); + } + } + + // if/then/else + if (node->if_schema) { + std::vector if_errors; + validate_node(node->if_schema, value, path, ctx, if_errors, all_errors); + if (if_errors.empty()) { + // if passed → validate then + if (node->then_schema) { + validate_node(node->then_schema, value, path, ctx, errors, all_errors); + } + } else { + // if failed → validate else + if (node->else_schema) { + validate_node(node->else_schema, value, path, ctx, errors, all_errors); + } + } + } +} + +// Fast boolean-only tree walker — stripped of all error collection. +// No std::string allocation, no path tracking, no error messages. +// Returns true if valid. Uses [[likely]]/[[unlikely]] branch hints. +static bool validate_fast(const schema_node_ptr& node, + dom::element value, + const compiled_schema& ctx) { + if (!node) [[unlikely]] return true; + + if (node->boolean_schema.has_value()) [[unlikely]] + return node->boolean_schema.value(); + + // $ref + if (!node->ref.empty()) [[unlikely]] { + auto it = ctx.defs.find(node->ref); + if (it != ctx.defs.end()) { + if (!validate_fast(it->second, value, ctx)) return false; + } else if (node->ref == "#" && ctx.root) { + if (!validate_fast(ctx.root, value, ctx)) return false; + } else { + return false; + } + } + + // type + if (node->type_mask) { + if (!type_matches_mask(value, node->type_mask)) [[unlikely]] return false; + } + + // enum + if (!node->enum_values_minified.empty()) { + auto val_str = canonical_json(value); + bool found = false; + for (const auto& ev : node->enum_values_minified) { + if (ev == val_str) { found = true; break; } + } + if (!found) [[unlikely]] return false; + } + + // const + if (node->const_value_raw.has_value()) { + if (canonical_json(value) != node->const_value_raw.value()) [[unlikely]] return false; + } + + auto vtype = value.type(); + + // Numeric + if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) { + double v = to_double(value); + if (node->minimum.has_value() && v < node->minimum.value()) return false; + if (node->maximum.has_value() && v > node->maximum.value()) return false; + if (node->exclusive_minimum.has_value() && v <= node->exclusive_minimum.value()) return false; + if (node->exclusive_maximum.has_value() && v >= node->exclusive_maximum.value()) return false; + if (node->multiple_of.has_value()) { + double rem = std::fmod(v, node->multiple_of.value()); + if (std::abs(rem) > 1e-8 && std::abs(rem - node->multiple_of.value()) > 1e-8) return false; + } + } + + // String + if (vtype == et::STRING) { + std::string_view sv; + value.get(sv); + uint64_t len = utf8_length(sv); + if (node->min_length.has_value() && len < node->min_length.value()) return false; + if (node->max_length.has_value() && len > node->max_length.value()) return false; +#ifndef ATA_NO_RE2 + if (node->compiled_pattern) { + if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern)) + return false; + } +#endif + if (node->format.has_value() && !check_format_by_id(sv, node->format_id)) return false; + } + + // Array + if (vtype == et::ARRAY) { + dom::array arr; value.get(arr); + uint64_t arr_size = arr.size(); + if(arr_size == 0xFFFFFF) [[unlikely]] { + // Fallback for large arrays where size() saturates — count manually to avoid overflow + arr_size = 0; + for ([[maybe_unused]] auto _ : arr) ++arr_size; + } + + if (node->min_items.has_value() && arr_size < node->min_items.value()) return false; + if (node->max_items.has_value() && arr_size > node->max_items.value()) return false; + + if (node->unique_items) { + auto first_it = arr.begin(); + if (first_it != arr.end()) { + auto first_type = (*first_it).type(); + bool all_same = true; + for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } } + if (all_same && first_type == et::STRING) { + std::set seen; + for (auto item : arr) { std::string_view sv; item.get(sv); if (!seen.insert(sv).second) return false; } + } else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) { + std::set seen; + for (auto item : arr) { if (!seen.insert(to_double(item)).second) return false; } + } else { + std::set seen; + for (auto item : arr) { if (!seen.insert(canonical_json(item)).second) return false; } + } + } + } + + { uint64_t idx = 0; + for (auto item : arr) { + if (idx < node->prefix_items.size()) { + if (!validate_fast(node->prefix_items[idx], item, ctx)) return false; + } else if (node->items_schema) { + if (!validate_fast(node->items_schema, item, ctx)) return false; + } + ++idx; + } + } + + if (node->contains_schema) { + uint64_t match_count = 0; + for (auto item : arr) { + if (validate_fast(node->contains_schema, item, ctx)) ++match_count; + } + uint64_t min_c = node->min_contains.value_or(1); + uint64_t max_c = node->max_contains.value_or(arr_size); + if (match_count < min_c || match_count > max_c) return false; + } + } + + // Object + if (vtype == et::OBJECT) { + dom::object obj; value.get(obj); + + if (node->min_properties.has_value() || node->max_properties.has_value()) { + uint64_t n = 0; + for ([[maybe_unused]] auto _ : obj) ++n; + if (node->min_properties.has_value() && n < node->min_properties.value()) return false; + if (node->max_properties.has_value() && n > node->max_properties.value()) return false; + } + + for (const auto& req : node->required) { + dom::element d; + if (obj[req].get(d) != SUCCESS) [[unlikely]] return false; + } + + for (auto [key, val] : obj) { + std::string_view key_sv(key); + bool matched = false; + + auto it = node->properties.find(std::string(key_sv)); + if (it != node->properties.end()) { + if (!validate_fast(it->second, val, ctx)) return false; + matched = true; + } + + for (const auto& pp : node->pattern_properties) { +#ifndef ATA_NO_RE2 + if (pp.compiled && re2::RE2::PartialMatch( + re2::StringPiece(key_sv.data(), key_sv.size()), *pp.compiled)) { + if (!validate_fast(pp.schema, val, ctx)) return false; + matched = true; + } +#endif + } + + if (!matched) { + if (node->additional_properties_bool.has_value() && + !node->additional_properties_bool.value()) return false; + if (node->additional_properties_schema && + !validate_fast(node->additional_properties_schema, val, ctx)) return false; + } + } + + for (const auto& [prop, deps] : node->dependent_required) { + dom::element d; + if (obj[prop].get(d) == SUCCESS) { + for (const auto& dep : deps) { + dom::element dd; + if (obj[dep].get(dd) != SUCCESS) return false; + } + } + } + + for (const auto& [prop, schema] : node->dependent_schemas) { + dom::element d; + if (obj[prop].get(d) == SUCCESS) { + if (!validate_fast(schema, value, ctx)) return false; + } + } + } + + // allOf + for (const auto& sub : node->all_of) { + if (!validate_fast(sub, value, ctx)) return false; + } + + // anyOf + if (!node->any_of.empty()) { + bool any = false; + for (const auto& sub : node->any_of) { + if (validate_fast(sub, value, ctx)) { any = true; break; } + } + if (!any) return false; + } + + // oneOf + if (!node->one_of.empty()) { + int n = 0; + for (const auto& sub : node->one_of) { + if (validate_fast(sub, value, ctx)) ++n; + if (n > 1) return false; + } + if (n != 1) return false; + } + + // not + if (node->not_schema) { + if (validate_fast(node->not_schema, value, ctx)) return false; + } + + // if/then/else + if (node->if_schema) { + if (validate_fast(node->if_schema, value, ctx)) { + if (node->then_schema && !validate_fast(node->then_schema, value, ctx)) return false; + } else { + if (node->else_schema && !validate_fast(node->else_schema, value, ctx)) return false; + } + } + + return true; +} + +// --- Codegen compiler --- +static void cg_compile(const schema_node* n, cg::plan& p, + std::vector& out) { + if (!n) return; + if (n->boolean_schema.has_value()) { + if (!*n->boolean_schema) out.push_back({cg::op::EXPECT_NULL}); + return; + } + // Composition fallback + if (!n->ref.empty() || !n->all_of.empty() || !n->any_of.empty() || + !n->one_of.empty() || n->not_schema || n->if_schema) { + uintptr_t ptr = reinterpret_cast(n); + out.push_back({cg::op::COMPOSITION, (uint32_t)(ptr & 0xFFFFFFFF), + (uint32_t)((ptr >> 32) & 0xFFFFFFFF)}); + return; + } + // Type + if (n->type_mask) { + int popcount = __builtin_popcount(n->type_mask); + if (popcount == 1) { + // Single type — emit specific opcode + for (int b = 0; b < 7; ++b) { + if (n->type_mask & (1u << b)) { + switch (static_cast(b)) { + case json_type::object: out.push_back({cg::op::EXPECT_OBJECT}); break; + case json_type::array: out.push_back({cg::op::EXPECT_ARRAY}); break; + case json_type::string: out.push_back({cg::op::EXPECT_STRING}); break; + case json_type::number: out.push_back({cg::op::EXPECT_NUMBER}); break; + case json_type::integer: out.push_back({cg::op::EXPECT_INTEGER}); break; + case json_type::boolean: out.push_back({cg::op::EXPECT_BOOLEAN}); break; + case json_type::null_value: out.push_back({cg::op::EXPECT_NULL}); break; + } + break; + } + } + } else { + uint32_t i = (uint32_t)p.type_masks.size(); + p.type_masks.push_back(n->type_mask); + out.push_back({cg::op::EXPECT_TYPE_MULTI, i}); + } + } + // Enum + if (!n->enum_values_minified.empty()) { + bool all_str = true; + for (auto& e : n->enum_values_minified) + if (e.empty() || e[0]!='"') { all_str=false; break; } + uint32_t i = (uint32_t)p.enum_sets.size(); + p.enum_sets.push_back(n->enum_values_minified); + out.push_back({all_str ? cg::op::CHECK_ENUM_STR : cg::op::CHECK_ENUM, i}); + } + if (n->const_value_raw.has_value()) { + uint32_t i=(uint32_t)p.strings.size(); + p.strings.push_back(*n->const_value_raw); + out.push_back({cg::op::CHECK_CONST, i}); + } + // Numeric + if (n->minimum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->minimum); out.push_back({cg::op::CHECK_MINIMUM,i}); } + if (n->maximum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->maximum); out.push_back({cg::op::CHECK_MAXIMUM,i}); } + if (n->exclusive_minimum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->exclusive_minimum); out.push_back({cg::op::CHECK_EX_MINIMUM,i}); } + if (n->exclusive_maximum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->exclusive_maximum); out.push_back({cg::op::CHECK_EX_MAXIMUM,i}); } + if (n->multiple_of.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->multiple_of); out.push_back({cg::op::CHECK_MULTIPLE_OF,i}); } + // String + if (n->min_length.has_value()) out.push_back({cg::op::CHECK_MIN_LENGTH,(uint32_t)*n->min_length}); + if (n->max_length.has_value()) out.push_back({cg::op::CHECK_MAX_LENGTH,(uint32_t)*n->max_length}); +#ifndef ATA_NO_RE2 + if (n->compiled_pattern) { uint32_t i=(uint32_t)p.regexes.size(); p.regexes.push_back(n->compiled_pattern); out.push_back({cg::op::CHECK_PATTERN,i}); } +#endif + if (n->format.has_value()) { + uint32_t i=(uint32_t)p.format_ids.size(); + p.format_ids.push_back(n->format_id); + out.push_back({cg::op::CHECK_FORMAT,i}); + } + // Array + if (n->min_items.has_value()) out.push_back({cg::op::CHECK_MIN_ITEMS,(uint32_t)*n->min_items}); + if (n->max_items.has_value()) out.push_back({cg::op::CHECK_MAX_ITEMS,(uint32_t)*n->max_items}); + if (n->unique_items) out.push_back({cg::op::CHECK_UNIQUE_ITEMS}); + if (n->items_schema) { + uint32_t si=(uint32_t)p.subs.size(); + p.subs.emplace_back(); + std::vector sub_code; + cg_compile(n->items_schema.get(), p, sub_code); + sub_code.push_back({cg::op::END}); + p.subs[si] = std::move(sub_code); + out.push_back({cg::op::ARRAY_ITEMS, si}); + } + // Object + for (auto& r : n->required) { uint32_t i=(uint32_t)p.strings.size(); p.strings.push_back(r); out.push_back({cg::op::CHECK_REQUIRED,i}); } + if (n->min_properties.has_value()) out.push_back({cg::op::CHECK_MIN_PROPS,(uint32_t)*n->min_properties}); + if (n->max_properties.has_value()) out.push_back({cg::op::CHECK_MAX_PROPS,(uint32_t)*n->max_properties}); + // additional_properties_schema requires tree walker — bail out to COMPOSITION + if (n->additional_properties_schema) { + out.push_back({cg::op::COMPOSITION, 0, 0}); + return; + } + if (!n->properties.empty() || (n->additional_properties_bool.has_value() && !*n->additional_properties_bool)) { + out.push_back({cg::op::OBJ_PROPS_START}); + if (n->additional_properties_bool.has_value() && !*n->additional_properties_bool) + out.push_back({cg::op::CHECK_NO_ADDITIONAL}); + for (auto& [name, schema] : n->properties) { + uint32_t ni=(uint32_t)p.strings.size(); p.strings.push_back(name); + uint32_t si=(uint32_t)p.subs.size(); + p.subs.emplace_back(); + std::vector sub_code; + cg_compile(schema.get(), p, sub_code); + sub_code.push_back({cg::op::END}); + p.subs[si] = std::move(sub_code); + out.push_back({cg::op::OBJ_PROP, ni, si}); + } + out.push_back({cg::op::OBJ_PROPS_END}); + } +} + +// --- Codegen executor --- + +static bool cg_exec(const cg::plan& p, const std::vector& code, + dom::element value) { + auto t = value.type(); + bool t_numeric = (t == et::INT64 || t == et::UINT64 || t == et::DOUBLE); + double t_dval = t_numeric ? to_double(value) : 0.0; + for (size_t i=0; ip.doubles[c.a])return false; break; + case cg::op::CHECK_EX_MINIMUM: if(t_numeric&&t_dval<=p.doubles[c.a])return false; break; + case cg::op::CHECK_EX_MAXIMUM: if(t_numeric&&t_dval>=p.doubles[c.a])return false; break; + case cg::op::CHECK_MULTIPLE_OF: if(t_numeric){double d=p.doubles[c.a],r=std::fmod(t_dval,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break; + case cg::op::CHECK_MIN_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)c.a)return false;} break; +#ifndef ATA_NO_RE2 + case cg::op::CHECK_PATTERN: if(t==et::STRING){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break; +#else + case cg::op::CHECK_PATTERN: break; +#endif + case cg::op::CHECK_FORMAT: if(t==et::STRING){std::string_view sv;value.get(sv);if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break; + case cg::op::CHECK_MIN_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(sc.a)return false;} break; + case cg::op::CHECK_UNIQUE_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);std::set seen;for(auto x:a)if(!seen.insert(canonical_json(x)).second)return false;} break; + case cg::op::ARRAY_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);for(auto x:a)if(!cg_exec(p,p.subs[c.a],x))return false;} break; + case cg::op::CHECK_REQUIRED: if(t==et::OBJECT){dom::object o;value.get(o);dom::element d;if(o[p.strings[c.a]].get(d)!=SUCCESS)return false;} break; + case cg::op::CHECK_MIN_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(nc.a)return false;} break; + case cg::op::OBJ_PROPS_START: if(t==et::OBJECT){ + dom::object o; value.get(o); + // collect prop defs + struct pd{std::string_view nm;uint32_t si;}; + std::vector props; bool no_add=false; + size_t j=i+1; + for(;j()){int64_t v;value.get(v);auto s=std::to_string(v);for(auto& e:es)if(e==s){f=true;break;}} + if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}} + if(!f)return false; break; + } + case cg::op::CHECK_CONST: if(canonical_json(value)!=p.strings[c.a])return false; break; + case cg::op::COMPOSITION: return false; // fallback to tree walker + } + } + return true; +} + +// --- On Demand fast path executor --- +// Uses simdjson On Demand API to avoid materializing the full DOM tree. +// Returns: true = valid, false = invalid OR unsupported (fallback to DOM). + +static json_type od_type(simdjson::ondemand::value& v) { + simdjson::ondemand::json_type t; + if (v.type().get(t) != SUCCESS) return json_type::null_value; + switch (t) { + case simdjson::ondemand::json_type::object: return json_type::object; + case simdjson::ondemand::json_type::array: return json_type::array; + case simdjson::ondemand::json_type::string: return json_type::string; + case simdjson::ondemand::json_type::boolean: return json_type::boolean; + case simdjson::ondemand::json_type::null: return json_type::null_value; + case simdjson::ondemand::json_type::number: { + simdjson::ondemand::number_type nt; + if (v.get_number_type().get(nt) == SUCCESS && + nt == simdjson::ondemand::number_type::floating_point_number) + return json_type::number; + return json_type::integer; + } + } + return json_type::string; +} + +static bool od_exec(const cg::plan& p, const std::vector& code, + simdjson::ondemand::value value) { + auto t = od_type(value); + bool t_numeric = (t == json_type::integer || t == json_type::number); + for (size_t i = 0; i < code.size(); ++i) { + auto& c = code[i]; + switch (c.o) { + case cg::op::END: return true; + case cg::op::EXPECT_OBJECT: if(t!=json_type::object) return false; break; + case cg::op::EXPECT_ARRAY: if(t!=json_type::array) return false; break; + case cg::op::EXPECT_STRING: if(t!=json_type::string) return false; break; + case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break; + case cg::op::EXPECT_INTEGER: if(t!=json_type::integer) return false; break; + case cg::op::EXPECT_BOOLEAN: if(t!=json_type::boolean) return false; break; + case cg::op::EXPECT_NULL: if(t!=json_type::null_value) return false; break; + case cg::op::EXPECT_TYPE_MULTI: { + // integer matches both "integer" and "number" type constraints + uint8_t tbits = json_type_bit(t); + if (t == json_type::integer) tbits |= json_type_bit(json_type::number); + if(!(tbits & p.type_masks[c.a])) return false; break; + } + case cg::op::CHECK_MINIMUM: + case cg::op::CHECK_MAXIMUM: + case cg::op::CHECK_EX_MINIMUM: + case cg::op::CHECK_EX_MAXIMUM: + case cg::op::CHECK_MULTIPLE_OF: { + if (t_numeric) { + double v; + if (t==json_type::integer) { int64_t iv; if(value.get(iv)!=SUCCESS) return false; v=(double)iv; } + else { if(value.get(v)!=SUCCESS) return false; } + double d=p.doubles[c.a]; + if(c.o==cg::op::CHECK_MINIMUM && vd) return false; + if(c.o==cg::op::CHECK_EX_MINIMUM && v<=d) return false; + if(c.o==cg::op::CHECK_EX_MAXIMUM && v>=d) return false; + if(c.o==cg::op::CHECK_MULTIPLE_OF){double r=std::fmod(v,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} + } + break; + } + case cg::op::CHECK_MIN_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)c.a) return false;} break; +#ifndef ATA_NO_RE2 + case cg::op::CHECK_PATTERN: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break; +#else + case cg::op::CHECK_PATTERN: break; +#endif + case cg::op::CHECK_FORMAT: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break; + case cg::op::CHECK_MIN_ITEMS: if(t==json_type::array){ + simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false; + uint64_t s=0; for(auto x:a){(void)x;++s;} if(sc.a) return false; + } break; + case cg::op::ARRAY_ITEMS: if(t==json_type::array){ + simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false; + for(auto elem:a){ + simdjson::ondemand::value v; if(elem.get(v)!=SUCCESS) return false; + if(!od_exec(p,p.subs[c.a],v)) return false; + } + } break; + case cg::op::CHECK_REQUIRED: if(t==json_type::object){ + simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false; + auto f = o.find_field_unordered(p.strings[c.a]); + if(f.error()) return false; + } break; + case cg::op::CHECK_MIN_PROPS: if(t==json_type::object){ + simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false; + uint64_t n=0; for(auto f:o){(void)f;++n;} if(nc.a) return false; + } break; + case cg::op::OBJ_PROPS_START: if(t==json_type::object){ + simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false; + struct pd{std::string_view nm;uint32_t si;}; + std::vector props; bool no_add=false; + size_t j=i+1; + for(;j(buf + len - 1) % get_page_size()) + + REQUIRED_PADDING >= static_cast(get_page_size())); +} + +// Zero-copy validate with free padding (Lemire's trick). +// Almost never allocates — only if buffer is near a page boundary. +static simdjson::padded_string_view get_free_padded_view( + const char* data, size_t length, simdjson::padded_string& fallback) { + if (near_page_boundary(data, length)) { + // Rare: near page boundary, must copy + fallback = simdjson::padded_string(data, length); + return fallback; + } + // Common: free padding available, zero-copy + return simdjson::padded_string_view(data, length, length + REQUIRED_PADDING); +} + +// Build an od_plan from a schema_node tree. +static od_plan_ptr compile_od_plan(const schema_node_ptr& node) { + if (!node) return nullptr; + + auto plan = std::make_shared(); + + if (node->boolean_schema.has_value()) { + if (!node->boolean_schema.value()) plan->supported = false; + return plan; + } + + // Unsupported features → fall back to DOM + if (!node->ref.empty() || + !node->enum_values_minified.empty() || + node->const_value_raw.has_value() || + node->unique_items || + !node->all_of.empty() || + !node->any_of.empty() || + !node->one_of.empty() || + node->not_schema || + node->if_schema || + node->contains_schema || + !node->prefix_items.empty() || + !node->pattern_properties.empty() || + !node->dependent_required.empty() || + !node->dependent_schemas.empty() || + node->property_names_schema || + node->additional_properties_schema) { + plan->supported = false; + return plan; + } + + plan->type_mask = node->type_mask; + if (node->minimum) { plan->num_flags |= od_plan::HAS_MIN; plan->num_min = *node->minimum; } + if (node->maximum) { plan->num_flags |= od_plan::HAS_MAX; plan->num_max = *node->maximum; } + if (node->exclusive_minimum) { plan->num_flags |= od_plan::HAS_EX_MIN; plan->num_ex_min = *node->exclusive_minimum; } + if (node->exclusive_maximum) { plan->num_flags |= od_plan::HAS_EX_MAX; plan->num_ex_max = *node->exclusive_maximum; } + if (node->multiple_of) { plan->num_flags |= od_plan::HAS_MUL; plan->num_mul = *node->multiple_of; } + plan->min_length = node->min_length; + plan->max_length = node->max_length; +#ifndef ATA_NO_RE2 + plan->pattern = node->compiled_pattern.get(); +#endif + plan->format_id = node->format_id; + + // Object plan — build hash lookup for O(1) per-field dispatch + if (!node->properties.empty() || !node->required.empty() || + node->additional_properties_bool.has_value() || + node->min_properties.has_value() || node->max_properties.has_value()) { + auto op = std::make_shared(); + op->required_count = node->required.size(); + op->min_props = node->min_properties; + op->max_props = node->max_properties; + if (node->additional_properties_bool.has_value() && + !node->additional_properties_bool.value()) { + op->no_additional = true; + } + // Build merged entries: each key appears once with required_idx + sub_plan + std::unordered_map key_to_idx; + // Register required keys + for (size_t i = 0; i < node->required.size() && i < 64; i++) { + auto& rk = node->required[i]; + if (key_to_idx.find(rk) == key_to_idx.end()) { + key_to_idx[rk] = op->entries.size(); + op->entries.push_back({rk, static_cast(i), nullptr}); + } else { + op->entries[key_to_idx[rk]].required_idx = static_cast(i); + } + } + // Register properties + compile sub-plans + for (auto& [key, sub_node] : node->properties) { + auto sub = compile_od_plan(sub_node); + if (!sub || !sub->supported) { plan->supported = false; return plan; } + auto it = key_to_idx.find(key); + if (it != key_to_idx.end()) { + op->entries[it->second].sub = std::move(sub); + } else { + key_to_idx[key] = op->entries.size(); + op->entries.push_back({key, -1, std::move(sub)}); + } + } + plan->object = std::move(op); + } + + // Array plan + if (node->items_schema || node->min_items.has_value() || node->max_items.has_value()) { + auto ap = std::make_shared(); + ap->min_items = node->min_items; + ap->max_items = node->max_items; + if (node->items_schema) { + ap->items = compile_od_plan(node->items_schema); + if (!ap->items || !ap->items->supported) { plan->supported = false; return plan; } + } + plan->array = std::move(ap); + } + + return plan; +} + +// Fast ASCII check: if all bytes < 0x80, byte length == codepoint length +static inline uint64_t utf8_length_fast(std::string_view s) { + // Check 8 bytes at a time for non-ASCII + const uint8_t* p = reinterpret_cast(s.data()); + size_t n = s.size(); + size_t i = 0; + uint64_t has_high = 0; + for (; i + 8 <= n; i += 8) { + uint64_t block; + std::memcpy(&block, p + i, 8); + has_high |= block & 0x8080808080808080ULL; + } + for (; i < n; i++) has_high |= p[i] & 0x80; + if (has_high == 0) return n; // Pure ASCII — byte count == codepoint count + return utf8_length(s); // Fallback to full counting +} + +// Execute an od_plan against a simdjson On-Demand value. +// Each value consumed exactly once. Uses simdjson types directly — no od_type() overhead. +static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) { + // Use simdjson type directly — skip od_type() conversion + get_number_type() + using sjt = simdjson::ondemand::json_type; + sjt st; + if (value.type().get(st) != SUCCESS) return false; + + // Type check using simdjson type directly + if (plan.type_mask) { + uint8_t tbits; + switch (st) { + case sjt::string: tbits = json_type_bit(json_type::string); break; + case sjt::boolean: tbits = json_type_bit(json_type::boolean); break; + case sjt::null: tbits = json_type_bit(json_type::null_value); break; + case sjt::object: tbits = json_type_bit(json_type::object); break; + case sjt::array: tbits = json_type_bit(json_type::array); break; + case sjt::number: + // Only call get_number_type when schema has type constraint that distinguishes int/number + tbits = json_type_bit(json_type::number) | json_type_bit(json_type::integer); + if ((plan.type_mask & tbits) != tbits) { + // Schema distinguishes — need to check actual number type + simdjson::ondemand::number_type nt; + if (value.get_number_type().get(nt) == SUCCESS && + nt != simdjson::ondemand::number_type::floating_point_number) + tbits = json_type_bit(json_type::integer) | json_type_bit(json_type::number); + else + tbits = json_type_bit(json_type::number); + } + break; + default: tbits = 0; + } + if (!(tbits & plan.type_mask)) return false; + } + + switch (st) { + case sjt::number: { + if (!plan.num_flags) break; // No numeric constraints + double v; + // Try integer first (more common), fall back to double + int64_t iv; + if (value.get(iv) == SUCCESS) { + v = static_cast(iv); + } else if (value.get(v) != SUCCESS) { + return false; + } + uint8_t f = plan.num_flags; + if ((f & od_plan::HAS_MIN) && v < plan.num_min) return false; + if ((f & od_plan::HAS_MAX) && v > plan.num_max) return false; + if ((f & od_plan::HAS_EX_MIN) && v <= plan.num_ex_min) return false; + if ((f & od_plan::HAS_EX_MAX) && v >= plan.num_ex_max) return false; + if (f & od_plan::HAS_MUL) { + double r = std::fmod(v, plan.num_mul); + if (std::abs(r) > 1e-8 && std::abs(r - plan.num_mul) > 1e-8) return false; + } + break; + } + case sjt::string: { + std::string_view sv; + if (value.get(sv) != SUCCESS) return false; + if (plan.min_length || plan.max_length) { + uint64_t len = utf8_length_fast(sv); + if (plan.min_length && len < *plan.min_length) return false; + if (plan.max_length && len > *plan.max_length) return false; + } +#ifndef ATA_NO_RE2 + if (plan.pattern) { + if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *plan.pattern)) + return false; + } +#endif + if (plan.format_id != 255) { + if (!check_format_by_id(sv, plan.format_id)) return false; + } + break; + } + case sjt::object: { + if (!plan.object) break; + auto& op = *plan.object; + simdjson::ondemand::object obj; + if (value.get(obj) != SUCCESS) return false; + + uint64_t required_found = 0; + uint64_t prop_count = 0; + + for (auto field : obj) { + std::string_view key; + if (field.unescaped_key().get(key) != SUCCESS) continue; + prop_count++; + + // Single merged scan: required + property in one pass + bool matched = false; + for (auto& e : op.entries) { + if (key == e.key) { + if (e.required_idx >= 0) + required_found |= (1ULL << e.required_idx); + if (e.sub) { + simdjson::ondemand::value fv; + if (field.value().get(fv) != SUCCESS) return false; + if (!od_exec_plan(*e.sub, fv)) return false; + } + matched = true; + break; + } + } + if (!matched && op.no_additional) return false; + } + + uint64_t required_mask = (op.required_count >= 64) + ? ~0ULL : ((1ULL << op.required_count) - 1); + if ((required_found & required_mask) != required_mask) return false; + if (op.min_props && prop_count < *op.min_props) return false; + if (op.max_props && prop_count > *op.max_props) return false; + break; + } + case sjt::array: { + if (!plan.array) break; + auto& ap = *plan.array; + simdjson::ondemand::array arr; + if (value.get(arr) != SUCCESS) return false; + + uint64_t count = 0; + for (auto elem : arr) { + simdjson::ondemand::value v; + if (elem.get(v) != SUCCESS) return false; + if (ap.items && !od_exec_plan(*ap.items, v)) return false; + count++; + } + if (ap.min_items && count < *ap.min_items) return false; + if (ap.max_items && count > *ap.max_items) return false; + break; + } + default: + break; + } + + return true; +} + +schema_ref compile(std::string_view schema_json) { + auto ctx = std::make_shared(); + ctx->raw_schema = std::string(schema_json); + + dom::element doc; + auto result = ctx->parser.parse(ctx->raw_schema); + if (result.get(doc) != SUCCESS) { + return schema_ref{nullptr}; + } + + ctx->root = compile_node(doc, *ctx); + + if (!ctx->compile_error.empty()) { + return schema_ref{nullptr}; + } + + // Generate codegen plan + cg_compile(ctx->root.get(), ctx->gen_plan, ctx->gen_plan.code); + ctx->gen_plan.code.push_back({cg::op::END}); + ctx->use_ondemand = plan_supports_ondemand(ctx->gen_plan); + ctx->od = compile_od_plan(ctx->root); + + schema_ref ref; + ref.impl = ctx; + return ref; +} + +validation_result validate(const schema_ref& schema, std::string_view json, + const validate_options& opts) { + if (!schema.impl || !schema.impl->root) { + return {false, {{error_code::invalid_schema, "", "schema not compiled"}}}; + } + + // Free padding trick: avoid padded_string copy when possible + simdjson::padded_string fallback; + auto psv = get_free_padded_view(json.data(), json.size(), fallback); + + // Ultra-fast path: On Demand (no DOM materialization) + static constexpr size_t OD_THRESHOLD = 32; + if (schema.impl->use_ondemand && !schema.impl->gen_plan.code.empty() && + json.size() >= OD_THRESHOLD) { + auto od_result = tl_od_parser().iterate(psv); + if (!od_result.error()) { + simdjson::ondemand::value root_val; + if (od_result.get_value().get(root_val) == SUCCESS) { + if (od_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, root_val)) { + return {true, {}}; + } + } + } + // Need fresh view for DOM parse (On Demand consumed it) + psv = get_free_padded_view(json.data(), json.size(), fallback); + } + + auto& dom_p = tl_dom_parser(); + dom::element doc; + if (dom_p.parse(psv).get(doc) != SUCCESS) { + return {false, {{error_code::invalid_json, "", "invalid JSON document"}}}; + } + + // Fast path: codegen bytecode execution (DOM) + if (!schema.impl->use_ondemand && !schema.impl->gen_plan.code.empty()) { + if (cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, + doc)) { + return {true, {}}; + } + // Codegen said invalid OR hit COMPOSITION — fall through to tree walker + } + + // Slow path: tree walker with error details (reuse already-parsed DOM) + std::vector errors; + validate_node(schema.impl->root, doc, "", *schema.impl, errors, + opts.all_errors); + + return {errors.empty(), std::move(errors)}; +} + +validation_result validate(std::string_view schema_json, + std::string_view json, + const validate_options& opts) { + auto s = compile(schema_json); + if (!s) { + return {false, {{error_code::invalid_schema, "", "failed to compile schema"}}}; + } + return validate(s, json, opts); +} + + +bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t length) { + if (!schema.impl || !schema.impl->root) return false; + + simdjson::padded_string fallback; + auto psv = get_free_padded_view(data, length, fallback); + + // On-Demand fast path: skip DOM parse entirely + // Minimum 32 bytes — On-Demand doesn't fully validate small malformed docs + if (schema.impl->od && schema.impl->od->supported && length >= 32) { + auto od_result = tl_od_parser().iterate(psv); + if (!od_result.error()) { + simdjson::ondemand::value root_val; + if (od_result.get_value().get(root_val) == SUCCESS) { + if (od_exec_plan(*schema.impl->od, root_val)) { + return true; + } + } + } + psv = get_free_padded_view(data, length, fallback); + } + + dom::element doc; + if (tl_dom_parser().parse(psv).get(doc) != SUCCESS) return false; + + if (!schema.impl->gen_plan.code.empty()) { + return cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, doc); + } + + return validate_fast(schema.impl->root, doc, *schema.impl); +} + +bool is_valid_buf(const schema_ref& schema, const uint8_t* data, size_t length) { + if (!schema.impl || !schema.impl->root || !data || length == 0) return false; + + // Thread-local buffer with simdjson padding — reused across calls + thread_local std::string tl_buf; + const size_t needed = length + REQUIRED_PADDING; + if (tl_buf.size() < needed) tl_buf.resize(needed); + std::memcpy(tl_buf.data(), data, length); + std::memset(tl_buf.data() + length, 0, REQUIRED_PADDING); + + return is_valid_prepadded(schema, tl_buf.data(), length); +} + +} // namespace ata diff --git a/deps/ata/ata.gyp b/deps/ata/ata.gyp new file mode 100644 index 00000000000000..ab8e4225ed35b6 --- /dev/null +++ b/deps/ata/ata.gyp @@ -0,0 +1,40 @@ +{ + 'variables': { + 'ata_sources': [ + 'ata.cpp', + ] + }, + 'targets': [ + { + 'target_name': 'ata', + 'type': 'static_library', + 'include_dirs': ['.'], + 'direct_dependent_settings': { + 'include_dirs': ['.'], + }, + 'dependencies': [ + '../simdjson/simdjson.gyp:simdjson', + ], + 'defines': [ + 'ATA_NO_RE2', + ], + 'sources': [ + '<@(ata_sources)', + ], + 'conditions': [ + ['OS=="win"', { + 'msvs_settings': { + 'VCCLCompilerTool': { + 'AdditionalOptions': ['/std:c++20'], + }, + }, + }, { + 'cflags_cc': ['-std=c++20'], + 'xcode_settings': { + 'CLANG_CXX_LANGUAGE_STANDARD': 'c++20', + }, + }], + ], + }, + ] +} diff --git a/deps/ata/ata.h b/deps/ata/ata.h new file mode 100644 index 00000000000000..d121a9e2ba01fd --- /dev/null +++ b/deps/ata/ata.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define ATA_VERSION "0.4.3" + +namespace ata { + +inline constexpr uint32_t VERSION_MAJOR = 0; +inline constexpr uint32_t VERSION_MINOR = 4; +inline constexpr uint32_t VERSION_REVISION = 3; + +inline constexpr std::string_view version() noexcept { + return "0.4.3"; +} + +enum class error_code : uint8_t { + ok = 0, + invalid_json, + invalid_schema, + type_mismatch, + required_property_missing, + additional_property_not_allowed, + enum_mismatch, + const_mismatch, + minimum_violation, + maximum_violation, + exclusive_minimum_violation, + exclusive_maximum_violation, + min_length_violation, + max_length_violation, + pattern_mismatch, + format_mismatch, + min_items_violation, + max_items_violation, + unique_items_violation, + min_properties_violation, + max_properties_violation, + multiple_of_violation, + all_of_failed, + any_of_failed, + one_of_failed, + not_failed, + ref_not_found, + if_then_else_failed, +}; + +struct validation_error { + error_code code; + std::string path; + std::string message; +}; + +struct validation_result { + bool valid; + std::vector errors; + + explicit operator bool() const noexcept { return valid; } +}; + +struct compiled_schema; + +struct schema_ref { + std::shared_ptr impl; + + explicit operator bool() const noexcept { return impl != nullptr; } +}; + +struct validate_options { + bool all_errors = true; // false = stop at first error (faster) +}; + +// Compile a JSON Schema string into an internal representation. +schema_ref compile(std::string_view schema_json); + +// Validate a JSON document against a compiled schema. +validation_result validate(const schema_ref& schema, std::string_view json, + const validate_options& opts = {}); + +// Validate a JSON document against a schema (compiles schema each time). +validation_result validate(std::string_view schema_json, + std::string_view json, + const validate_options& opts = {}); + +// Ultra-fast boolean validation — no error collection, no allocation. +// Input MUST have at least 64 bytes of padding after data (simdjson requirement). +// Use this when you only need true/false and can provide pre-padded input. +bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t length); + +// Validate raw buffer — handles padding internally via thread-local copy. +// Use this when input doesn't have simdjson padding (e.g., from V8 TypedArray). +bool is_valid_buf(const schema_ref& schema, const uint8_t* data, size_t length); + +// Required padding size for is_valid_prepadded +inline constexpr size_t REQUIRED_PADDING = 64; + +} // namespace ata diff --git a/node.gypi b/node.gypi index 64250ed19837e4..bfcb6ac004c080 100644 --- a/node.gypi +++ b/node.gypi @@ -237,6 +237,10 @@ 'dependencies': [ 'deps/simdjson/simdjson.gyp:simdjson' ], }], + [ 'node_shared_simdjson=="false"', { + 'dependencies': [ 'deps/ata/ata.gyp:ata' ], + }], + [ 'node_shared_simdutf=="false" and node_use_bundled_v8!="false"', { 'dependencies': [ 'tools/v8_gypfiles/v8.gyp:simdutf' ], }], diff --git a/src/node_config_file.cc b/src/node_config_file.cc index ce89a51b7b97d3..fac26a0fd50900 100644 --- a/src/node_config_file.cc +++ b/src/node_config_file.cc @@ -1,5 +1,7 @@ #include "node_config_file.h" +#include "ata.h" #include "debug_utils-inl.h" +#include "node_config_schema.h" #include "simdjson.h" #include @@ -323,6 +325,32 @@ ParseResult ConfigReader::ParseConfig(const std::string_view& config_path) { } } + // Validate against JSON Schema. Skip additionalProperties errors + // because unknown namespaces are intentionally ignored. + { + auto schema = ata::compile(kNodeConfigSchema); + if (schema) { + auto result = ata::validate(schema, file_content); + if (!result.valid) { + for (const auto& err : result.errors) { + if (err.code != ata::error_code::additional_property_not_allowed) { + FPrintF( + stderr, "Invalid configuration in %s:\n", config_path.data()); + for (const auto& e : result.errors) { + if (e.code != ata::error_code::additional_property_not_allowed) { + FPrintF(stderr, + " %s: %s\n", + e.path.empty() ? "/" : e.path, + e.message); + } + } + return ParseResult::InvalidContent; + } + } + } + } + } + return ParseResult::Valid; } diff --git a/src/node_config_schema.h b/src/node_config_schema.h new file mode 100644 index 00000000000000..ae88d2cf5b5cd8 --- /dev/null +++ b/src/node_config_schema.h @@ -0,0 +1,1031 @@ +#ifndef SRC_NODE_CONFIG_SCHEMA_H_ +#define SRC_NODE_CONFIG_SCHEMA_H_ + +static constexpr const char kNodeConfigSchema[] = R"JSON( +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": false, + "required": [], + "properties": { + "$schema": { + "type": "string" + }, + "nodeOptions": { + "additionalProperties": false, + "required": [], + "properties": { + "addons": { + "type": "boolean", + "description": "disable loading native addons" + }, + "allow-addons": { + "type": "boolean", + "description": "allow use of addons when any permissions are set" + }, + "allow-child-process": { + "type": "boolean", + "description": "allow use of child process when any permissions are set" + }, + "allow-fs-read": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "allow permissions to read the filesystem" + }, + "allow-fs-write": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "allow permissions to write in the filesystem" + }, + "allow-inspector": { + "type": "boolean", + "description": "allow use of inspector when any permissions are set" + }, + "allow-net": { + "type": "boolean", + "description": "allow use of network when any permissions are set" + }, + "allow-wasi": { + "type": "boolean", + "description": "allow wasi when any permissions are set" + }, + "allow-worker": { + "type": "boolean", + "description": "allow worker threads when any permissions are set" + }, + "async-context-frame": { + "type": "boolean", + "description": "Improve AsyncLocalStorage performance with AsyncContextFrame" + }, + "conditions": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "additional user conditions for conditional exports and imports" + }, + "cpu-prof": { + "type": "boolean", + "description": "Start the V8 CPU profiler on start up, and write the CPU profile to disk before exit. If --cpu-prof-dir is not specified, write the profile to the current working directory." + }, + "cpu-prof-dir": { + "type": "string", + "description": "Directory where the V8 profiles generated by --cpu-prof will be placed. Does not affect --prof." + }, + "cpu-prof-interval": { + "type": "number", + "description": "specified sampling interval in microseconds for the V8 CPU profile generated with --cpu-prof. (default: 1000)" + }, + "cpu-prof-name": { + "type": "string", + "description": "specified file name of the V8 CPU profile generated with --cpu-prof" + }, + "debug-arraybuffer-allocations": { + "type": "boolean", + "description": "" + }, + "deprecation": { + "type": "boolean", + "description": "silence deprecation warnings" + }, + "diagnostic-dir": { + "type": "string", + "description": "set dir for all output files (default: current working directory)" + }, + "disable-proto": { + "type": "string", + "description": "disable Object.prototype.__proto__" + }, + "disable-sigusr1": { + "type": "boolean", + "description": "Disable inspector thread to be listening for SIGUSR1 signal" + }, + "disable-warning": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "silence specific process warnings" + }, + "disable-wasm-trap-handler": { + "type": "boolean", + "description": "Disable trap-handler-based WebAssembly bound checks. V8 will insert inline bound checks when compiling WebAssembly which may slow down performance." + }, + "dns-result-order": { + "type": "string", + "description": "set default value of verbatim in dns.lookup. Options are 'ipv4first' (IPv4 addresses are placed before IPv6 addresses) 'ipv6first' (IPv6 addresses are placed before IPv4 addresses) 'verbatim' (addresses are in the order the DNS resolver returned)" + }, + "enable-fips": { + "type": "boolean", + "description": "enable FIPS crypto at startup" + }, + "enable-source-maps": { + "type": "boolean", + "description": "Source Map V3 support for stack traces" + }, + "entry-url": { + "type": "boolean", + "description": "Treat the entrypoint as a URL" + }, + "experimental-addon-modules": { + "type": "boolean", + "description": "experimental import support for addons" + }, + "experimental-detect-module": { + "type": "boolean", + "description": "when ambiguous modules fail to evaluate because they contain ES module syntax, try again to evaluate them as ES modules" + }, + "experimental-eventsource": { + "type": "boolean", + "description": "experimental EventSource API" + }, + "experimental-global-navigator": { + "type": "boolean", + "description": "expose experimental Navigator API on the global scope" + }, + "experimental-import-meta-resolve": { + "type": "boolean", + "description": "experimental ES Module import.meta.resolve() parentURL support" + }, + "experimental-loader": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "use the specified module as a custom loader" + }, + "experimental-print-required-tla": { + "type": "boolean", + "description": "Print pending top-level await. If --require-module is true, evaluate asynchronous graphs loaded by `require()` but do not run the microtasks, in order to to find and print top-level await in the graph" + }, + "experimental-repl-await": { + "type": "boolean", + "description": "experimental await keyword support in REPL" + }, + "experimental-require-module": { + "type": "boolean", + "description": "Legacy alias for --require-module" + }, + "experimental-shadow-realm": { + "type": "boolean", + "description": "" + }, + "experimental-sqlite": { + "type": "boolean", + "description": "experimental node:sqlite module" + }, + "experimental-vm-modules": { + "type": "boolean", + "description": "experimental ES Module support in vm module" + }, + "experimental-websocket": { + "type": "boolean", + "description": "experimental WebSocket API" + }, + "experimental-webstorage": { + "type": "boolean", + "description": "experimental Web Storage API" + }, + "extra-info-on-fatal-exception": { + "type": "boolean", + "description": "hide extra information on fatal exception that causes exit" + }, + "force-async-hooks-checks": { + "type": "boolean", + "description": "disable checks for async_hooks" + }, + "force-context-aware": { + "type": "boolean", + "description": "disable loading non-context-aware addons" + }, + "force-fips": { + "type": "boolean", + "description": "force FIPS crypto (cannot be disabled)" + }, + "force-node-api-uncaught-exceptions-policy": { + "type": "boolean", + "description": "enforces 'uncaughtException' event on Node API asynchronous callbacks" + }, + "frozen-intrinsics": { + "type": "boolean", + "description": "experimental frozen intrinsics support" + }, + "global-search-paths": { + "type": "boolean", + "description": "disable global module search paths" + }, + "heap-prof": { + "type": "boolean", + "description": "Start the V8 heap profiler on start up, and write the heap profile to disk before exit. If --heap-prof-dir is not specified, write the profile to the current working directory." + }, + "heap-prof-dir": { + "type": "string", + "description": "Directory where the V8 heap profiles generated by --heap-prof will be placed." + }, + "heap-prof-interval": { + "type": "number", + "description": "specified sampling interval in bytes for the V8 heap profile generated with --heap-prof. (default: 512 * 1024)" + }, + "heap-prof-name": { + "type": "string", + "description": "specified file name of the V8 heap profile generated with --heap-prof" + }, + "heapsnapshot-near-heap-limit": { + "type": "number", + "description": "Generate heap snapshots whenever V8 is approaching the heap limit. No more than the specified number of heap snapshots will be generated." + }, + "heapsnapshot-signal": { + "type": "string", + "description": "Generate heap snapshot on specified signal" + }, + "icu-data-dir": { + "type": "string", + "description": "set ICU data load path to dir (overrides NODE_ICU_DATA) (note: linked-in ICU data is present)" + }, + "import": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "ES module to preload (option can be repeated)" + }, + "input-type": { + "type": "string", + "description": "set module type for string input" + }, + "insecure-http-parser": { + "type": "boolean", + "description": "use an insecure HTTP parser that accepts invalid HTTP headers" + }, + "inspect": { + "type": "boolean", + "description": "activate inspector on host:port (default: 127.0.0.1:9229)" + }, + "inspect-brk": { + "type": "boolean", + "description": "activate inspector on host:port and break at start of user script" + }, + "inspect-port": { + "type": "number", + "description": "set host:port for inspector" + }, + "inspect-publish-uid": { + "type": "string", + "description": "comma separated list of destinations for inspector uid(default: stderr,http)" + }, + "inspect-wait": { + "type": "boolean", + "description": "activate inspector on host:port and wait for debugger to be attached" + }, + "localstorage-file": { + "type": "string", + "description": "file used to persist localStorage data" + }, + "max-http-header-size": { + "type": "number", + "description": "set the maximum size of HTTP headers (default: 16384 (16KB))" + }, + "max-old-space-size-percentage": { + "type": "string", + "description": "set V8's max old space size as a percentage of available memory (e.g., '50%'). Takes precedence over --max-old-space-size." + }, + "network-family-autoselection": { + "type": "boolean", + "description": "Disable network address family autodetection algorithm" + }, + "network-family-autoselection-attempt-timeout": { + "type": "number", + "description": "Sets the default value for the network family autoselection attempt timeout." + }, + "node-snapshot": { + "type": "boolean", + "description": "" + }, + "openssl-config": { + "type": "string", + "description": "load OpenSSL configuration from the specified file (overrides OPENSSL_CONF)" + }, + "openssl-legacy-provider": { + "type": "boolean", + "description": "enable OpenSSL 3.0 legacy provider" + }, + "openssl-shared-config": { + "type": "boolean", + "description": "enable OpenSSL shared configuration" + }, + "pending-deprecation": { + "type": "boolean", + "description": "emit pending deprecation warnings" + }, + "permission": { + "type": "boolean", + "description": "enable the permission system" + }, + "preserve-symlinks": { + "type": "boolean", + "description": "preserve symbolic links when resolving" + }, + "preserve-symlinks-main": { + "type": "boolean", + "description": "preserve symbolic links when resolving the main module" + }, + "redirect-warnings": { + "type": "string", + "description": "write warnings to file instead of stderr" + }, + "report-compact": { + "type": "boolean", + "description": "output compact single-line JSON" + }, + "report-dir": { + "type": "string", + "description": "define custom report pathname. (default: current working directory)" + }, + "report-exclude-env": { + "type": "boolean", + "description": "Exclude environment variables when generating report (default: false)" + }, + "report-exclude-network": { + "type": "boolean", + "description": "exclude network interface diagnostics. (default: false)" + }, + "report-filename": { + "type": "string", + "description": "define custom report file name. (default: YYYYMMDD.HHMMSS.PID.SEQUENCE#.txt)" + }, + "report-on-fatalerror": { + "type": "boolean", + "description": "generate diagnostic report on fatal (internal) errors" + }, + "report-on-signal": { + "type": "boolean", + "description": "generate diagnostic report upon receiving signals" + }, + "report-signal": { + "type": "string", + "description": "causes diagnostic report to be produced on provided signal, unsupported in Windows. (default: SIGUSR2)" + }, + "report-uncaught-exception": { + "type": "boolean", + "description": "generate diagnostic report on uncaught exceptions" + }, + "require": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "CommonJS module to preload (option can be repeated)" + }, + "require-module": { + "type": "boolean", + "description": "Allow loading synchronous ES Modules in require()." + }, + "secure-heap": { + "type": "number", + "description": "total size of the OpenSSL secure heap" + }, + "secure-heap-min": { + "type": "number", + "description": "minimum allocation size from the OpenSSL secure heap" + }, + "snapshot-blob": { + "type": "string", + "description": "Path to the snapshot blob that's either the result of snapshotbuilding, or the blob that is used to restore the application state" + }, + "stack-trace-limit": { + "type": "number", + "description": "" + }, + "strip-types": { + "type": "boolean", + "description": "Type-stripping for TypeScript files." + }, + "test-coverage-branches": { + "type": "number", + "description": "the branch coverage minimum threshold" + }, + "test-coverage-exclude": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "exclude files from coverage report that match this glob pattern" + }, + "test-coverage-functions": { + "type": "number", + "description": "the function coverage minimum threshold" + }, + "test-coverage-include": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "include files in coverage report that match this glob pattern" + }, + "test-coverage-lines": { + "type": "number", + "description": "the line coverage minimum threshold" + }, + "test-global-setup": { + "type": "string", + "description": "specifies the path to the global setup file" + }, + "test-isolation": { + "type": "string", + "description": "configures the type of test isolation used in the test runner" + }, + "test-name-pattern": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "run tests whose name matches this regular expression" + }, + "test-only": { + "type": "boolean", + "description": "run tests with 'only' option set" + }, + "test-random-seed": { + "type": "number", + "description": "seed used to randomize test execution order" + }, + "test-randomize": { + "type": "boolean", + "description": "run tests in a random order" + }, + "test-reporter": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "report test output using the given reporter" + }, + "test-reporter-destination": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "report given reporter to the given destination" + }, + "test-rerun-failures": { + "type": "string", + "description": "specifies the path to the rerun state file" + }, + "test-shard": { + "type": "string", + "description": "run test at specific shard" + }, + "test-skip-pattern": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "run tests whose name do not match this regular expression" + }, + "throw-deprecation": { + "type": "boolean", + "description": "throw an exception on deprecations" + }, + "title": { + "type": "string", + "description": "the process title to use on startup" + }, + "tls-cipher-list": { + "type": "string", + "description": "use an alternative default TLS cipher list" + }, + "tls-keylog": { + "type": "string", + "description": "log TLS decryption keys to named file for traffic analysis" + }, + "tls-max-v1.2": { + "type": "boolean", + "description": "set default TLS maximum to TLSv1.2 (default: TLSv1.3)" + }, + "tls-max-v1.3": { + "type": "boolean", + "description": "set default TLS maximum to TLSv1.3 (default: TLSv1.3)" + }, + "tls-min-v1.0": { + "type": "boolean", + "description": "set default TLS minimum to TLSv1.0 (default: TLSv1.2)" + }, + "tls-min-v1.1": { + "type": "boolean", + "description": "set default TLS minimum to TLSv1.1 (default: TLSv1.2)" + }, + "tls-min-v1.2": { + "type": "boolean", + "description": "set default TLS minimum to TLSv1.2 (default: TLSv1.2)" + }, + "tls-min-v1.3": { + "type": "boolean", + "description": "set default TLS minimum to TLSv1.3 (default: TLSv1.2)" + }, + "trace-deprecation": { + "type": "boolean", + "description": "show stack traces on deprecations" + }, + "trace-env": { + "type": "boolean", + "description": "Print accesses to the environment variables" + }, + "trace-env-js-stack": { + "type": "boolean", + "description": "Print accesses to the environment variables and the JavaScript stack trace" + }, + "trace-env-native-stack": { + "type": "boolean", + "description": "Print accesses to the environment variables and the native stack trace" + }, + "trace-event-categories": { + "type": "string", + "description": "comma separated list of trace event categories to record" + }, + "trace-event-file-pattern": { + "type": "string", + "description": "Template string specifying the filepath for the trace-events data, it supports ${rotation} and ${pid}." + }, + "trace-exit": { + "type": "boolean", + "description": "show stack trace when an environment exits" + }, + "trace-promises": { + "type": "boolean", + "description": "show stack traces on promise initialization and resolution" + }, + "trace-require-module": { + "type": "string", + "description": "Print access to require(esm). Options are 'all' (print all usage) and 'no-node-modules' (excluding usage from the node_modules folder)" + }, + "trace-sigint": { + "type": "boolean", + "description": "enable printing JavaScript stacktrace on SIGINT" + }, + "trace-sync-io": { + "type": "boolean", + "description": "show stack trace when use of sync IO is detected after the first tick" + }, + "trace-tls": { + "type": "boolean", + "description": "prints TLS packet trace information to stderr" + }, + "trace-uncaught": { + "type": "boolean", + "description": "show stack traces for the `throw` behind uncaught exceptions" + }, + "trace-warnings": { + "type": "boolean", + "description": "show stack traces on process warnings" + }, + "track-heap-objects": { + "type": "boolean", + "description": "track heap object allocations for heap snapshots" + }, + "unhandled-rejections": { + "type": "string", + "description": "define unhandled rejections behavior. Options are 'strict' (always raise an error), 'throw' (raise an error unless 'unhandledRejection' hook is set), 'warn' (log a warning), 'none' (silence warnings), 'warn-with-error-code' (log a warning and set exit code 1 unless 'unhandledRejection' hook is set). (default: throw)" + }, + "use-bundled-ca": { + "type": "boolean", + "description": "use bundled CA store (default)" + }, + "use-env-proxy": { + "type": "boolean", + "description": "parse proxy settings from HTTP_PROXY/HTTPS_PROXY/NO_PROXYenvironment variables and apply the setting in global HTTP/HTTPS clients" + }, + "use-largepages": { + "type": "string", + "description": "Map the Node.js static code to large pages. Options are 'off' (the default value, meaning do not map), 'on' (map and ignore failure, reporting it to stderr), or 'silent' (map and silently ignore failure)" + }, + "use-openssl-ca": { + "type": "boolean", + "description": "use OpenSSL's default CA store" + }, + "use-system-ca": { + "type": "boolean", + "description": "use system's CA store" + }, + "v8-pool-size": { + "type": "number", + "description": "set V8's thread pool size" + }, + "verify-base-objects": { + "type": "boolean", + "description": "" + }, + "warnings": { + "type": "boolean", + "description": "silence all process warnings" + }, + "watch": { + "type": "boolean", + "description": "run in watch mode" + }, + "watch-kill-signal": { + "type": "string", + "description": "kill signal to send to the process on watch mode restarts(default: SIGTERM)" + }, + "watch-path": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "path to watch" + }, + "watch-preserve-output": { + "type": "boolean", + "description": "preserve outputs on watch mode restart" + }, + "zero-fill-buffers": { + "type": "boolean", + "description": "automatically zero-fill all newly allocated Buffer instances" + } + }, + "type": "object" + }, + "permission": { + "type": "object", + "additionalProperties": false, + "required": [], + "properties": { + "allow-addons": { + "type": "boolean", + "description": "allow use of addons when any permissions are set" + }, + "allow-child-process": { + "type": "boolean", + "description": "allow use of child process when any permissions are set" + }, + "allow-fs-read": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "allow permissions to read the filesystem" + }, + "allow-fs-write": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "allow permissions to write in the filesystem" + }, + "allow-inspector": { + "type": "boolean", + "description": "allow use of inspector when any permissions are set" + }, + "allow-net": { + "type": "boolean", + "description": "allow use of network when any permissions are set" + }, + "allow-wasi": { + "type": "boolean", + "description": "allow wasi when any permissions are set" + }, + "allow-worker": { + "type": "boolean", + "description": "allow worker threads when any permissions are set" + }, + "permission": { + "type": "boolean", + "description": "enable the permission system" + } + } + }, + "test": { + "type": "object", + "additionalProperties": false, + "required": [], + "properties": { + "experimental-test-coverage": { + "type": "boolean", + "description": "enable code coverage in the test runner" + }, + "experimental-test-module-mocks": { + "type": "boolean", + "description": "enable module mocking in the test runner" + }, + "test": { + "type": "boolean", + "description": "launch test runner on startup" + }, + "test-concurrency": { + "type": "number", + "description": "specify test runner concurrency" + }, + "test-coverage-branches": { + "type": "number", + "description": "the branch coverage minimum threshold" + }, + "test-coverage-exclude": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "exclude files from coverage report that match this glob pattern" + }, + "test-coverage-functions": { + "type": "number", + "description": "the function coverage minimum threshold" + }, + "test-coverage-include": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "include files in coverage report that match this glob pattern" + }, + "test-coverage-lines": { + "type": "number", + "description": "the line coverage minimum threshold" + }, + "test-force-exit": { + "type": "boolean", + "description": "force test runner to exit upon completion" + }, + "test-global-setup": { + "type": "string", + "description": "specifies the path to the global setup file" + }, + "test-isolation": { + "type": "string", + "description": "configures the type of test isolation used in the test runner" + }, + "test-name-pattern": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "run tests whose name matches this regular expression" + }, + "test-only": { + "type": "boolean", + "description": "run tests with 'only' option set" + }, + "test-random-seed": { + "type": "number", + "description": "seed used to randomize test execution order" + }, + "test-randomize": { + "type": "boolean", + "description": "run tests in a random order" + }, + "test-reporter": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "report test output using the given reporter" + }, + "test-reporter-destination": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "report given reporter to the given destination" + }, + "test-rerun-failures": { + "type": "string", + "description": "specifies the path to the rerun state file" + }, + "test-shard": { + "type": "string", + "description": "run test at specific shard" + }, + "test-skip-pattern": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "run tests whose name do not match this regular expression" + }, + "test-timeout": { + "type": "number", + "description": "specify test runner timeout" + }, + "test-update-snapshots": { + "type": "boolean", + "description": "regenerate test snapshots" + } + } + }, + "watch": { + "type": "object", + "additionalProperties": false, + "required": [], + "properties": { + "watch": { + "type": "boolean", + "description": "run in watch mode" + }, + "watch-kill-signal": { + "type": "string", + "description": "kill signal to send to the process on watch mode restarts(default: SIGTERM)" + }, + "watch-path": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + } + ], + "description": "path to watch" + }, + "watch-preserve-output": { + "type": "boolean", + "description": "preserve outputs on watch mode restart" + } + } + } + }, + "type": "object" +} +)JSON"; + +#endif // SRC_NODE_CONFIG_SCHEMA_H_ diff --git a/src/node_metadata.cc b/src/node_metadata.cc index 7343c62c197503..19279d5efec95b 100644 --- a/src/node_metadata.cc +++ b/src/node_metadata.cc @@ -3,6 +3,7 @@ #include "ada.h" #include "amaro_version.h" #include "ares.h" +#include "ata.h" #include "brotli/encode.h" #include "llhttp.h" #include "merve.h" @@ -171,6 +172,7 @@ Metadata::Versions::Versions() { #endif // HAVE_SQLITE ada = ADA_VERSION; nbytes = NBYTES_VERSION; + ata = ATA_VERSION; } std::array, diff --git a/src/node_metadata.h b/src/node_metadata.h index 8d38d840c1fd3d..61f5f1a1704dbd 100644 --- a/src/node_metadata.h +++ b/src/node_metadata.h @@ -64,7 +64,8 @@ namespace node { V(nghttp3) \ NODE_VERSIONS_KEY_AMARO(V) \ NODE_VERSIONS_KEY_UNDICI(V) \ - V(merve) + V(merve) \ + V(ata) #if HAVE_OPENSSL #define NODE_VERSIONS_KEY_CRYPTO(V) V(openssl) V(ncrypto) diff --git a/test/parallel/test-ata-version.js b/test/parallel/test-ata-version.js new file mode 100644 index 00000000000000..0dd984d5234aa5 --- /dev/null +++ b/test/parallel/test-ata-version.js @@ -0,0 +1,9 @@ +'use strict'; + +require('../common'); +const assert = require('assert'); + +// Verify that ata-validator is available in process.versions +assert.ok(process.versions.ata, 'process.versions.ata should be defined'); +assert.match(process.versions.ata, /^\d+\.\d+\.\d+$/, + 'process.versions.ata should be a semver string'); diff --git a/test/parallel/test-process-versions.js b/test/parallel/test-process-versions.js index c084c582b95e87..f5b19faa86928c 100644 --- a/test/parallel/test-process-versions.js +++ b/test/parallel/test-process-versions.js @@ -25,6 +25,7 @@ const expected_keys = [ 'ada', 'nbytes', 'merve', + 'ata', ];