diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rb index 8ff361ee2..8635d32ba 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "delegate" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics" require "elastic_graph/schema_definition/indexing/field_type/enum" module ElasticGraph @@ -19,6 +20,10 @@ module FieldType # # @private class Enum < DelegateClass(ElasticGraph::SchemaDefinition::Indexing::FieldType::Enum) + prepend ValueSemantics + + # @dynamic __getobj__ + # @return [Hash] additional ElasticGraph metadata to put in the JSON schema for this enum type. 
def json_schema_field_metadata_by_field_name {} diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rb index e30a1969c..472428a7b 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "delegate" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics" require "elastic_graph/schema_definition/indexing/field_type/scalar" require "elastic_graph/support/hash_util" @@ -19,6 +20,10 @@ module FieldType # # @private class Scalar < DelegateClass(ElasticGraph::SchemaDefinition::Indexing::FieldType::Scalar) + prepend ValueSemantics + + # @dynamic __getobj__ + # @return [Hash] empty hash, as scalar types have no subfields def json_schema_field_metadata_by_field_name {} diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rb index 526467cda..e1233977a 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "delegate" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics" require "elastic_graph/schema_definition/indexing/field_type/union" module ElasticGraph @@ -18,6 +19,10 @@ module FieldType # # @private class Union < DelegateClass(ElasticGraph::SchemaDefinition::Indexing::FieldType::Union) + 
prepend ValueSemantics + + # @dynamic __getobj__ + # @return [Hash] empty hash, as union types have no subfields def json_schema_field_metadata_by_field_name {} diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics.rb new file mode 100644 index 000000000..fd9b52c12 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics.rb @@ -0,0 +1,52 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + module FieldType + # Provides value-equality semantics for the JSON-schema-aware field type wrappers that delegate + # to a wrapped core field type without adding any state of their own (`Scalar`, `Enum`, `Union`). + # + # `DelegateClass` defines `==` so that it unwraps only the *left* operand before comparing, which + # means `wrapper == equivalent_wrapper` compares the wrapped object against the right-hand + # *wrapper* (rather than against its wrapped object) and is therefore never equal--even though + # `hash` delegates to the wrapped object and reports them equal. That inconsistency breaks the + # `eql?`/`hash` contract and causes `Set`/`Hash`/`uniq` de-duplication to treat equivalent + # wrappers as distinct. Here we unwrap both sides so two wrappers around equal objects compare + # equal, keeping `==`/`eql?`/`hash` consistent. (`FieldType::Object` solves the same problem with + # its own implementation because it carries additional JSON schema state in its equality.) 
+ # + # @private + module ValueSemantics + # @param other [Object] the object to compare against + # @return [Boolean] true when `other` wraps an equal field type (or is the wrapped field type itself) + def ==(other) + case other + when ValueSemantics + __getobj__ == other.__getobj__ + else + super + end + end + + def eql?(other) + self == other + end + + # @return [Integer] a hash code derived from the wrapped field type + def hash + __getobj__.hash + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rbs index da9a29da1..de12fab64 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum.rbs @@ -8,9 +8,12 @@ module ElasticGraph end class Enum < EnumSupertype + include ValueSemantics + def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] def to_json_schema: () -> ::Hash[::String, untyped] + def __getobj__: () -> ::ElasticGraph::SchemaDefinition::Indexing::FieldType::Enum end end end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rbs index ce85b0b8c..dec7efc12 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar.rbs @@ -8,9 +8,12 @@ module ElasticGraph end class Scalar < 
ScalarSupertype + include ValueSemantics + def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] def to_json_schema: () -> ::Hash[::String, untyped] + def __getobj__: () -> ::ElasticGraph::SchemaDefinition::Indexing::FieldType::Scalar end end end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs index 5fbae3383..bf51feac6 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs @@ -8,11 +8,14 @@ module ElasticGraph end class Union < UnionSupertype + include ValueSemantics + def self.new: (::ElasticGraph::SchemaDefinition::Indexing::FieldType::Union) -> instance def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] def to_json_schema: () -> ::Hash[::String, untyped] + def __getobj__: () -> ::ElasticGraph::SchemaDefinition::Indexing::FieldType::Union end end end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics.rbs new file mode 100644 index 000000000..f3d7741c3 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/value_semantics.rbs @@ -0,0 +1,22 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + module FieldType + module 
ValueSemantics : _Delegator + def ==: (untyped other) -> bool + def eql?: (untyped other) -> bool + def hash: () -> ::Integer + end + + interface _Delegator + def __getobj__: () -> untyped + + # Provided by `DelegateClass`; `ValueSemantics#==` calls `super` to fall back to it. + def ==: (untyped other) -> bool + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/spec/integration/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb b/elasticgraph-json_ingestion/spec/integration/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb new file mode 100644 index 000000000..079f348b4 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/integration/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension_spec.rb @@ -0,0 +1,653 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/api_extension" +require "elastic_graph/schema_definition/rake_tasks" +require "yaml" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + RSpec.describe SchemaArtifactManagerExtension, :in_temp_dir, :rake_task do + after do + Thread.current[:eg_schema_load_count] = nil + end + + it "throws an error if the json_schemas artifact is (attempted to be) changed without json_schema_version being bumped" do + write_elastic_graph_schema_def_code(json_schema_version: 1) + expect_all_artifacts_out_of_date_because_they_havent_been_dumped + + # Should succeed, for first artifact. 
+ expect { + output = run_rake("schema_artifacts:dump") + expect(output.lines).to include( + a_string_including("Dumped", JSON_SCHEMAS_FILE), + a_string_including("Dumped", versioned_json_schema_file(1)) + ) + }.to change { read_artifact(JSON_SCHEMAS_FILE) } + .from(a_falsy_value) + .to(a_string_including("\njson_schema_version: 1\n")) + .and change { read_artifact(versioned_json_schema_file(1)) } + .from(a_falsy_value) + .to(a_string_including("\njson_schema_version: 1\n")) + + expect_up_to_date_artifacts + + write_elastic_graph_schema_def_code(json_schema_version: 2) + + # Should succeed, it is ok to update the schema_version without underlying contents changing. + expect { + output = run_rake("schema_artifacts:dump") + expect(output.lines).to include( + a_string_including("Dumped", JSON_SCHEMAS_FILE), + a_string_including("Dumped", versioned_json_schema_file(2)) + ) + }.to change { read_artifact(JSON_SCHEMAS_FILE) } + .from(a_string_including("\njson_schema_version: 1")) + .to(a_string_including("\njson_schema_version: 2")) + .and change { read_artifact(versioned_json_schema_file(2)) } + .from(a_falsy_value) + .to(a_string_including("\njson_schema_version: 2\n")) + + write_elastic_graph_schema_def_code(component_suffix: "2", json_schema_version: 2, component_extras: "t.renamed_from 'Component'") + expect_out_of_date_artifacts + + expect { + run_rake("schema_artifacts:dump") + }.to abort_with a_string_including( + "A change has been attempted to `json_schemas.yaml`", + "`schema.json_schema_version 3`" + ).and matching(json_schema_version_setter_location_regex) + + # Still out of date. + expect_out_of_date_artifacts + + # Decreasing the json_schema_version should also result in a failure. 
+ write_elastic_graph_schema_def_code(component_suffix: "2", json_schema_version: 1, component_extras: "t.renamed_from 'Component'") + expect_out_of_date_artifacts + + expect { + run_rake("schema_artifacts:dump") + }.to abort_with a_string_including( + "A change has been attempted to `json_schemas.yaml`", + "`schema.json_schema_version 3`" + ).and matching(json_schema_version_setter_location_regex) + + write_elastic_graph_schema_def_code(component_suffix: "2", json_schema_version: 3, component_extras: "t.renamed_from 'Component'") + + # Now dump should succeed, as schema_version has been bumped. + expect { + output = run_rake("schema_artifacts:dump") + expect(output.lines).to include( + a_string_including("Dumped", JSON_SCHEMAS_FILE), + a_string_including("Dumped", versioned_json_schema_file(3)) + ) + }.to change { read_artifact(JSON_SCHEMAS_FILE) } + .from(a_string_including("\njson_schema_version: 2")) + .to(a_string_including("\njson_schema_version: 3")) + .and change { read_artifact(versioned_json_schema_file(3)) } + .from(a_falsy_value) + .to(a_string_including("\njson_schema_version: 3\n")) + + # Should be able to run `schema_artifacts:dump` idempotently. 
+ output = run_rake("schema_artifacts:dump") + expect(output.lines).to include( + a_string_including("is already up to date", JSON_SCHEMAS_FILE), + a_string_including("is already up to date", versioned_json_schema_file(3)) + ) + + write_elastic_graph_schema_def_code(component_suffix: "3", json_schema_version: 3, component_extras: "t.renamed_from 'Component'") + expect_out_of_date_artifacts + + expect { + run_rake("schema_artifacts:dump") + }.to abort_with a_string_including( + "A change has been attempted to `json_schemas.yaml`", + "`schema.json_schema_version 4`" + ).and matching(json_schema_version_setter_location_regex) + + write_elastic_graph_schema_def_code( + component_suffix: "3", + json_schema_version: 3, + component_extras: "t.renamed_from 'Component'", + enforce_json_schema_version: false + ) + + expect { + output = run_rake("schema_artifacts:dump") + expect(output.lines).to include( + a_string_including("Dumped", JSON_SCHEMAS_FILE), + a_string_including("Dumped", versioned_json_schema_file(3)) + ) + }.to change { read_artifact(JSON_SCHEMAS_FILE) } + .and change { read_artifact(versioned_json_schema_file(3)) } + end + + it "dumps the ElasticGraph JSON schema metadata only on the internal versioned JSON schema, omitting it from the public copy" do + write_elastic_graph_schema_def_code(json_schema_version: 1) + run_rake("schema_artifacts:dump") + + expect(::YAML.safe_load(read_artifact(JSON_SCHEMAS_FILE)).dig("$defs", "Component", "properties", "id")).to eq( + json_schema_for_keyword_type("ID") + ) + + expect(::YAML.safe_load(read_artifact(versioned_json_schema_file(1))).dig("$defs", "Component", "properties", "id")).to eq( + json_schema_for_keyword_type("ID", { + "ElasticGraph" => { + "type" => "ID!", + "nameInIndex" => "id" + } + }) + ) + end + + it "keeps the ElasticGraph JSON schema metadata up-to-date on all versioned JSON schemas" do + write_elastic_graph_schema_def_code(json_schema_version: 1) + run_rake("schema_artifacts:dump") + + 
expect(::YAML.safe_load(read_artifact(versioned_json_schema_file(1))).dig("$defs", "Component", "properties", "name")).to eq( + json_schema_for_keyword_type("String", { + "ElasticGraph" => { + "type" => "String!", + "nameInIndex" => "name" + } + }) + ) + + # Here we add a new field `another: String` + write_elastic_graph_schema_def_code(json_schema_version: 2, component_name_extras: "\nt.field 'another', 'String!'") + run_rake("schema_artifacts:dump") + + # It's not added to v1.yaml... + loaded_v1 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(1))) + expect(loaded_v1.dig("$defs", "Component", "properties", "name")).to eq( + json_schema_for_keyword_type("String", { + "ElasticGraph" => { + "type" => "String!", + "nameInIndex" => "name" + } + }) + ) + expect(loaded_v1.dig("$defs", "Component", "properties", "another")).to eq(nil) + + # ..but is added to v2.yaml. + loaded_v2 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(2))) + expect(loaded_v2.dig("$defs", "Component", "properties", "name")).to eq( + json_schema_for_keyword_type("String", { + "ElasticGraph" => { + "type" => "String!", + "nameInIndex" => "name" + } + }) + ) + expect(loaded_v2.dig("$defs", "Component", "properties", "another")).to eq( + json_schema_for_keyword_type("String", { + "ElasticGraph" => { + "type" => "String!", + "nameInIndex" => "another" + } + }) + ) + + # Here we keep the newly added field `another: String` and also change the `name_in_index` of `name`. + write_elastic_graph_schema_def_code(json_schema_version: 2, component_name_extras: ", name_in_index: 'name2'\nt.field 'another', 'String!'") + run_rake("schema_artifacts:dump") + + # The `name_in_index` for `name` should be changed to `name2` in the v1 schema... 
+ loaded_v1 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(1))) + expect(loaded_v1.dig("$defs", "Component", "properties", "name")).to eq( + json_schema_for_keyword_type("String", { + "ElasticGraph" => { + "type" => "String!", + "nameInIndex" => "name2" + } + }) + ) + expect(loaded_v1.dig("$defs", "Component", "properties", "another")).to eq(nil) + + # ...and in the v2 schema. + loaded_v2 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(2))) + expect(loaded_v2.dig("$defs", "Component", "properties", "name")).to eq( + json_schema_for_keyword_type("String", { + "ElasticGraph" => { + "type" => "String!", + "nameInIndex" => "name2" + } + }) + ) + expect(loaded_v2.dig("$defs", "Component", "properties", "another")).to eq( + json_schema_for_keyword_type("String", { + "ElasticGraph" => { + "type" => "String!", + "nameInIndex" => "another" + } + }) + ) + + # Here we add a different new field (`ordinal: Int!`), without bumping the version (and using `enforce_json_schema_version: false` + # to not have to bump the version)... + write_elastic_graph_schema_def_code( + json_schema_version: 2, + component_name_extras: "\nt.field 'ordinal', 'Int!'", + enforce_json_schema_version: false + ) + run_rake("schema_artifacts:dump") + + # It should not be added to the v1 schema... + loaded_v1 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(1))) + expect(loaded_v1.dig("$defs", "Component", "properties", "ordinal")).to eq(nil) + + # ...but it should be added to the v2 schema. + loaded_v2 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(2))) + expect(loaded_v2.dig("$defs", "Component", "properties", "ordinal")).to eq({ + "$ref" => "#/$defs/Int", + "ElasticGraph" => {"type" => "Int!", "nameInIndex" => "ordinal"} + }) + end + + it "gives the user a clear error when there is ambiguity about what to do with a renamed or deleted field" do + # Verify the error message with 1 old JSON schema version (v8). 
+ write_elastic_graph_schema_def_code(json_schema_version: 8) + run_rake("schema_artifacts:dump") + write_elastic_graph_schema_def_code(json_schema_version: 9, omit_component_name_field: true) + expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS + The `Component.name` field (which existed in JSON schema version 8) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this field's data when ingesting events at this old version. + To continue, do one of the following: + + 1. If the `Component.name` field has been renamed, indicate this by calling `field.renamed_from "name"` on the renamed field. + 2. If the `Component.name` field has been dropped, indicate this by calling `type.deleted_field "name"` on the `Component` type. + 3. Alternately, if no publishers or in-flight events use JSON schema version 8, delete its file from `json_schemas_by_version`, and no further changes are required. + EOS + + # Verify the error message with 2 old JSON schema version (v8 and v9). + # The grammar/phrasing is adjusted slightly (e.g. "versions 8 and 9"). + write_elastic_graph_schema_def_code(json_schema_version: 9) + run_rake("schema_artifacts:dump") + write_elastic_graph_schema_def_code(json_schema_version: 10, omit_component_name_field: true) + expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS + The `Component.name` field (which existed in JSON schema versions 8 and 9) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this field's data when ingesting events at these old versions. + To continue, do one of the following: + + 1. If the `Component.name` field has been renamed, indicate this by calling `field.renamed_from "name"` on the renamed field. + 2. If the `Component.name` field has been dropped, indicate this by calling `type.deleted_field "name"` on the `Component` type. + 3. 
Alternately, if no publishers or in-flight events use JSON schema versions 8 or 9, delete their files from `json_schemas_by_version`, and no further changes are required. + EOS + + # Verify the error message with 3 old JSON schema version (v8, v9, and v10). + # The grammar/phrasing is adjusted slightly (e.g. "versions 8, 9, and 10"). + write_elastic_graph_schema_def_code(json_schema_version: 10) + run_rake("schema_artifacts:dump") + write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true) + expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS + The `Component.name` field (which existed in JSON schema versions 8, 9, and 10) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this field's data when ingesting events at these old versions. + To continue, do one of the following: + + 1. If the `Component.name` field has been renamed, indicate this by calling `field.renamed_from "name"` on the renamed field. + 2. If the `Component.name` field has been dropped, indicate this by calling `type.deleted_field "name"` on the `Component` type. + 3. Alternately, if no publishers or in-flight events use JSON schema versions 8, 9, or 10, delete their files from `json_schemas_by_version`, and no further changes are required. + EOS + + # Demonstrate that these issues can be solved by each of the 3 options given. + # First, demonstrate indicating the field has been renamed. + write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true, component_extras: "t.field('full_name', 'String') { |f| f.renamed_from 'name' }") + run_rake("schema_artifacts:dump") + delete_artifact(JSON_SCHEMAS_FILE) # so it doesn't force us to increment the version to 12 + + # Next, demonstrate indicating the field has been deleted. 
+ write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true, component_extras: "t.deleted_field 'name'") + run_rake("schema_artifacts:dump") + + # Finally, demonstrate deleting the old JSON schema version artifacts + delete_artifact(versioned_json_schema_file(8)) + delete_artifact(versioned_json_schema_file(9)) + delete_artifact(versioned_json_schema_file(10)) + write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true) + run_rake("schema_artifacts:dump") + end + + it "gives the user a clear error when there is ambiguity about what to do with a renamed or deleted type" do + # Verify the error message with 1 old JSON schema version (v1). + write_elastic_graph_schema_def_code(json_schema_version: 1) + run_rake("schema_artifacts:dump") + write_elastic_graph_schema_def_code(json_schema_version: 2, component_suffix: "2") + expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS + The `Component` type (which existed in JSON schema version 1) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this type's data when ingesting events at this old version. + To continue, do one of the following: + + 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. + 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. + 3. Alternately, if no publishers or in-flight events use JSON schema version 1, delete its file from `json_schemas_by_version`, and no further changes are required. + EOS + + # Verify the error message with 2 old JSON schema version (v1 and v2). + # The grammar/phrasing is adjusted slightly (e.g. "versions 1 and 2"). 
+ write_elastic_graph_schema_def_code(json_schema_version: 2) + run_rake("schema_artifacts:dump") + write_elastic_graph_schema_def_code(json_schema_version: 3, component_suffix: "2") + expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS + The `Component` type (which existed in JSON schema versions 1 and 2) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this type's data when ingesting events at these old versions. + To continue, do one of the following: + + 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. + 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. + 3. Alternately, if no publishers or in-flight events use JSON schema versions 1 or 2, delete their files from `json_schemas_by_version`, and no further changes are required. + EOS + + # Verify the error message with 3 old JSON schema version (v1, v2, and v3). + # The grammar/phrasing is adjusted slightly (e.g. "versions 1, 2, and 3"). + write_elastic_graph_schema_def_code(json_schema_version: 3) + run_rake("schema_artifacts:dump") + write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2") + expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS + The `Component` type (which existed in JSON schema versions 1, 2, and 3) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this type's data when ingesting events at these old versions. + To continue, do one of the following: + + 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. + 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. + 3. 
Alternately, if no publishers or in-flight events use JSON schema versions 1, 2, or 3, delete their files from `json_schemas_by_version`, and no further changes are required. + EOS + + # Demonstrate that these issues can be solved by each of the 3 options given. + # First, demonstrate indicating the type has been renamed. + write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2", component_extras: "t.renamed_from 'Component'") + run_rake("schema_artifacts:dump") + delete_artifact(JSON_SCHEMAS_FILE) # so it doesn't force us to increment the version to 5 + + # Next, demonstrate indicating the type has been deleted. + write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2", component_extras: "schema.deleted_type 'Component'") + run_rake("schema_artifacts:dump") + + # Finally, demonstrate deleting the old JSON schema version artifacts + delete_artifact(versioned_json_schema_file(1)) + delete_artifact(versioned_json_schema_file(2)) + delete_artifact(versioned_json_schema_file(3)) + write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2") + run_rake("schema_artifacts:dump") + end + + it "warns if there are `deleted_*` or `renamed_from` calls that are not needed so the user knows they can remove them" do + ::File.write("schema.rb", <<~EOS) + ElasticGraph.define_schema do |schema| + schema.json_schema_version 1 + schema.deleted_type "SomeType" + + schema.object_type "Widget" do |t| + t.renamed_from "Widget2" + t.deleted_field "name" + t.field "description", "String" do |f| + f.renamed_from "old_description" + end + t.renamed_from "Widget3" + + t.field "id", "ID" + t.index "widgets" + end + end + EOS + + output = run_rake("schema_artifacts:dump") + expect(output.split("\n").first(9).join("\n")).to eq(<<~EOS.strip) + The schema definition has 5 unneeded reference(s) to deprecated schema elements. These can all be safely deleted: + + 1. `schema.deleted_type "SomeType"` at schema.rb:3 + 2. 
`type.renamed_from "Widget2"` at schema.rb:6 + 3. `type.deleted_field "name"` at schema.rb:7 + 4. `field.renamed_from "old_description"` at schema.rb:9 + 5. `type.renamed_from "Widget3"` at schema.rb:11 + + Dumped schema artifact to `config/schema/artifacts/datastore_config.yaml`. + EOS + end + + it "gives a clear error if excess `deleted_*` or `renamed_from` calls create a conflict" do + ::File.write("schema.rb", <<~EOS) + ElasticGraph.define_schema do |schema| + schema.json_schema_version 1 + schema.deleted_type "Widget" + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" + + t.field "token", "ID" do |f| + f.renamed_from "id" + end + t.deleted_field "id" + end + end + EOS + + expect { + run_rake("schema_artifacts:dump") + }.to abort_with(<<~EOS) + The schema definition of `Widget` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: + + 1. `schema.deleted_type "Widget"` at schema.rb:3 + + + The schema definition of `Widget.id` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: + + 1. `field.renamed_from "id"` at schema.rb:10 + 2. 
`type.deleted_field "id"` at schema.rb:12 + EOS + end + + it "does not allow a routing or rollover field to be deleted since we cannot index documents without values for those fields" do + ::File.write("schema.rb", <<~EOS) + ElasticGraph.define_schema do |schema| + schema.json_schema_version 1 + + schema.object_type "Embedded" do |t| + t.field "workspace_id", "ID" + t.field "created_at", "DateTime" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded", "Embedded" + t.index "widgets" do |i| + i.route_with "embedded.workspace_id" + i.rollover :yearly, "embedded.created_at" + end + end + end + EOS + + run_rake("schema_artifacts:dump") + + ::File.write("schema.rb", <<~EOS) + ElasticGraph.define_schema do |schema| + schema.json_schema_version 2 + + schema.object_type "Embedded" do |t| + t.field "workspace_id2", "ID", name_in_index: "workspace_id" + t.deleted_field "workspace_id" + + t.field "created_at2", "DateTime", name_in_index: "created_at" + t.deleted_field "created_at" + end + + schema.object_type "Widget" do |t| + t.field "id", "ID" + t.field "embedded", "Embedded" + t.index "widgets" do |i| + i.route_with "embedded.workspace_id2" + i.rollover :yearly, "embedded.created_at2" + end + end + end + EOS + + expect { run_rake("schema_artifacts:dump") }.to abort_with(<<~EOS) + JSON schema version 1 has no field that maps to the routing field path of `Widget.embedded.workspace_id`. + Since the field path is required for routing, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: + + 1. If the `Widget.embedded.workspace_id` field has been renamed, indicate this by calling `field.renamed_from "workspace_id"` on the renamed field rather than using `deleted_field`. + 2. Alternately, if no publishers or in-flight events use JSON schema version 1, delete its file from `json_schemas_by_version`, and no further changes are required. 
+ + + JSON schema version 1 has no field that maps to the rollover field path of `Widget.embedded.created_at`. + Since the field path is required for rollover, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: + + 1. If the `Widget.embedded.created_at` field has been renamed, indicate this by calling `field.renamed_from "created_at"` on the renamed field rather than using `deleted_field`. + 2. Alternately, if no publishers or in-flight events use JSON schema version 1, delete its file from `json_schemas_by_version`, and no further changes are required. + EOS + end + + let(:json_schema_version_setter_location_regex) do + # In `write_elastic_graph_schema_def_code` `json_schema_version` is called on the 7th line of + # the file written to `schema.rb` (after the 5-line double-load guard). See below. + # + # Note: on Ruby 3.3, the path here winds up being slightly different; instead of just `schema.rb` it is something like: + # `../d20240216-23551-cvdjzo/schema.rb`. I think it's related to the temp directory we run these specs within. + /line 7 at `(\S*\/?)schema\.rb`/ + end + + def write_elastic_graph_schema_def_code(json_schema_version:, component_suffix: "", component_name_extras: "", component_extras: "", omit_component_name_field: false, enforce_json_schema_version: true) + code = <<~EOS + Thread.current[:eg_schema_load_count] = (Thread.current[:eg_schema_load_count] || 0) + 1 + if Thread.current[:eg_schema_load_count] > 1 + raise "Schema file \#{__FILE__} was loaded \#{Thread.current[:eg_schema_load_count]} times in a single run!" + end + + ElasticGraph.define_schema do |schema| + schema.json_schema_version #{json_schema_version} + #{"schema.enforce_json_schema_version false" unless enforce_json_schema_version} + schema.enum_type "Size" do |t| + t.values "SMALL", "MEDIUM", "LAGE" + end + + schema.object_type "MechanicalPart" do |t| + t.field "id", "ID!" 
do |f| + f.directive "fromExtensionModule" + end + + t.index "mechanical_parts" + end + + schema.object_type "ElectricalPart" do |t| + t.field "id", "ID!" + t.field "size", "Size" + t.index "electrical_parts" + end + + schema.union_type "Part" do |t| + t.subtypes %w[MechanicalPart ElectricalPart] + end + + schema.object_type "ComponentDesigner#{component_suffix}" do |t| + t.field "id", "ID!" + t.field "designed_component_names", "[String!]!" + t.index "component_designers#{component_suffix}" + end + + schema.object_type "Component#{component_suffix}" do |t| + t.field "id", "ID!" + #{%(t.field "name", "String!"#{component_name_extras}) unless omit_component_name_field} + t.field "designer_id", "ID" + t.index "components#{component_suffix}", number_of_shards: 5 + + t.derive_indexed_type_fields "ComponentDesigner#{component_suffix}", from_id: "designer_id" do |derive| + derive.append_only_set "designed_component_names", from: "name" + end + #{component_extras} + end + end + EOS + + ::File.write("schema.rb", code) + end + + def expect_up_to_date_artifacts + output = nil + + expect { + output = run_rake("schema_artifacts:check") + }.not_to raise_error + + expect(output).to include(DATASTORE_CONFIG_FILE, JSON_SCHEMAS_FILE, "up to date") + end + + def expect_all_artifacts_out_of_date_because_they_havent_been_dumped + expect { + run_rake("schema_artifacts:check") + }.to abort_with { |error| + expect(error.message).to eq(<<~EOS.strip) + 5 schema artifact(s) are out of date. Run `bundle exec rake schema_artifacts:dump` to update the following artifact(s): + + 1. config/schema/artifacts/datastore_config.yaml (file does not exist) + 2. config/schema/artifacts/json_schemas.yaml (file does not exist) + 3. config/schema/artifacts/json_schemas_by_version/v1.yaml (file does not exist) + 4. config/schema/artifacts/runtime_metadata.yaml (file does not exist) + 5. 
config/schema/artifacts/schema.graphql (file does not exist) + EOS + } + end + + def expect_out_of_date_artifacts + expect { + run_rake("schema_artifacts:check") + }.to abort_with a_string_including("out of date", DATASTORE_CONFIG_FILE, JSON_SCHEMAS_FILE) + end + + def run_rake(*args) + Thread.current[:eg_schema_load_count] = nil + + # The schema definition code written by `write_elastic_graph_schema_def_code` uses a + # `fromExtensionModule` directive, which this extension module defines. + extension_module = ::Module.new do + def as_active_instance + raw_sdl "directive @fromExtensionModule on FIELD_DEFINITION" + super + end + end + + super(*args) do |output| + ::ElasticGraph::SchemaDefinition::RakeTasks.new( + schema_element_name_form: :snake_case, + index_document_sizes: true, + path_to_schema: "schema.rb", + schema_artifacts_directory: "config/schema/artifacts", + extension_modules: [APIExtension, extension_module], + output: output + ) + end + end + + def read_artifact(*name_parts) + path = ::File.join("config", "schema", "artifacts", *name_parts) + ::File.exist?(path) && ::File.read(path) + end + + def delete_artifact(*name_parts) + ::File.delete(::File.join("config", "schema", "artifacts", *name_parts)) + end + + def versioned_json_schema_file(version) + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v#{version}.yaml") + end + + def json_schema_for_keyword_type(type, extras = {}) + { + "allOf" => [ + {"$ref" => "#/$defs/#{type}"}, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH} + ] + }.merge(extras) + end + end + end + end +end diff --git a/elasticgraph-schema_definition/spec/support/example_extensions/scalar_coercion_adapter.rb b/elasticgraph-json_ingestion/spec/spec_helper.rb similarity index 59% rename from elasticgraph-schema_definition/spec/support/example_extensions/scalar_coercion_adapter.rb rename to elasticgraph-json_ingestion/spec/spec_helper.rb index 5306d1d91..138d7e9ca 100644 --- 
a/elasticgraph-schema_definition/spec/support/example_extensions/scalar_coercion_adapter.rb +++ b/elasticgraph-json_ingestion/spec/spec_helper.rb @@ -6,10 +6,5 @@ # # frozen_string_literal: true -class ExampleScalarCoercionAdapter - def self.coerce_input(value, ctx) - end - - def self.coerce_result(value, ctx) - end -end +# This file contains RSpec configuration for `elasticgraph-json_ingestion`. +# It is loaded by the shared spec helper at `spec_support/spec_helper.rb`. diff --git a/elasticgraph-schema_definition/spec/support/json_schema_matcher.rb b/elasticgraph-json_ingestion/spec/support/json_schema_matcher.rb similarity index 100% rename from elasticgraph-schema_definition/spec/support/json_schema_matcher.rb rename to elasticgraph-json_ingestion/spec/support/json_schema_matcher.rb diff --git a/elasticgraph-schema_definition/spec/support/json_schema_matcher_spec.rb b/elasticgraph-json_ingestion/spec/support/json_schema_matcher_spec.rb similarity index 100% rename from elasticgraph-schema_definition/spec/support/json_schema_matcher_spec.rb rename to elasticgraph-json_ingestion/spec/support/json_schema_matcher_spec.rb diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb similarity index 99% rename from elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb rename to elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb index 2e87e57ce..021d71793 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata_spec.rb @@ -10,9 +10,9 @@ require 
"elastic_graph/spec_support/schema_definition_helpers" module ElasticGraph - module SchemaDefinition + module JSONIngestion::SchemaDefinition module Indexing - ::RSpec.describe JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata do + ::RSpec.describe JSONSchemaWithMetadata do include_context "SchemaDefinitionHelpers" it "ignores derived indexed types that do not show up in the JSON schema" do @@ -1056,7 +1056,7 @@ def metadata_for(json_schema, type, field) def define_schema(&schema_definition) super( schema_element_name_form: "snake_case", - extension_modules: [JSONIngestion::SchemaDefinition::APIExtension], + extension_modules: [APIExtension], &schema_definition ) end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/wrappers_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/wrappers_spec.rb new file mode 100644 index 000000000..feb265303 --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/indexing/wrappers_spec.rb @@ -0,0 +1,226 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/indexing/field" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_reference" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/object" +require "elastic_graph/spec_support/schema_definition_helpers" +require "support/json_schema_matcher" + +module ElasticGraph + module JSONIngestion::SchemaDefinition + ::RSpec.describe "JSON schema indexing wrappers" do + include_context "SchemaDefinitionHelpers" + + # `FieldReference#resolve` is a lazy reference: the referenced type need not exist when a field is + # defined, only when artifacts are dumped. These two specs drive both outcomes (resolves / never + # resolves) through the public schema-definition API. + describe "lazy field resolution" do + it "resolves a field whose type is defined after the referencing type" do + json_schema = dump_schema do |s| + s.object_type "MyType" do |t| + t.field "id", "ID!" + t.field "other", "OtherType" + end + + s.object_type "OtherType" do |t| + t.field "name", "String" + end + end + + expect(json_schema).to have_json_schema_like("MyType", { + "type" => "object", + "properties" => { + "id" => json_schema_ref("ID!"), + "other" => json_schema_ref("OtherType") + }, + "required" => %w[id other] + }) + end + + it "raises a clear error (rather than blowing up internally) for a field whose type never resolves" do + # When a field references a type that is never defined, the wrapped `FieldReference#resolve` + # returns `nil`. The schema definition machinery relies on that `nil` to detect the unresolvable + # type and surface a helpful error instead of crashing. + expect { + dump_schema do |s| + s.object_type "MyType" do |t| + t.field "id", "ID!" 
+ t.field "mystery", "DoesNotExist" + end + end + }.to raise_error(Errors::SchemaError, a_string_including("Type `DoesNotExist` cannot be resolved", "misspelled")) + end + end + + # Each wrapper is a value object that augments a wrapped schema-definition object with JSON schema + # state. Their `==`/`eql?`/`hash` implementations exist so the schema-definition machinery can treat + # two wrappers of equal state as interchangeable (e.g. for `Set`/`Hash`/`uniq` de-duplication) and + # treat a wrapper as equal to the object it wraps. No public-API path depends on the *outcome* of + # these comparisons today, so we exercise them directly — but on wrappers obtained from a schema + # defined via the public API (converting the same schema element twice to obtain equal-but-distinct + # wrappers), so they're used in context rather than fabricated with internal collaborators. + describe "value semantics" do + it "treats `FieldReference`s derived from the same field as interchangeable, and distinguishes other fields" do + schema_field = widget_indexing_field("name") + reference = schema_field.to_indexing_field_reference + equivalent_reference = schema_field.to_indexing_field_reference + other_reference = widget_indexing_field("id").to_indexing_field_reference + + expect_equivalent(reference, equivalent_reference) + expect_distinct(reference, other_reference) + expect_equal_to_wrapped(reference) + end + + it "treats `Field`s derived from the same field as interchangeable, and distinguishes other fields" do + field = widget_indexing_object_field_type.subfields.fetch(0) + equivalent_field = widget_indexing_object_field_type.subfields.fetch(0) + other_field = widget_indexing_object_field_type.subfields.fetch(1) + + expect_equivalent(field, equivalent_field) + expect_distinct(field, other_field) + expect_equal_to_wrapped(field) + end + + it "treats `FieldType::Object`s derived from the same type as interchangeable, and distinguishes other types" do + object_field_type = 
indexing_object_field_type_for("Widget") + equivalent_object_field_type = indexing_object_field_type_for("Widget") + other_object_field_type = indexing_object_field_type_for("Gadget") + + expect_equivalent(object_field_type, equivalent_object_field_type) + expect_distinct(object_field_type, other_object_field_type) + expect_equal_to_wrapped(object_field_type) + end + + # The stateless leaf field-type wrappers (`Scalar`, `Enum`, `Union`) share their value semantics + # via `FieldType::ValueSemantics`. They carry no JSON schema state of their own, so equality is + # purely a function of the wrapped field type: two wrappers around equal field types are equal and + # a wrapper equals the field type it wraps. We cover all three kinds since each must `prepend` + # the shared module and nothing else (compiler or type checker) catches an omission. + {"scalar" => "enum", "enum" => "union", "union" => "scalar"}.each do |leaf_kind, other_leaf_kind| + it "treats `#{leaf_kind}` field-type wrappers as interchangeable when they wrap equal field types" do + field_type = indexing_leaf_field_type(leaf_kind) + equivalent_field_type = indexing_leaf_field_type(leaf_kind) + other_field_type = indexing_leaf_field_type(other_leaf_kind) + + expect_equivalent(field_type, equivalent_field_type) + expect_distinct(field_type, other_field_type) + expect_equal_to_wrapped(field_type) + end + end + + # The assertions below compare the boolean result of `==`/`eql?` rather than passing the wrappers + # to the `eq`/`eql` matchers directly. These wrappers delegate to the deep, cross-referential + # schema-definition object graph, and on failure RSpec's differ would `pretty_print` both sides -- + # which balloons to tens of megabytes and takes seconds. Asserting on booleans keeps a failure + # cheap to render (`expected true, got false`) regardless of how the comparison turns out. 
+ + # Asserts the full value-object contract: equal-by-`==`, equal-by-`eql?`, equal `hash`, and -- the + # behavior the contract exists for -- interchangeable as `Set`/`Hash` members. + def expect_equivalent(wrapper, equivalent_wrapper) + expect(wrapper == equivalent_wrapper).to be(true) + expect(wrapper.eql?(equivalent_wrapper)).to be(true) + expect(wrapper.hash).to eq(equivalent_wrapper.hash) + expect(::Set.new([wrapper, equivalent_wrapper]).size).to eq(1) + end + + def expect_distinct(wrapper, other_wrapper) + expect(wrapper == other_wrapper).to be(false) + end + + def expect_equal_to_wrapped(wrapper) + expect(wrapper == wrapper.__getobj__).to be(true) + end + + def widget_indexing_field(name) + object_types_by_name.fetch("Widget").indexing_fields_by_name_in_index.fetch(name) + end + + def widget_indexing_object_field_type + indexing_object_field_type_for("Widget") + end + + def indexing_object_field_type_for(type_name) + object_types_by_name.fetch(type_name).to_indexing_field_type + end + + # The `Widget` field whose indexing field type is the requested leaf kind. A different kind's field + # gives an "other" leaf wrapper for inequality assertions. + def indexing_leaf_field_type(leaf_kind) + field_name = {"scalar" => "name", "enum" => "color", "union" => "thing"}.fetch(leaf_kind) + subfield = widget_indexing_object_field_type.subfields.find { |f| f.name == field_name } + subfield.indexing_field_type + end + + def object_types_by_name + @object_types_by_name ||= define_schema(schema_element_name_form: "snake_case") do |s| + s.enum_type "Color" do |t| + t.values "RED", "BLUE" + end + + s.object_type "Square" do |t| + t.field "side", "Int!" + end + + s.object_type "Circle" do |t| + t.field "radius", "Int!" + end + + s.union_type "Shape" do |t| + t.subtypes "Square", "Circle" + end + + s.object_type "Widget" do |t| + t.field "id", "ID!" 
+ t.field "name", "String" do |f| + f.json_schema minLength: 1 + end + t.field "color", "Color" + t.field "thing", "Shape" + end + + s.object_type "Gadget" do |t| + t.field "id", "ID!" + t.field "size", "Int" + end + end.state.object_types_by_name + end + end + + def dump_schema(&schema_definition) + define_schema(schema_element_name_form: "snake_case", &schema_definition).current_public_json_schema + end + + def json_schema_ref(type, is_keyword_type: %w[ID! ID String! String].include?(type)) + if type.end_with?("!") + basic_json_schema_ref = {"$ref" => "#/$defs/#{type.delete_suffix("!")}"} + + if is_keyword_type + { + "allOf" => [ + basic_json_schema_ref, + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH} + ] + } + else + basic_json_schema_ref + end + else + { + "anyOf" => [ + json_schema_ref("#{type}!", is_keyword_type: is_keyword_type), + {"type" => "null"} + ] + } + end + end + end + end +end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_field_metadata_spec.rb similarity index 95% rename from elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb rename to elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_field_metadata_spec.rb index 450ae447d..0ebfeafe0 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_field_metadata_spec.rb @@ -10,7 +10,7 @@ require "elastic_graph/spec_support/schema_definition_helpers" module ElasticGraph - module SchemaDefinition + module JSONIngestion::SchemaDefinition ::RSpec.describe "JSON schema field metadata generation" do include_context "SchemaDefinitionHelpers" @@ -143,13 +143,13 @@ def 
dump_metadata(&schema_definition) def define_schema(&schema_definition) super( schema_element_name_form: "snake_case", - extension_modules: [JSONIngestion::SchemaDefinition::APIExtension], + extension_modules: [APIExtension], &schema_definition ) end def field_meta_of(type, name_in_index) - JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata.new(type: type, name_in_index: name_in_index) + Indexing::JSONSchemaFieldMetadata.new(type: type, name_in_index: name_in_index) end end end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb similarity index 97% rename from elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb rename to elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb index e9c01c9f2..c8327661f 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_pruner_spec.rb @@ -11,8 +11,8 @@ require "elastic_graph/spec_support/schema_definition_helpers" module ElasticGraph - module SchemaDefinition - RSpec.describe JSONIngestion::SchemaDefinition::JSONSchemaPruner do + module JSONIngestion::SchemaDefinition + RSpec.describe JSONSchemaPruner do include_context "SchemaDefinitionHelpers" describe ".prune" do diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb similarity index 99% rename from elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb rename to 
elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb index cdaa8ba7b..1c964801d 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/json_schema_spec.rb @@ -13,7 +13,7 @@ require "support/json_schema_matcher" module ElasticGraph - module SchemaDefinition + module JSONIngestion::SchemaDefinition ::RSpec.describe "JSON schema generation" do include_context "SchemaDefinitionHelpers" json_schema_id = {"allOf" => [{"$ref" => "#/$defs/ID"}, {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}]} @@ -3151,6 +3151,27 @@ def link_supertype_to_subtypes(interface_type, *subtype_names) expect(widget_schema.dig("properties", "undocumented_field")).not_to have_key("description") end + it "does not care if the interface and object fields have different JSON schema" do + json_schema = dump_schema do |schema| + schema.object_type "Thing" do |t| + t.implements "HasID" + t.field "id", "ID!" do |f| + f.json_schema maxLength: 40 + end + t.field "name", "String" + t.index "things" + end + + schema.interface_type "HasID" do |t| + t.field "id", "ID!" 
do |f| + f.json_schema maxLength: 30 + end + end + end + + expect(json_schema.dig("$defs", "Thing", "properties", "id", "allOf")).to include({"maxLength" => 40}) + end + def all_type_definitions_for(&schema_definition) dump_schema(&schema_definition).fetch("$defs") end diff --git a/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension_spec.rb b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension_spec.rb new file mode 100644 index 000000000..364dfe2ee --- /dev/null +++ b/elasticgraph-json_ingestion/spec/unit/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension_spec.rb @@ -0,0 +1,232 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension" +require "elastic_graph/spec_support/schema_definition_helpers" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + RSpec.describe ScalarTypeExtension do + include_context "SchemaDefinitionHelpers" + + it "requires custom scalar types to declare their JSON schema representation" do + expect { + define_schema(schema_element_name_form: "snake_case") do |schema| + schema.scalar_type "BigInt" do |type| + type.mapping type: "long" + end + end + }.to raise_error Errors::SchemaError, a_string_including("BigInt", "lacks `json_schema`") + end + + it "extends schema elements created without customization blocks" do + api = build_api + api.enum_type "EmptyEnum" + api.interface_type "EmptyInterface" + direct_type_with_subfields = api.factory.new_type_with_subfields( + :object, + "DirectObject", + 
wrapping_type: nil, + field_factory: api.factory.method(:new_field) + ) + + # An enum's derived GraphQL types are built from a derived scalar twin, which can only be + # built if `EnumTypeExtension` configured the twin's `json_schema`; otherwise building it + # raises a "lacks `json_schema`" error. + expect { + api.state.enum_types_by_name.fetch("EmptyEnum").derived_graphql_types + }.not_to raise_error + + # `json_schema` is only available on types extended with `TypeWithSubfieldsExtension`. + interface_type = api.state.object_types_by_name.fetch("EmptyInterface") + interface_type.json_schema minProperties: 1 + expect(interface_type.json_schema_options).to eq({minProperties: 1}) + + direct_type_with_subfields.json_schema minProperties: 2 + expect(direct_type_with_subfields.json_schema_options).to eq({minProperties: 2}) + + expect { + build_api.scalar_type "BigInt" + }.to raise_error Errors::SchemaError, a_string_including("BigInt", "lacks `json_schema`") + end + + it "infers a numeric missing-value placeholder for JSON-safe unsigned_long scalars with custom coercion" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for( + "unsigned_long", + type: "integer", + maximum: JSON_SAFE_LONG_MAX + ) do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(grouping_missing_value_placeholder).to eq(MISSING_NUMERIC_PLACEHOLDER) + end + + it "does not infer a numeric missing-value placeholder for unsigned_long scalars outside the JSON-safe range" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for( + "unsigned_long", + type: "integer", + maximum: JSON_SAFE_LONG_MAX + 1 + ) do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(grouping_missing_value_placeholder).to eq(nil) + end + + it "does not infer a placeholder for JSON-safe unsigned_long scalars with the default coercion adapter (which would 
not coerce floats back to integers)" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for( + "unsigned_long", + type: "integer", + maximum: JSON_SAFE_LONG_MAX + ) + + expect(grouping_missing_value_placeholder).to eq(nil) + end + + it "does not infer a placeholder for unsigned_long scalars when no maximum is specified" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("unsigned_long", type: "integer") do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(grouping_missing_value_placeholder).to eq(nil) + end + + it "infers a numeric missing-value placeholder for long scalars exactly at the JSON-safe boundaries with custom coercion" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for( + "long", + type: "integer", + minimum: JSON_SAFE_LONG_MIN, + maximum: JSON_SAFE_LONG_MAX + ) do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(grouping_missing_value_placeholder).to eq(MISSING_NUMERIC_PLACEHOLDER) + end + + it "does not infer a placeholder for JSON-safe long scalars with the default coercion adapter" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for( + "long", + type: "integer", + minimum: JSON_SAFE_LONG_MIN, + maximum: JSON_SAFE_LONG_MAX + ) + + expect(grouping_missing_value_placeholder).to eq(nil) + end + + it "does not infer a placeholder for long scalars when the minimum is one below the JSON-safe range" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for( + "long", + type: "integer", + minimum: JSON_SAFE_LONG_MIN - 1, + maximum: JSON_SAFE_LONG_MAX + ) do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(grouping_missing_value_placeholder).to eq(nil) + end + + it "does not infer a placeholder for long scalars when the maximum 
is one above the JSON-safe range" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for( + "long", + type: "integer", + minimum: JSON_SAFE_LONG_MIN, + maximum: JSON_SAFE_LONG_MAX + 1 + ) do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(grouping_missing_value_placeholder).to eq(nil) + end + + it "does not infer a placeholder for long scalars when only one bound is specified (the other defaults to the LongString range)" do + only_min = grouping_missing_value_placeholder_for("long", type: "integer", minimum: 0) do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + only_max = grouping_missing_value_placeholder_for("long", type: "integer", maximum: 1000) do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(only_min).to eq(nil) + expect(only_max).to eq(nil) + end + + it "does not infer a placeholder for long scalars when no bounds are specified" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer") do |type| + type.coerce_with "ExampleScalarCoercionAdapter", defined_at: scalar_coercion_adapter_path + end + + expect(grouping_missing_value_placeholder).to eq(nil) + end + + it "has the expected placeholder for each built-in scalar type, including the JSON-safe-range-aware `JsonSafeLong` inference" do + results = define_schema(schema_element_name_form: "snake_case") { |schema| } + built_in_scalars = results.state.scalar_types_by_name.keys + scalar_types_by_name = results.runtime_metadata.scalar_types_by_name + + placeholders_by_scalar_type = built_in_scalars.to_h do |scalar_type| + [scalar_type, scalar_types_by_name.fetch(scalar_type).grouping_missing_value_placeholder] + end + + expect(placeholders_by_scalar_type).to eq({ + "Boolean" => nil, + "Cursor" => MISSING_STRING_PLACEHOLDER, + "Date" => nil, + 
"DateTime" => nil, + "Float" => MISSING_NUMERIC_PLACEHOLDER, + "ID" => MISSING_STRING_PLACEHOLDER, + "Int" => MISSING_NUMERIC_PLACEHOLDER, # GraphQL automatically coerces Int values + "JsonSafeLong" => MISSING_NUMERIC_PLACEHOLDER, # custom coercion adapter coerces floats back to integers + "LocalTime" => nil, + "LongString" => nil, # outside of the JSON safe range. + "String" => MISSING_STRING_PLACEHOLDER, + "TimeZone" => MISSING_STRING_PLACEHOLDER, + "Untyped" => MISSING_STRING_PLACEHOLDER + }) + end + + def grouping_missing_value_placeholder_for(mapping_type, **json_schema_options) + define_schema(schema_element_name_form: "snake_case") do |schema| + schema.scalar_type "CustomScalar" do |type| + type.mapping type: mapping_type + type.json_schema(**json_schema_options) + yield type if block_given? + end + end.runtime_metadata.scalar_types_by_name.fetch("CustomScalar").grouping_missing_value_placeholder + end + + def scalar_coercion_adapter_path + # The adapter lives in `spec_support` so that every suite which loads it (e.g. this one and + # `elasticgraph-schema_definition`) uses the same require path: the extension loader raises + # if the same extension is loaded from two different paths within one process, as can happen + # when one worker runs multiple suites. 
+ "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" + end + + def build_api + schema_elements = SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new(form: "snake_case") + ::ElasticGraph::SchemaDefinition::API.new( + schema_elements, + true, + extension_modules: [APIExtension], + output: log_device + ) + end + end + end + end + end +end diff --git a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb index edb0a967e..892544da0 100644 --- a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb +++ b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb @@ -33,7 +33,7 @@ module SchemaDefinition describe "schema_artifacts:dump", :in_temp_dir do it "idempotently dumps all schema artifacts, and is able to check if they are current with `:check`" do - write_elastic_graph_schema_def_code(json_schema_version: 1) + write_elastic_graph_schema_def_code expect_all_artifacts_out_of_date_because_they_havent_been_dumped expect { @@ -41,8 +41,6 @@ module SchemaDefinition expect(output.lines).to include( a_string_including("Dumped", DATASTORE_CONFIG_FILE), a_string_including("Dumped", RUNTIME_METADATA_FILE), - a_string_including("Dumped", JSON_SCHEMAS_FILE), - a_string_including("Dumped", versioned_json_schema_file(1)), a_string_including("Dumped", GRAPHQL_SCHEMA_FILE) ) }.to change { read_artifact(DATASTORE_CONFIG_FILE) } @@ -54,17 +52,11 @@ module SchemaDefinition .and change { read_artifact(RUNTIME_METADATA_FILE) } .from(a_falsy_value) .to(a_string_including("script_id: update_ComponentDesigner_from_Component_").and(excluding("ruby/object"))) - .and change { read_artifact(JSON_SCHEMAS_FILE) } - .from(a_falsy_value) - .to(a_string_including("\n Component:", "\njson_schema_version: 1")) .and change { 
read_artifact(GRAPHQL_SCHEMA_FILE) } .from(a_falsy_value) .to(a_string_including("type Component {", "directive @fromExtensionModule")) - # Verify the data is dumped in Alphabetical order for consistency, and is pruned - # (Except for `EVENT_ENVELOPE_JSON_SCHEMA_NAME` -- it goes first). - definition_names = YAML.safe_load(read_artifact(JSON_SCHEMAS_FILE)).fetch("$defs").keys - expect(definition_names).to eq(%w[ElasticGraphEventEnvelope Component ElectricalPart ID MechanicalPart Size String]) + # Verify the data is dumped in Alphabetical order for consistency. expect(YAML.safe_load(read_artifact(DATASTORE_CONFIG_FILE)).fetch("indices").keys).to eq %w[ component_designers components electrical_parts mechanical_parts ] @@ -77,10 +69,9 @@ module SchemaDefinition expect(output.lines).to include(a_string_including("already up to date")) }.to maintain { read_artifact(DATASTORE_CONFIG_FILE) } .and maintain { read_artifact(RUNTIME_METADATA_FILE) } - .and maintain { read_artifact(JSON_SCHEMAS_FILE) } .and maintain { read_artifact(GRAPHQL_SCHEMA_FILE) } - write_elastic_graph_schema_def_code(component_suffix: "2", component_extras: "schema.deleted_type 'Component'", json_schema_version: 2) + write_elastic_graph_schema_def_code(component_suffix: "2") expect_out_of_date_artifacts_with_details(<<~EOS.strip) - component_designers: @@ -97,9 +88,6 @@ module SchemaDefinition expect(output.lines).to include( a_string_including("Dumped", DATASTORE_CONFIG_FILE), a_string_including("Dumped", RUNTIME_METADATA_FILE), - a_string_including("Dumped", JSON_SCHEMAS_FILE), - a_string_including("Dumped", versioned_json_schema_file(1)), - a_string_including("Dumped", versioned_json_schema_file(2)), a_string_including("Dumped", GRAPHQL_SCHEMA_FILE) ) }.to change { read_artifact(DATASTORE_CONFIG_FILE) } @@ -111,127 +99,26 @@ module SchemaDefinition .and change { read_artifact(RUNTIME_METADATA_FILE) } .from(a_string_including("script_id: update_ComponentDesigner_from_Component_")) 
.to(a_string_including("script_id: update_ComponentDesigner2_from_Component2_")) - .and change { read_artifact(JSON_SCHEMAS_FILE) } - .from(a_string_including("\n Component:", "\njson_schema_version: 1")) - .to(a_string_including("\n Component2:", "\njson_schema_version: 2").and(excluding("\n Component:"))) .and change { read_artifact(GRAPHQL_SCHEMA_FILE) } .from(a_string_including("type Component {")) .to(a_string_including("type Component2 {").and(excluding("Component "))) expect_up_to_date_artifacts - - delete_artifact versioned_json_schema_file(2) - expect_missing_versioned_json_schema_artifact "v2.yaml" end - it "throws an error if the json_schemas artifact is (attempted to be) changed without json_schema_version being bumped" do - write_elastic_graph_schema_def_code(json_schema_version: 1) - expect_all_artifacts_out_of_date_because_they_havent_been_dumped - - # Should succeed, for first artifact. - expect { - output = run_rake("schema_artifacts:dump") - expect(output.lines).to include( - a_string_including("Dumped", JSON_SCHEMAS_FILE), - a_string_including("Dumped", versioned_json_schema_file(1)) - ) - }.to change { read_artifact(JSON_SCHEMAS_FILE) } - .from(a_falsy_value) - .to(a_string_including("\njson_schema_version: 1\n")) - .and change { read_artifact(versioned_json_schema_file(1)) } - .from(a_falsy_value) - .to(a_string_including("\njson_schema_version: 1\n")) - - expect_up_to_date_artifacts - - write_elastic_graph_schema_def_code(json_schema_version: 2) - - # Should succeed, it is ok to update the schema_version without underlying contents changing. 
- expect { - output = run_rake("schema_artifacts:dump") - expect(output.lines).to include( - a_string_including("Dumped", JSON_SCHEMAS_FILE), - a_string_including("Dumped", versioned_json_schema_file(2)) - ) - }.to change { read_artifact(JSON_SCHEMAS_FILE) } - .from(a_string_including("\njson_schema_version: 1")) - .to(a_string_including("\njson_schema_version: 2")) - .and change { read_artifact(versioned_json_schema_file(2)) } - .from(a_falsy_value) - .to(a_string_including("\njson_schema_version: 2\n")) - - write_elastic_graph_schema_def_code(component_suffix: "2", json_schema_version: 2, component_extras: "t.renamed_from 'Component'") - expect_out_of_date_artifacts - - expect { - run_rake("schema_artifacts:dump") - }.to abort_with a_string_including( - "A change has been attempted to `json_schemas.yaml`", - "`schema.json_schema_version 3`" - ).and matching(json_schema_version_setter_location_regex) - - # Still out of date. - expect_out_of_date_artifacts - - # Decreasing the json_schema_version should also result in a failure. - write_elastic_graph_schema_def_code(component_suffix: "2", json_schema_version: 1, component_extras: "t.renamed_from 'Component'") - expect_out_of_date_artifacts - - expect { - run_rake("schema_artifacts:dump") - }.to abort_with a_string_including( - "A change has been attempted to `json_schemas.yaml`", - "`schema.json_schema_version 3`" - ).and matching(json_schema_version_setter_location_regex) - - write_elastic_graph_schema_def_code(component_suffix: "2", json_schema_version: 3, component_extras: "t.renamed_from 'Component'") - - # Now dump should succeed, as schema_version has been bumped. 
- expect { - output = run_rake("schema_artifacts:dump") - expect(output.lines).to include( - a_string_including("Dumped", JSON_SCHEMAS_FILE), - a_string_including("Dumped", versioned_json_schema_file(3)) - ) - }.to change { read_artifact(JSON_SCHEMAS_FILE) } - .from(a_string_including("\njson_schema_version: 2")) - .to(a_string_including("\njson_schema_version: 3")) - .and change { read_artifact(versioned_json_schema_file(3)) } - .from(a_falsy_value) - .to(a_string_including("\njson_schema_version: 3\n")) - - # Should be able to run `schema_artifacts:dump` idempotently. - output = run_rake("schema_artifacts:dump") - expect(output.lines).to include( - a_string_including("is already up to date", JSON_SCHEMAS_FILE), - a_string_including("is already up to date", versioned_json_schema_file(3)) - ) - - write_elastic_graph_schema_def_code(component_suffix: "3", json_schema_version: 3, component_extras: "t.renamed_from 'Component'") - expect_out_of_date_artifacts + it "shows the full diff for an out-of-date artifact when the diff is short" do + write_elastic_graph_schema_def_code + run_rake("schema_artifacts:dump") - expect { - run_rake("schema_artifacts:dump") - }.to abort_with a_string_including( - "A change has been attempted to `json_schemas.yaml`", - "`schema.json_schema_version 4`" - ).and matching(json_schema_version_setter_location_regex) - - write_elastic_graph_schema_def_code( - component_suffix: "3", - json_schema_version: 3, - component_extras: "t.renamed_from 'Component'", - enforce_json_schema_version: false - ) + write_elastic_graph_schema_def_code(number_of_shards: 7) expect { - output = run_rake("schema_artifacts:dump") - expect(output.lines).to include( - a_string_including("Dumped", JSON_SCHEMAS_FILE), - a_string_including("Dumped", versioned_json_schema_file(3)) - ) - }.to change { read_artifact(JSON_SCHEMAS_FILE) } - .and change { read_artifact(versioned_json_schema_file(3)) } + run_rake("schema_artifacts:check") + }.to abort_with { |error| + 
expect(error.message) + .to include("1. config/schema/artifacts/datastore_config.yaml (see [1] below for the diff)", "number_of_shards") + .and exclude("lines of the diff") + } end it "allows the derived GraphQL type name formats to be customized" do @@ -243,7 +130,7 @@ module SchemaDefinition block.call(line) end - write_elastic_graph_schema_def_code(json_schema_version: 1) + write_elastic_graph_schema_def_code run_rake("schema_artifacts:dump") # We strip the comment preamble so we can compare it with an SDL string that lacks it below. @@ -285,7 +172,7 @@ module SchemaDefinition end it "generates separate input vs output enums by default, but allows them to be the same if desired" do - write_elastic_graph_schema_def_code(json_schema_version: 1) + write_elastic_graph_schema_def_code run_rake("schema_artifacts:dump") expect(enum_types_in_dumped_graphql_schema).to contain_exactly( @@ -325,6 +212,7 @@ module SchemaDefinition output = run_rake( "schema_artifacts:dump", + extension_modules: [JSONIngestion::SchemaDefinition::APIExtension], type_name_overrides: overrides.merge({"Widgets" => "Unused"}), enum_value_overrides_by_type: { "PreColor" => {"GREAN" => "GREENISH", "MAGENTA" => "RED"}, @@ -375,7 +263,7 @@ module SchemaDefinition overrides = original_core_types.to_h { |name| [name, "Pre#{name}"] } - output = run_rake("schema_artifacts:dump", type_name_overrides: overrides) + output = run_rake("schema_artifacts:dump", extension_modules: [JSONIngestion::SchemaDefinition::APIExtension], type_name_overrides: overrides) expect(output).to exclude(does_not_match_warning_snippet) overriden_types = graphql_types_defined_in(read_artifact(GRAPHQL_SCHEMA_FILE)) @@ -399,386 +287,14 @@ module SchemaDefinition expect(filtered_types).to match_array(allowed_list) end - it "dumps the ElasticGraph JSON schema metadata only on the internal versioned JSON schema, omitting it from the public copy" do - write_elastic_graph_schema_def_code(json_schema_version: 1) - 
run_rake("schema_artifacts:dump") - - expect(::YAML.safe_load(read_artifact(JSON_SCHEMAS_FILE)).dig("$defs", "Component", "properties", "id")).to eq( - json_schema_for_keyword_type("ID") - ) - - expect(::YAML.safe_load(read_artifact(versioned_json_schema_file(1))).dig("$defs", "Component", "properties", "id")).to eq( - json_schema_for_keyword_type("ID", { - "ElasticGraph" => { - "type" => "ID!", - "nameInIndex" => "id" - } - }) - ) - end - - it "keeps the ElasticGraph JSON schema metadata up-to-date on all versioned JSON schemas" do - write_elastic_graph_schema_def_code(json_schema_version: 1) - run_rake("schema_artifacts:dump") - - expect(::YAML.safe_load(read_artifact(versioned_json_schema_file(1))).dig("$defs", "Component", "properties", "name")).to eq( - json_schema_for_keyword_type("String", { - "ElasticGraph" => { - "type" => "String!", - "nameInIndex" => "name" - } - }) - ) - - # Here we add a new field `another: String` - write_elastic_graph_schema_def_code(json_schema_version: 2, component_name_extras: "\nt.field 'another', 'String!'") - run_rake("schema_artifacts:dump") - - # It's not added to v1.yaml... - loaded_v1 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(1))) - expect(loaded_v1.dig("$defs", "Component", "properties", "name")).to eq( - json_schema_for_keyword_type("String", { - "ElasticGraph" => { - "type" => "String!", - "nameInIndex" => "name" - } - }) - ) - expect(loaded_v1.dig("$defs", "Component", "properties", "another")).to eq(nil) - - # ..but is added to v2.yaml. 
- loaded_v2 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(2))) - expect(loaded_v2.dig("$defs", "Component", "properties", "name")).to eq( - json_schema_for_keyword_type("String", { - "ElasticGraph" => { - "type" => "String!", - "nameInIndex" => "name" - } - }) - ) - expect(loaded_v2.dig("$defs", "Component", "properties", "another")).to eq( - json_schema_for_keyword_type("String", { - "ElasticGraph" => { - "type" => "String!", - "nameInIndex" => "another" - } - }) - ) - - # Here we keep the newly added field `another: String` and also change the `name_in_index` of `name`. - write_elastic_graph_schema_def_code(json_schema_version: 2, component_name_extras: ", name_in_index: 'name2'\nt.field 'another', 'String!'") - run_rake("schema_artifacts:dump") - - # The `name_in_index` for `name` should be changed to `name2` in the v1 schema... - loaded_v1 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(1))) - expect(loaded_v1.dig("$defs", "Component", "properties", "name")).to eq( - json_schema_for_keyword_type("String", { - "ElasticGraph" => { - "type" => "String!", - "nameInIndex" => "name2" - } - }) - ) - expect(loaded_v1.dig("$defs", "Component", "properties", "another")).to eq(nil) - - # ...and in the v1 schema. - loaded_v2 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(2))) - expect(loaded_v2.dig("$defs", "Component", "properties", "name")).to eq( - json_schema_for_keyword_type("String", { - "ElasticGraph" => { - "type" => "String!", - "nameInIndex" => "name2" - } - }) - ) - expect(loaded_v2.dig("$defs", "Component", "properties", "another")).to eq( - json_schema_for_keyword_type("String", { - "ElasticGraph" => { - "type" => "String!", - "nameInIndex" => "another" - } - }) - ) - - # Here we add a different new field (`ordinal: Int!`), without bumping the version (and using `enforce_json_schema_version: false` - # to not have to bump the version)... 
- write_elastic_graph_schema_def_code( - json_schema_version: 2, - component_name_extras: "\nt.field 'ordinal', 'Int!'", - enforce_json_schema_version: false - ) - run_rake("schema_artifacts:dump") - - # It should not be added to the v1 schema... - loaded_v1 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(1))) - expect(loaded_v1.dig("$defs", "Component", "properties", "ordinal")).to eq(nil) - - # ...but it should be added to the v2 schema. - loaded_v2 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(2))) - expect(loaded_v2.dig("$defs", "Component", "properties", "ordinal")).to eq({ - "$ref" => "#/$defs/Int", - "ElasticGraph" => {"type" => "Int!", "nameInIndex" => "ordinal"} - }) - end - - it "gives the user a clear error when there is ambiguity about what to do with a renamed or deleted field" do - # Verify the error message with 1 old JSON schema version (v8). - write_elastic_graph_schema_def_code(json_schema_version: 8) - run_rake("schema_artifacts:dump") - write_elastic_graph_schema_def_code(json_schema_version: 9, omit_component_name_field: true) - expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS - The `Component.name` field (which existed in JSON schema version 8) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this field's data when ingesting events at this old version. - To continue, do one of the following: - - 1. If the `Component.name` field has been renamed, indicate this by calling `field.renamed_from "name"` on the renamed field. - 2. If the `Component.name` field has been dropped, indicate this by calling `type.deleted_field "name"` on the `Component` type. - 3. Alternately, if no publishers or in-flight events use JSON schema version 8, delete its file from `json_schemas_by_version`, and no further changes are required. - EOS - - # Verify the error message with 2 old JSON schema version (v8 and v9). - # The grammar/phrasing is adjusted slightly (e.g. 
"versions 8 and 9"). - write_elastic_graph_schema_def_code(json_schema_version: 9) - run_rake("schema_artifacts:dump") - write_elastic_graph_schema_def_code(json_schema_version: 10, omit_component_name_field: true) - expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS - The `Component.name` field (which existed in JSON schema versions 8 and 9) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this field's data when ingesting events at these old versions. - To continue, do one of the following: - - 1. If the `Component.name` field has been renamed, indicate this by calling `field.renamed_from "name"` on the renamed field. - 2. If the `Component.name` field has been dropped, indicate this by calling `type.deleted_field "name"` on the `Component` type. - 3. Alternately, if no publishers or in-flight events use JSON schema versions 8 or 9, delete their files from `json_schemas_by_version`, and no further changes are required. - EOS - - # Verify the error message with 3 old JSON schema version (v8, v9, and v10). - # The grammar/phrasing is adjusted slightly (e.g. "versions 8, 9, and 10"). - write_elastic_graph_schema_def_code(json_schema_version: 10) - run_rake("schema_artifacts:dump") - write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true) - expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS - The `Component.name` field (which existed in JSON schema versions 8, 9, and 10) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this field's data when ingesting events at these old versions. - To continue, do one of the following: - - 1. If the `Component.name` field has been renamed, indicate this by calling `field.renamed_from "name"` on the renamed field. - 2. If the `Component.name` field has been dropped, indicate this by calling `type.deleted_field "name"` on the `Component` type. - 3. 
Alternately, if no publishers or in-flight events use JSON schema versions 8, 9, or 10, delete their files from `json_schemas_by_version`, and no further changes are required. - EOS - - # Demonstrate that these issues can be solved by each of the 3 options given. - # First, demonstrate indicating the field has been renamed. - write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true, component_extras: "t.field('full_name', 'String') { |f| f.renamed_from 'name' }") - run_rake("schema_artifacts:dump") - delete_artifact(JSON_SCHEMAS_FILE) # so it doesn't force us to increment the version to 5 - - # Next, demonstrate indicating the field has been deleted. - write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true, component_extras: "t.deleted_field 'name'") - run_rake("schema_artifacts:dump") - - # Finally, demonstrate deleting the old JSON schema version artifacts - delete_artifact(versioned_json_schema_file(8)) - delete_artifact(versioned_json_schema_file(9)) - delete_artifact(versioned_json_schema_file(10)) - write_elastic_graph_schema_def_code(json_schema_version: 11, omit_component_name_field: true) - run_rake("schema_artifacts:dump") - end - - it "gives the user a clear error when there is ambiguity about what to do with a renamed or deleted type" do - # Verify the error message with 1 old JSON schema version (v1). - write_elastic_graph_schema_def_code(json_schema_version: 1) - run_rake("schema_artifacts:dump") - write_elastic_graph_schema_def_code(json_schema_version: 2, component_suffix: "2") - expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS - The `Component` type (which existed in JSON schema version 1) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this type's data when ingesting events at this old version. - To continue, do one of the following: - - 1. 
If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. - 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. - 3. Alternately, if no publishers or in-flight events use JSON schema version 1, delete its file from `json_schemas_by_version`, and no further changes are required. - EOS - - # Verify the error message with 2 old JSON schema version (v1 and v2). - # The grammar/phrasing is adjusted slightly (e.g. "versions 1 and 2"). - write_elastic_graph_schema_def_code(json_schema_version: 2) - run_rake("schema_artifacts:dump") - write_elastic_graph_schema_def_code(json_schema_version: 3, component_suffix: "2") - expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS - The `Component` type (which existed in JSON schema versions 1 and 2) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this type's data when ingesting events at these old versions. - To continue, do one of the following: - - 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. - 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. - 3. Alternately, if no publishers or in-flight events use JSON schema versions 1 or 2, delete their files from `json_schemas_by_version`, and no further changes are required. - EOS - - # Verify the error message with 3 old JSON schema version (v1, v2, and v3). - # The grammar/phrasing is adjusted slightly (e.g. "versions 1, 2, and 3"). 
- write_elastic_graph_schema_def_code(json_schema_version: 3) - run_rake("schema_artifacts:dump") - write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2") - expect { run_rake("schema_artifacts:dump") }.to abort_with <<~EOS - The `Component` type (which existed in JSON schema versions 1, 2, and 3) no longer exists in the current schema definition. - ElasticGraph cannot guess what it should do with this type's data when ingesting events at these old versions. - To continue, do one of the following: - - 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. - 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. - 3. Alternately, if no publishers or in-flight events use JSON schema versions 1, 2, or 3, delete their files from `json_schemas_by_version`, and no further changes are required. - EOS - - # Demonstrate that these issues can be solved by each of the 3 options given. - # First, demonstrate indicating the type has been renamed. - write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2", component_extras: "t.renamed_from 'Component'") - run_rake("schema_artifacts:dump") - delete_artifact(JSON_SCHEMAS_FILE) # so it doesn't force us to increment the version to 5 - - # Next, demonstrate indicating the type has been deleted. 
- write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2", component_extras: "schema.deleted_type 'Component'") - run_rake("schema_artifacts:dump") - - # Finally, demonstrate deleting the old JSON schema version artifacts - delete_artifact(versioned_json_schema_file(1)) - delete_artifact(versioned_json_schema_file(2)) - delete_artifact(versioned_json_schema_file(3)) - write_elastic_graph_schema_def_code(json_schema_version: 4, component_suffix: "2") - run_rake("schema_artifacts:dump") - end - - it "warns if there are `deleted_*` or `renamed_from` calls that are not needed so the user knows they can remove them" do - ::File.write("schema.rb", <<~EOS) - ElasticGraph.define_schema do |schema| - schema.json_schema_version 1 - schema.deleted_type "SomeType" - - schema.object_type "Widget" do |t| - t.renamed_from "Widget2" - t.deleted_field "name" - t.field "description", "String" do |f| - f.renamed_from "old_description" - end - t.renamed_from "Widget3" - - t.field "id", "ID" - t.index "widgets" - end - end - EOS - - output = run_rake("schema_artifacts:dump") - expect(output.split("\n").first(9).join("\n")).to eq(<<~EOS.strip) - The schema definition has 5 unneeded reference(s) to deprecated schema elements. These can all be safely deleted: - - 1. `schema.deleted_type "SomeType"` at schema.rb:3 - 2. `type.renamed_from "Widget2"` at schema.rb:6 - 3. `type.deleted_field "name"` at schema.rb:7 - 4. `field.renamed_from "old_description"` at schema.rb:9 - 5. `type.renamed_from "Widget3"` at schema.rb:11 - - Dumped schema artifact to `config/schema/artifacts/datastore_config.yaml`. 
- EOS - end - - it "gives a clear error if excess `deleted_*` or `renamed_from` calls create a conflict" do - ::File.write("schema.rb", <<~EOS) - ElasticGraph.define_schema do |schema| - schema.json_schema_version 1 - schema.deleted_type "Widget" - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.index "widgets" - - t.field "token", "ID" do |f| - f.renamed_from "id" - end - t.deleted_field "id" - end - end - EOS - - expect { - run_rake("schema_artifacts:dump") - }.to abort_with(<<~EOS) - The schema definition of `Widget` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: - - 1. `schema.deleted_type "Widget"` at schema.rb:3 - - - The schema definition of `Widget.id` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: - - 1. `field.renamed_from "id"` at schema.rb:10 - 2. `type.deleted_field "id"` at schema.rb:12 - EOS - end - - it "does not allow a routing or rollover field to be deleted since we cannot index documents without values for those fields" do - ::File.write("schema.rb", <<~EOS) - ElasticGraph.define_schema do |schema| - schema.json_schema_version 1 - - schema.object_type "Embedded" do |t| - t.field "workspace_id", "ID" - t.field "created_at", "DateTime" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded", "Embedded" - t.index "widgets" do |i| - i.route_with "embedded.workspace_id" - i.rollover :yearly, "embedded.created_at" - end - end - end - EOS - - run_rake("schema_artifacts:dump") - - ::File.write("schema.rb", <<~EOS) - ElasticGraph.define_schema do |schema| - schema.json_schema_version 2 - - schema.object_type "Embedded" do |t| - t.field "workspace_id2", "ID", name_in_index: "workspace_id" - t.deleted_field "workspace_id" - - t.field "created_at2", "DateTime", name_in_index: "created_at" - t.deleted_field "created_at" - end - - schema.object_type "Widget" do |t| - t.field "id", "ID" - t.field "embedded", "Embedded" - 
t.index "widgets" do |i| - i.route_with "embedded.workspace_id2" - i.rollover :yearly, "embedded.created_at2" - end - end - end - EOS - - expect { run_rake("schema_artifacts:dump") }.to abort_with(<<~EOS) - JSON schema version 1 has no field that maps to the routing field path of `Widget.embedded.workspace_id`. - Since the field path is required for routing, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: - - 1. If the `Widget.embedded.workspace_id` field has been renamed, indicate this by calling `field.renamed_from "workspace_id"` on the renamed field rather than using `deleted_field`. - 2. Alternately, if no publishers or in-flight events use JSON schema version 1, delete its file from `json_schemas_by_version`, and no further changes are required. - - - JSON schema version 1 has no field that maps to the rollover field path of `Widget.embedded.created_at`. - Since the field path is required for rollover, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: - - 1. If the `Widget.embedded.created_at` field has been renamed, indicate this by calling `field.renamed_from "created_at"` on the renamed field rather than using `deleted_field`. - 2. Alternately, if no publishers or in-flight events use JSON schema version 1, delete its file from `json_schemas_by_version`, and no further changes are required. - EOS - end - it "does not change the formatting of the dumped artifacts in unexpected ways" do config_dir = File.join(CommonSpecHelpers::REPO_ROOT, "config") - run_rake("schema_artifacts:dump", path_to_schema: File.join(config_dir, "schema.rb"), include_extension_module: false) + run_rake( + "schema_artifacts:dump", + path_to_schema: File.join(config_dir, "schema.rb"), + include_extension_module: false, + extension_modules: [JSONIngestion::SchemaDefinition::APIExtension] + ) # :nocov: -- some branches below depend on pass vs fail or local vs CI. 
# Exclude `data_warehouse.yaml` from the diff since it's generated by the warehouse extension, @@ -802,13 +318,13 @@ module SchemaDefinition end it "retains `extend schema` in the dumped SDL if ElasticGraph includes it in the generated SDL string" do - write_elastic_graph_schema_def_code(json_schema_version: 1, extra_sdl: "") + write_elastic_graph_schema_def_code(extra_sdl: "") run_rake("schema_artifacts:dump") # `extend` should not be added by default... expect(read_artifact(GRAPHQL_SCHEMA_FILE)).not_to include("extend") - write_elastic_graph_schema_def_code(json_schema_version: 1, extra_sdl: <<~EOS) + write_elastic_graph_schema_def_code(extra_sdl: <<~EOS) extend schema @customDirective @@ -906,16 +422,7 @@ def expect_successful_run_of(*shell_commands) }.to output(/Your Gemfile lists/).to_stderr_from_any_process end - let(:json_schema_version_setter_location_regex) do - # In `write_elastic_graph_schema_def_code` `json_schema_version` is called on the 7th line of - # the file written to `schema.rb` (after the 5-line double-load guard). See below. - # - # Note: on Ruby 3.3, the path here winds up being slightly different; instead of just `schema.rb` it is something like: - # `../d20240216-23551-cvdjzo/schema.rb`. I think it's related to the temp directory we run these specs within. 
- /line 7 at `(\S*\/?)schema\.rb`/ - end - - def write_elastic_graph_schema_def_code(json_schema_version:, component_suffix: "", extra_sdl: "", component_name_extras: "", component_extras: "", omit_component_name_field: false, enforce_json_schema_version: true) + def write_elastic_graph_schema_def_code(component_suffix: "", extra_sdl: "", component_extras: "", number_of_shards: 5) code = <<~EOS Thread.current[:eg_schema_load_count] = (Thread.current[:eg_schema_load_count] || 0) + 1 if Thread.current[:eg_schema_load_count] > 1 @@ -923,8 +430,6 @@ def write_elastic_graph_schema_def_code(json_schema_version:, component_suffix: end ElasticGraph.define_schema do |schema| - schema.json_schema_version #{json_schema_version} - #{"schema.enforce_json_schema_version false" unless enforce_json_schema_version} schema.enum_type "Size" do |t| t.values "SMALL", "MEDIUM", "LAGE" end @@ -955,9 +460,9 @@ def write_elastic_graph_schema_def_code(json_schema_version:, component_suffix: schema.object_type "Component#{component_suffix}" do |t| t.field "id", "ID!" - #{%(t.field "name", "String!"#{component_name_extras}) unless omit_component_name_field} + t.field "name", "String!" t.field "designer_id", "ID" - t.index "components#{component_suffix}", number_of_shards: 5 + t.index "components#{component_suffix}", number_of_shards: #{number_of_shards} t.derive_indexed_type_fields "ComponentDesigner#{component_suffix}", from_id: "designer_id" do |derive| derive.append_only_set "designed_component_names", from: "name" @@ -980,9 +485,6 @@ def runtime_metadata_for_elastic_graph_schema_def_code(include_date_time_fields: end ElasticGraph.define_schema do |schema| - schema.json_schema_version 1 - schema.enforce_json_schema_version false - schema.object_type "MyType" do |t| t.field "id", "ID!" 
#{'t.field "timestamp", "DateTime"' if include_date_time_fields} @@ -1003,7 +505,7 @@ def expect_up_to_date_artifacts output = run_rake("schema_artifacts:check") }.not_to raise_error - expect(output).to include(DATASTORE_CONFIG_FILE, JSON_SCHEMAS_FILE, "up to date") + expect(output).to include(DATASTORE_CONFIG_FILE, RUNTIME_METADATA_FILE, "up to date") end def expect_all_artifacts_out_of_date_because_they_havent_been_dumped @@ -1011,25 +513,11 @@ def expect_all_artifacts_out_of_date_because_they_havent_been_dumped run_rake("schema_artifacts:check") }.to abort_with { |error| expect(error.message).to eq(<<~EOS.strip) - 5 schema artifact(s) are out of date. Run `bundle exec rake schema_artifacts:dump` to update the following artifact(s): + 3 schema artifact(s) are out of date. Run `bundle exec rake schema_artifacts:dump` to update the following artifact(s): 1. config/schema/artifacts/datastore_config.yaml (file does not exist) - 2. config/schema/artifacts/json_schemas.yaml (file does not exist) - 3. config/schema/artifacts/json_schemas_by_version/v1.yaml (file does not exist) - 4. config/schema/artifacts/runtime_metadata.yaml (file does not exist) - 5. config/schema/artifacts/schema.graphql (file does not exist) - EOS - } - end - - def expect_missing_versioned_json_schema_artifact(version_file) - expect { - run_rake("schema_artifacts:check") - }.to abort_with { |error| - expect(error.message).to eq(<<~EOS.strip) - 1 schema artifact(s) are out of date. Run `bundle exec rake schema_artifacts:dump` to update the following artifact(s): - - 1. config/schema/artifacts/json_schemas_by_version/#{version_file} (file does not exist) + 2. config/schema/artifacts/runtime_metadata.yaml (file does not exist) + 3. 
config/schema/artifacts/schema.graphql (file does not exist) EOS } end @@ -1038,39 +526,22 @@ def expect_out_of_date_artifacts_with_details(example_diff, test_color: false) expect { run_rake("schema_artifacts:check", pretend_tty: test_color) }.to abort_with { |error| - expect(error.message.lines.first(8).join).to eq(<<~EOS) - 6 schema artifact(s) are out of date. Run `bundle exec rake schema_artifacts:dump` to update the following artifact(s): + expect(error.message.lines.first(5).join).to eq(<<~EOS) + 3 schema artifact(s) are out of date. Run `bundle exec rake schema_artifacts:dump` to update the following artifact(s): 1. config/schema/artifacts/datastore_config.yaml (see [1] below for the first 50 lines of the diff) - 2. config/schema/artifacts/json_schemas.yaml (see [2] below for the first 50 lines of the diff) - 3. config/schema/artifacts/json_schemas_by_version/v1.yaml (see [3] below for the diff) - 4. config/schema/artifacts/json_schemas_by_version/v2.yaml (file does not exist) - 5. config/schema/artifacts/runtime_metadata.yaml (see [4] below for the first 50 lines of the diff) - 6. config/schema/artifacts/schema.graphql (see [5] below for the first 50 lines of the diff) + 2. config/schema/artifacts/runtime_metadata.yaml (see [2] below for the first 50 lines of the diff) + 3. 
config/schema/artifacts/schema.graphql (see [3] below for the first 50 lines of the diff) EOS expect(error.message).to include(example_diff) } end - def expect_out_of_date_artifacts - expect { - run_rake("schema_artifacts:check") - }.to abort_with a_string_including("out of date", DATASTORE_CONFIG_FILE, JSON_SCHEMAS_FILE) - end - def read_artifact(name) path = File.join("config", "schema", "artifacts", name) File.exist?(path) && File.read(path) end - - def delete_artifact(*name_parts) - ::File.delete(::File.join("config", "schema", "artifacts", *name_parts)) - end - - def versioned_json_schema_file(version) - ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v#{version}.yaml") - end end def run_rake( @@ -1078,6 +549,7 @@ def run_rake( pretend_tty: false, path_to_schema: "schema.rb", include_extension_module: true, + extension_modules: [], derived_type_name_formats: {}, type_name_overrides: {}, enum_value_overrides_by_type: {} @@ -1101,7 +573,7 @@ def as_active_instance index_document_sizes: true, path_to_schema: path_to_schema, schema_artifacts_directory: "config/schema/artifacts", - extension_modules: [JSONIngestion::SchemaDefinition::APIExtension, extension_module].compact, + extension_modules: extension_modules + [extension_module].compact, derived_type_name_formats: derived_type_name_formats, type_name_overrides: type_name_overrides, enum_value_overrides_by_type: enum_value_overrides_by_type, @@ -1110,15 +582,6 @@ def as_active_instance end end - def json_schema_for_keyword_type(type, extras = {}) - { - "allOf" => [ - {"$ref" => "#/$defs/#{type}"}, - {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH} - ] - }.merge(extras) - end - def enum_types_in_dumped_graphql_schema ::GraphQL::Schema.from_definition(read_artifact(GRAPHQL_SCHEMA_FILE)).types.filter_map do |name, type| name if type.kind.enum? 
&& !name.start_with?("__") diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_definition_spec_support.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_definition_spec_support.rb index 2d574983f..d8a98eae2 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_definition_spec_support.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_definition_spec_support.rb @@ -17,6 +17,7 @@ def build_datastore_config(index_document_sizes: false, schema_element_name_form define_schema( index_document_sizes: index_document_sizes, schema_element_name_form: schema_element_name_form, + extension_modules: [], &schema_definition ).datastore_config end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/mapping_customizations_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/mapping_customizations_spec.rb index 9f5392bd9..1cc6aebf4 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/mapping_customizations_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/mapping_customizations_spec.rb @@ -16,7 +16,6 @@ module SchemaDefinition it "respects `mapping` customizations set on a field definition, allowing them to augment or replace the mapping of the base type" do mapping = index_mapping_for "my_type" do |s| s.scalar_type "MyText" do |t| - t.json_schema type: "string" t.mapping type: "text" end @@ -221,7 +220,6 @@ module SchemaDefinition end s.scalar_type "MyText" do |t| - t.json_schema type: "string" t.mapping type: "text" end end diff --git 
a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb index e6cf32482..7b2965845 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/define_schema_spec.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/api_extension" require "elastic_graph/spec_support/have_readable_to_s_and_inspect_output" require_relative "graphql_schema_spec_support" @@ -81,6 +82,49 @@ module SchemaDefinition ) end + it "rejects type names that have been registered as reserved (as schema definition extensions do)" do + expect { + define_schema do |schema| + schema.state.reserved_type_names << "SomeReservedName" + schema.object_type "SomeReservedName" + end + }.to raise_error Errors::SchemaError, a_string_including( + "`SomeReservedName` cannot be used as a schema type", + "reserved name" + ) + end + + it "allows test schemas to skip JSON schema version setup" do + result = define_schema(json_schema_version: nil) do |schema| + schema.object_type("Widget") do |t| + t.field "id", "ID" + end + end + + expect(type_def_from(result, "Widget")).to eq(<<~EOS.strip) + type Widget { + id: ID + } + EOS + end + + it "allows test schemas to set the JSON schema version themselves" do + # If the test support logic re-set the version it would fail with a "can only be set once" error. 
+ result = define_schema(extension_modules: [JSONIngestion::SchemaDefinition::APIExtension]) do |schema| + schema.json_schema_version 7 + + schema.object_type("Widget") do |t| + t.field "id", "ID" + end + end + + expect(type_def_from(result, "Widget")).to eq(<<~EOS.strip) + type Widget { + id: ID + } + EOS + end + it "produces the same GraphQL output, regardless of the order the types are defined in" do object_type_definitions = { "Component" => lambda do |t| @@ -142,7 +186,6 @@ module SchemaDefinition schema.scalar_type "MyScalar" do |t| expect(t).to have_readable_to_s_and_inspect_output.including("MyScalar") t.mapping type: "keyword" - t.json_schema type: "string" end schema.enum_type "Color" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/derived_graphql_type_customizations_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/derived_graphql_type_customizations_spec.rb index f280fbced..7d93460de 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/derived_graphql_type_customizations_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/derived_graphql_type_customizations_spec.rb @@ -170,7 +170,6 @@ module SchemaDefinition api.raw_sdl "directive @external on OBJECT" api.scalar_type "Url" do |t| - t.json_schema type: "string" t.mapping type: "keyword" t.customize_derived_types "UrlEdge", "UrlConnection" do |dt| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/graphql_schema_spec_support.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/graphql_schema_spec_support.rb index bca5064cc..2c393cafc 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/graphql_schema_spec_support.rb +++ 
b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/graphql_schema_spec_support.rb @@ -35,7 +35,7 @@ def raise_invalid_graphql_name_error_for(name) end def define_schema(**options, &block) - define_schema_with_schema_elements(schema_elements, **options, &block).graphql_schema_string + define_schema_with_schema_elements(schema_elements, extension_modules: [], **options, &block).graphql_schema_string end def correctly_cased(name) diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/highlights_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/highlights_spec.rb index 175879824..117bb5be2 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/highlights_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/highlights_spec.rb @@ -59,12 +59,10 @@ module SchemaDefinition end api.scalar_type "CustomNumber" do |t| - t.json_schema type: "number" t.mapping type: "integer" end api.scalar_type "CustomKeyword" do |t| - t.json_schema type: "string" t.mapping type: "keyword" end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/implements_shared_examples.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/implements_shared_examples.rb index 14da85267..dde06ab27 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/implements_shared_examples.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/implements_shared_examples.rb @@ -258,31 +258,6 @@ def implemented_interfaces_from(sdl, type_name) EOS end - it "does not care if the interface and object fields have different JSON schema" do - result = define_schema do |schema| - schema.public_send ruby_definition_method, "Thing" do |t| 
- t.implements "HasID" - t.field "id", "ID!" do |f| - f.json_schema maxLength: 40 - end - t.field "name", "String" - end - - schema.interface_type "HasID" do |t| - t.field "id", "ID!" do |f| - f.json_schema maxLength: 30 - end - end - end - - expect(type_def_from(result, "Thing")).to eq(<<~EOS.strip) - #{graphql_definition_keyword} Thing implements HasID { - id: ID! - name: String - } - EOS - end - it "does not care if the interface and object fields have different index mappings" do result = define_schema do |schema| schema.public_send ruby_definition_method, "Thing" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb index ae7f85e21..18186e5f9 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb @@ -674,9 +674,7 @@ module SchemaDefinition result = object_type "Widget", include_docs: true do |t| t.documentation "A widget." 
t.field "id", "ID" - t.field "user_id", "ID" do |f| - f.json_schema nullable: false - end + t.field "user_id", "ID" t.index "widgets" do |i| i.route_with "user_id" diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/scalar_type_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/scalar_type_spec.rb index e8db9a3df..798bddf1d 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/scalar_type_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/scalar_type_spec.rb @@ -17,7 +17,6 @@ module SchemaDefinition it "generates the SDL for a custom scalar type" do result = scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" end expect(type_def_from(result, "BigInt")).to eq(<<~EOS.strip) @@ -28,23 +27,14 @@ module SchemaDefinition it "requires the `mapping` to be specified so we know how to index it in the datastore" do expect { scalar_type "BigInt" do |t| - t.json_schema type: "integer" + # Intentionally not calling `t.mapping`. 
end }.to raise_error Errors::SchemaError, a_string_including("BigInt", "lacks `mapping`") end - it "requires the `json_schema` to be specified so we know how it should be encoded in an ingested event" do - expect { - scalar_type "BigInt" do |t| - t.mapping type: "long" - end - }.to raise_error Errors::SchemaError, a_string_including("BigInt", "lacks `json_schema`") - end - it "requires a `type` be specified on the `mapping` since we can't guess what the mapping type should be" do expect { scalar_type "BigInt" do |t| - t.json_schema type: "integer" t.mapping null_value: 0 end }.to raise_error Errors::SchemaError, a_string_including("BigInt", "mapping", "type:") @@ -58,7 +48,6 @@ module SchemaDefinition schema.scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" end end @@ -79,7 +68,6 @@ module SchemaDefinition schema.scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" t.directive "meta", since_date: "2021-08-01" t.directive "meta", author: "John" end @@ -93,7 +81,6 @@ module SchemaDefinition it "allows documentation to be defined on the scalar" do result = scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" t.documentation "A number that exceeds the normal `Int` max." 
end @@ -108,7 +95,6 @@ module SchemaDefinition it "defines a filter type with `any_of` and `equal_to_any_of` for a mapping type that can't efficiently support range queries" do result = scalar_type "FullText" do |t| t.mapping type: "text" - t.json_schema type: "string" end expect(filter_type_from(result, "FullText")).to eq(<<~EOS.strip) @@ -124,7 +110,6 @@ module SchemaDefinition it "defines a filter type with `any_of`, `equal_to_any_of`, and comparison operators for a numeric mapping type that can efficiently support range queries" do result = scalar_type "Short" do |t| t.mapping type: "short" - t.json_schema type: "integer" end expect(filter_type_from(result, "Short")).to eq(<<~EOS.strip) @@ -144,7 +129,6 @@ module SchemaDefinition it "defines a filter type with `any_of`, `equal_to_any_of`, and comparison operators for a date mapping type that can efficiently support range queries" do result = scalar_type "CalendarDate" do |t| t.mapping type: "date" - t.json_schema type: "string" end expect(filter_type_from(result, "CalendarDate")).to eq(<<~EOS.strip) @@ -164,7 +148,6 @@ module SchemaDefinition it "defines a `*ListFilterInput` type so that lists of the custom scalar type can be filtered on" do result = scalar_type "Short" do |t| t.mapping type: "short" - t.json_schema type: "integer" end expect(list_filter_type_from(result, "Short", include_docs: true)).to eq(<<~EOS.strip) @@ -218,7 +201,6 @@ module SchemaDefinition it "documents each filter field" do result = scalar_type "Byte" do |t| t.mapping type: "byte" - t.json_schema type: "integer" end expect(filter_type_from(result, "Byte", include_docs: true)).to eq(<<~EOS.strip) diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb index 1835088ba..3be37da65 100644 --- 
a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb @@ -41,6 +41,28 @@ module SchemaDefinition end end + it "rejects `route_with` fields that resolve to a non-leaf type" do + expect { + index_definition_metadata_for("widgets") do |i| + i.route_with "nested_fields_gql" + end + }.to raise_error Errors::SchemaError, a_string_including( + "shard routing field `MyType.nested_fields_gql", + "is not a leaf field" + ) + end + + it "includes nested field-path guidance for unresolved nested `route_with` fields" do + expect { + index_definition_metadata_for("widgets") do |i| + i.route_with "nested_fields_gql.missing" + end + }.to raise_error Errors::SchemaError, a_string_including( + "Field `MyType.nested_fields_gql.missing` cannot be resolved", + "Verify that all fields and types referenced by `nested_fields_gql.missing` are defined." + ) + end + it "defaults `route_with` to `id` because that's the default routing the datastore uses" do components = index_definition_metadata_for("components") expect(components.route_with).to eq "id" @@ -59,6 +81,28 @@ module SchemaDefinition expect(components.rollover).to eq nil end + it "rejects rollover fields that are not date or datetime fields" do + expect { + index_definition_metadata_for("widgets") do |i| + i.rollover :monthly, "group_id" + end + }.to raise_error Errors::SchemaError, a_string_including( + "rollover field `MyType.group_id", + "is not a `Date` or `DateTime` field" + ) + end + + it "rejects rollover fields that are lists" do + expect { + index_definition_metadata_for("widgets", on_my_type: ->(t) { t.field "timestamps", "[DateTime!]!" 
}) do |i| + i.rollover :monthly, "timestamps" + end + }.to raise_error Errors::SchemaError, a_string_including( + "rollover field `MyType.timestamps", + "is a list field" + ) + end + it "dumps the `rollover` timestamp field's `name_in_index`" do widgets = index_definition_metadata_for("widgets") do |i| i.rollover :monthly, "created_at_gql" diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/graphql_fields_by_name_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/graphql_fields_by_name_spec.rb index 7844ba422..2997bf7d3 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/graphql_fields_by_name_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/graphql_fields_by_name_spec.rb @@ -243,7 +243,6 @@ module SchemaDefinition schema.scalar_type "Duration" do |t| t.mapping type: "keyword" - t.json_schema type: "string" end schema.object_type "Options" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/pruning_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/pruning_spec.rb index ba53dd6d5..522fe1b91 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/pruning_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/object_types_by_name/pruning_spec.rb @@ -113,7 +113,6 @@ module SchemaDefinition s.scalar_type "CustomFrameworkScalar" do |t| t.mapping type: "keyword" - t.json_schema type: "string" t.graphql_only true end diff --git 
a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/runtime_metadata_support.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/runtime_metadata_support.rb index b0dcbdc1d..c4c8bfb4e 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/runtime_metadata_support.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/runtime_metadata_support.rb @@ -18,7 +18,7 @@ module SchemaDefinition def define_schema(**options, &block) super( schema_element_name_form: "snake_case", - extension_modules: [JSONIngestion::SchemaDefinition::APIExtension], + extension_modules: [], **options, &block ) diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb index 19c9707dd..791955919 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/scalar_types_by_name_spec.rb @@ -21,14 +21,13 @@ module SchemaDefinition metadata = scalar_type_metadata_for "BigInt" do |s| s.scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" + t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" end end expect(metadata).to eq scalar_type_with(coercion_adapter_ref: { "name" => "ExampleScalarCoercionAdapter", - "require_path" => "support/example_extensions/scalar_coercion_adapter" + "require_path" => 
"elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" }) end @@ -36,7 +35,6 @@ module SchemaDefinition metadata = scalar_type_metadata_for "BigInt" do |s| s.scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" t.prepare_for_indexing_with "ExampleIndexingPreparer", defined_at: "support/example_extensions/indexing_preparer" end end @@ -51,10 +49,9 @@ module SchemaDefinition define_schema do |s| s.scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" expect { - t.coerce_with "NotAValidConstant", defined_at: "support/example_extensions/scalar_coercion_adapter" + t.coerce_with "NotAValidConstant", defined_at: "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" }.to raise_error NameError, a_string_including("NotAValidConstant") end end @@ -64,7 +61,6 @@ module SchemaDefinition define_schema do |s| s.scalar_type "BigInt" do |t| t.mapping type: "long" - t.json_schema type: "integer" expect { t.prepare_for_indexing_with "NotAValidConstant", defined_at: "support/example_extensions/indexing_preparer" @@ -83,20 +79,20 @@ module SchemaDefinition metadata = scalar_type_metadata_for "Int" do |s| s.on_built_in_types do |t| if t.is_a?(SchemaElements::ScalarType) - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" + t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" end end end expect(metadata.coercion_adapter_ref).to eq({ "name" => "ExampleScalarCoercionAdapter", - "require_path" => "support/example_extensions/scalar_coercion_adapter" + "require_path" => "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" }) end describe "`grouping_missing_value_placeholder`" do it "can be set to a number" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer") do |t| + 
grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long") do |t| t.grouping_missing_value_placeholder(-1) end @@ -104,7 +100,7 @@ module SchemaDefinition end it "can be set to a string" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("keyword", type: "string") do |t| + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("keyword") do |t| t.grouping_missing_value_placeholder "missing" end @@ -112,10 +108,10 @@ module SchemaDefinition end it "does not infer placeholder when placeholder is set to nil" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("keyword", type: "string") + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("keyword") expect(grouping_missing_value_placeholder).not_to be_nil - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("keyword", type: "string") do |t| + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("keyword") do |t| t.grouping_missing_value_placeholder nil end expect(grouping_missing_value_placeholder).to be_nil @@ -123,7 +119,7 @@ module SchemaDefinition it "raises an error when placeholder is not a String, Numeric, or nil" do expect { - grouping_missing_value_placeholder_for("keyword", type: "string") do |t| + grouping_missing_value_placeholder_for("keyword") do |t| t.grouping_missing_value_placeholder :symbol end }.to raise_error Errors::SchemaError, a_string_including( @@ -134,7 +130,7 @@ module SchemaDefinition it "raises an error when placeholder is an array" do expect { - grouping_missing_value_placeholder_for("keyword", type: "string") do |t| + grouping_missing_value_placeholder_for("keyword") do |t| t.grouping_missing_value_placeholder ["invalid"] end }.to raise_error Errors::SchemaError, a_string_including( @@ -145,7 +141,7 @@ module SchemaDefinition it "raises an error when placeholder is a hash" do expect { - 
grouping_missing_value_placeholder_for("keyword", type: "string") do |t| + grouping_missing_value_placeholder_for("keyword") do |t| t.grouping_missing_value_placeholder({key: "value"}) end }.to raise_error Errors::SchemaError, a_string_including( @@ -155,7 +151,7 @@ module SchemaDefinition end it "accepts integer values" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer") do |t| + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long") do |t| t.grouping_missing_value_placeholder 42 end @@ -163,7 +159,7 @@ module SchemaDefinition end it "accepts float values" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("double", type: "number") do |t| + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("double") do |t| t.grouping_missing_value_placeholder 3.14 end @@ -172,7 +168,7 @@ module SchemaDefinition float_types.each do |float_type| it "infers 'NaN' for float type #{float_type}" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for(float_type, type: "number") + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for(float_type) expect(grouping_missing_value_placeholder).to eq(MISSING_NUMERIC_PLACEHOLDER) end @@ -180,7 +176,7 @@ module SchemaDefinition string_types.each do |string_type| it "infers secure random string for string type #{string_type}" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for(string_type, type: "string") + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for(string_type) expect(grouping_missing_value_placeholder).to eq(MISSING_STRING_PLACEHOLDER) end @@ -188,136 +184,36 @@ module SchemaDefinition integer_types.grep_v(/long/).each do |int_type| it "does not infer placeholder for safe integer type #{int_type} with default coercion adapter" do - grouping_missing_value_placeholder = 
grouping_missing_value_placeholder_for(int_type, type: "integer") + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for(int_type) expect(grouping_missing_value_placeholder).to be_nil end it "infers 'NaN' for safe integer type #{int_type} with custom coercion adapter" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for(int_type, type: "integer") do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for(int_type) do |t| + t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" end expect(grouping_missing_value_placeholder).to eq(MISSING_NUMERIC_PLACEHOLDER) end end - it "does not infer placeholder for long types with JSON-safe min/max range and default coercion adapter" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX) - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "infers 'NaN' for long types with JSON-safe min/max range and custom coercion adapter" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to eq(MISSING_NUMERIC_PLACEHOLDER) - end - - it "does not infer a value for long types with max too large" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: -(2**53) + 1, maximum: (2**60) - 1) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" + it "does not 
infer a placeholder for `long` types since core ElasticGraph cannot know their range is float-safe" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long") do |t| + t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" end expect(grouping_missing_value_placeholder).to be_nil end - it "does not infer placeholder for long types with min too small" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: -(2**60), maximum: (2**53) - 1) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" + it "does not infer a placeholder for `unsigned_long` types since core ElasticGraph cannot know their range is float-safe" do + grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("unsigned_long") do |t| + t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "elastic_graph/spec_support/example_extensions/scalar_coercion_adapter" end expect(grouping_missing_value_placeholder).to be_nil end - it "does not infer placeholder for long types with only minimum specified" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: 0) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "does not infer placeholder for long types with only maximum specified" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", maximum: 1000) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "does not infer placeholder for long types without min/max specified" do 
- grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer") do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "does not infer placeholder for unsigned_long types with safe maximum and default coercion adapter" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("unsigned_long", type: "integer", maximum: (2**53) - 1) - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "infers 'NaN' for unsigned_long types with safe maximum and custom coercion adapter" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("unsigned_long", type: "integer", maximum: (2**53) - 1) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to eq(MISSING_NUMERIC_PLACEHOLDER) - end - - it "does not infer placeholder for unsigned_long types with unsafe maximum" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("unsigned_long", type: "integer", maximum: (2**60) - 1) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "does not infer placeholder for unsigned_long types without maximum specified" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("unsigned_long", type: "integer") do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to be_nil - end - - describe "boundary conditions for JSON-safe long ranges" do - it "does not infer placeholder when exactly at safe boundaries with default coercion adapter" 
do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX) - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "infers 'NaN' when exactly at safe boundaries with custom coercion adapter" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to eq(MISSING_NUMERIC_PLACEHOLDER) - end - - it "does not infer placeholder when minimum is one below safe range" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: JSON_SAFE_LONG_MIN - 1, maximum: JSON_SAFE_LONG_MAX) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to be_nil - end - - it "does not infer placeholder when maximum is one above safe range" do - grouping_missing_value_placeholder = grouping_missing_value_placeholder_for("long", type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX + 1) do |t| - t.coerce_with "ExampleScalarCoercionAdapter", defined_at: "support/example_extensions/scalar_coercion_adapter" - end - - expect(grouping_missing_value_placeholder).to be_nil - end - end - it "has expected value for all built-in scalar types" do results = define_schema built_in_scalars = results.state.scalar_types_by_name.keys @@ -335,20 +231,19 @@ module SchemaDefinition "Float" => MISSING_NUMERIC_PLACEHOLDER, "ID" => MISSING_STRING_PLACEHOLDER, "Int" => MISSING_NUMERIC_PLACEHOLDER, # GraphQL automatically coerces Int values - "JsonSafeLong" => MISSING_NUMERIC_PLACEHOLDER, # custom coercion adapter coerces floats back to 
integers + "JsonSafeLong" => nil, # the safe range is only known to JSON ingestion (which infers a placeholder) "LocalTime" => nil, - "LongString" => nil, # outside of the JSON safe range. + "LongString" => nil, # same as JsonSafeLong (and its range is outside the JSON safe range anyway) "String" => MISSING_STRING_PLACEHOLDER, "TimeZone" => MISSING_STRING_PLACEHOLDER, "Untyped" => MISSING_STRING_PLACEHOLDER }) end - def grouping_missing_value_placeholder_for(mapping_type, **json_schema) + def grouping_missing_value_placeholder_for(mapping_type) metadata = scalar_type_metadata_for "CustomScalar" do |s| s.scalar_type "CustomScalar" do |t| t.mapping type: mapping_type - t.json_schema(**json_schema) yield t if block_given? end end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/schema_elements/deprecated_element_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/schema_elements/deprecated_element_spec.rb new file mode 100644 index 000000000..418279df4 --- /dev/null +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/schema_elements/deprecated_element_spec.rb @@ -0,0 +1,53 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/spec_support/schema_definition_helpers" + +module ElasticGraph + module SchemaDefinition + module SchemaElements + RSpec.describe DeprecatedElement do + include_context "SchemaDefinitionHelpers" + + it "records `deleted_type`, `deleted_field`, and `renamed_from` calls so that schema artifact tooling can consume them" do + state = define_schema(schema_element_name_form: "snake_case", extension_modules: []) do |schema| + schema.deleted_type "OldType" + + schema.object_type "Widget" do |t| + t.renamed_from "OldWidget" + t.deleted_field "legacy_name" + + t.field "id", "ID!" + t.field "name", "String" do |f| + f.renamed_from "old_name" + end + end + end.state + + expect(state.deleted_types_by_old_name.keys).to eq ["OldType"] + expect(state.renamed_types_by_old_name.keys).to eq ["OldWidget"] + expect(state.deleted_fields_by_type_name_and_old_field_name.fetch("Widget").keys).to eq ["legacy_name"] + expect(state.renamed_fields_by_type_name_and_old_field_name.fetch("Widget").keys).to eq ["old_name"] + + expect(state.deleted_types_by_old_name.fetch("OldType").description).to match( + /\A`schema\.deleted_type "OldType"` at .+:\d+\z/ + ) + expect(state.renamed_types_by_old_name.fetch("OldWidget").description).to match( + /\A`type\.renamed_from "OldWidget"` at .+:\d+\z/ + ) + expect(state.deleted_fields_by_type_name_and_old_field_name.fetch("Widget").fetch("legacy_name").description).to match( + /\A`type\.deleted_field "legacy_name"` at .+:\d+\z/ + ) + expect(state.renamed_fields_by_type_name_and_old_field_name.fetch("Widget").fetch("old_name").description).to match( + /\A`field\.renamed_from "old_name"` at .+:\d+\z/ + ) + end + end + end + end +end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/schema_elements/field_path_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/schema_elements/field_path_spec.rb index 7e3cb11dc..4c38deba3 100644 --- 
a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/schema_elements/field_path_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/schema_elements/field_path_spec.rb @@ -15,8 +15,7 @@ module SchemaElements class FieldPath RSpec.describe Resolver do it "can only be created after the user definition is complete, to avoid problems" do - schema_elements = SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new(form: "snake_case") - api = API.new(schema_elements, true) + api = build_api expect { Resolver.new(api.state) @@ -28,6 +27,32 @@ class FieldPath expect(Resolver.new(api.state)).to be_a Resolver end + + it "describes resolved paths using the parent type name and the `name_in_index` of each part" do + api = build_api + + api.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "cost", "Money" + t.index "widgets" + end + + api.object_type "Money" do |t| + t.field "amount", "Int", name_in_index: "amount_in_index" + end + + api.results # signals the definition is complete + + widget_type = api.state.object_types_by_name.fetch("Widget") + path = Resolver.new(api.state).resolve_public_path(widget_type, "cost.amount") { |field| true } + + expect(path.fully_qualified_path_in_index).to eq("Widget.cost.amount_in_index") + end + + def build_api + schema_elements = SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new(form: "snake_case") + API.new(schema_elements, true) + end end end end diff --git a/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb b/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb index a9769863a..0d87095f3 100644 --- a/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb +++ b/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb @@ -103,9 +103,6 @@ def wait_for_other_processes # status if we're not running it's test suite. add_filter "/elasticgraph-local/" unless spec_files_to_run.any? 
{ |f| f.include?("/elasticgraph-local/") } - # The JSON ingestion gem is being introduced by extracting implementation first and moving its tests later. - add_filter "/elasticgraph-json_ingestion/" - # This version file is loaded from our gemspecs, which can get loaded by bundler before we get here. # SimpleCov is only able to track coverage of files loaded after it starts, so we need to filter them out if # their constant is already defined. They don't contain any branching statements or anything so it's ok to diff --git a/spec_support/lib/elastic_graph/spec_support/example_extensions/scalar_coercion_adapter.rb b/spec_support/lib/elastic_graph/spec_support/example_extensions/scalar_coercion_adapter.rb new file mode 100644 index 000000000..6b4d85b41 --- /dev/null +++ b/spec_support/lib/elastic_graph/spec_support/example_extensions/scalar_coercion_adapter.rb @@ -0,0 +1,20 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +# This example adapter is shared by multiple gem suites (e.g. `elasticgraph-schema_definition` +# and `elasticgraph-json_ingestion`). It must live in `spec_support` (rather than being +# duplicated under each gem's `spec/support`) so that every suite loads it from the same +# require path: the extension loader raises if the same extension is loaded from two +# different paths within one process, as can happen when one worker runs multiple suites. +class ExampleScalarCoercionAdapter + def self.coerce_input(value, ctx) + end + + def self.coerce_result(value, ctx) + end +end