From 0b041fc7771f78496fd9872991685284738246b3 Mon Sep 17 00:00:00 2001 From: Marc Daniels Date: Fri, 12 Jun 2026 13:11:47 -0400 Subject: [PATCH] Narrow queried indices based on __typename filter. When a query on an abstract type filters on `__typename`, we can skip any index that contains none of the requested concrete types. For example, querying `DistributionChannel` with `__typename: {equal_to_any_of: [PhysicalStore]}` only needs to hit `physical_stores`, not `distribution_channels`. `TypenameFilter` determines the subset of type names satisfying the `__typename` filter using full set-algebra support across `not`, `any_of`, and `all_of` combinators. `DatastoreQuery` uses this via `narrowed_search_index_definitions`, which intersects `initial_search_index_definitions` with only the indices that could contain the filtered types. Closes #1179 Generated with Claude Code --- .../lib/elastic_graph/graphql.rb | 1 + .../elastic_graph/graphql/datastore_query.rb | 32 +++++++++++++++- .../index_expression_builder.rb | 4 +- .../graphql/datastore_query/routing_picker.rb | 4 +- .../graphql/filtering/typename_filter.rb | 38 +++++++++++++++++++ .../elastic_graph/graphql/datastore_query.rbs | 3 ++ .../graphql/filtering/typename_filter.rbs | 22 +++++++++++ .../spec/acceptance/search_spec.rb | 33 ++++++++++++++++ .../graphql/datastore_query/merge_spec.rb | 23 +++++------ .../search_index_expression_spec.rb | 2 +- 10 files changed, 144 insertions(+), 18 deletions(-) create mode 100644 elasticgraph-graphql/lib/elastic_graph/graphql/filtering/typename_filter.rb create mode 100644 elasticgraph-graphql/sig/elastic_graph/graphql/filtering/typename_filter.rbs diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql.rb b/elasticgraph-graphql/lib/elastic_graph/graphql.rb index 3700c2502..995aa2419 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql.rb @@ -124,6 +124,7 @@ def datastore_query_builder filter_node_interpreter:, runtime_metadata:, logger:, + index_definitions_by_type_name: @datastore_core.index_definitions_by_graphql_type, default_page_size: @config.default_page_size, max_page_size: @config.max_page_size ) diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb index 8ea66f290..ce8279d6b 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb @@ -29,6 +29,7 @@ class GraphQL class DatastoreQuery < Support::MemoizableData.define( :total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker, :index_expression_builder, :default_page_size, :initial_search_index_definitions, :max_page_size, + :typename_filter, :index_definitions_by_type_name, :client_filters, :internal_filters, :sort, :document_pagination, :requested_fields, :request_all_fields, :requested_highlights, :request_all_highlights, :individual_docs_needed, :size_multiplier, :monotonic_clock_deadline, :schema_element_names @@ -39,6 +40,7 @@ class DatastoreQuery < Support::MemoizableData.define( require "elastic_graph/graphql/datastore_query/index_expression_builder" require "elastic_graph/graphql/datastore_query/paginator" require "elastic_graph/graphql/datastore_query/routing_picker" + require "elastic_graph/graphql/filtering/typename_filter" # Performs a list of queries by building a hash of datastore msearch header/body tuples (keyed # by query), yielding them to the caller, and then post-processing the results. The caller is @@ -141,13 +143,29 @@ def to_datastore_msearch_header_and_body # @!attribute [r] initial_search_index_definitions # The index definitions as provided at construction, before any subsequent adjustments. + # Returns the narrowed set of index definitions to search, based on any `__typename` filter + # in the client filters. Falls back to `initial_search_index_definitions` when there is no + # `__typename` filter or no type name mapping is available. + def narrowed_search_index_definitions + @narrowed_search_index_definitions ||= begin + filtered_type_names = typename_filter.filtered_type_names(client_filters.to_a) + + if filtered_type_names + possible_index_defs = index_definitions_by_type_name.slice(*filtered_type_names).values.flatten + initial_search_index_definitions & possible_index_defs + else + initial_search_index_definitions + end + end + end + # Returns an index_definition expression string to use for searches. This string can specify # multiple indices, use wildcards, etc. For info about what is supported, see: # https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html def search_index_expression @search_index_expression ||= index_expression_builder.determine_search_index_expression( all_filters, - initial_search_index_definitions, + narrowed_search_index_definitions, # When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return # the standard aggregations response structure, which causes problems. require_indices: !aggregations_datastore_body.empty? @@ -348,7 +366,7 @@ def highlight # Encapsulates dependencies of `Query`, giving us something we can expose off of `application` # to build queries when desired. - class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size) + class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size, :index_definitions_by_type_name) def routing_picker @routing_picker ||= RoutingPicker.new( filter_node_interpreter: filter_node_interpreter, @@ -356,6 +374,14 @@ def routing_picker ) end + def typename_filter + @typename_filter ||= Filtering::TypenameFilter.new( + filter_node_interpreter: filter_node_interpreter, + schema_names: runtime_metadata.schema_element_names, + known_type_names: index_definitions_by_type_name.keys + ) + end + def index_expression_builder @index_expression_builder ||= IndexExpressionBuilder.new( filter_node_interpreter: filter_node_interpreter, @@ -390,10 +416,12 @@ def new_query( DatastoreQuery.new( routing_picker: routing_picker, + typename_filter: typename_filter, index_expression_builder: index_expression_builder, logger: logger, schema_element_names: runtime_metadata.schema_element_names, initial_search_index_definitions: initial_search_index_definitions, + index_definitions_by_type_name: index_definitions_by_type_name, client_filters: client_filters.to_set, internal_filters: internal_filters.to_set, sort: sort, diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb index d03ad1d02..82200f1df 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb @@ -141,8 +141,8 @@ def +(other) # `Query::IndexExpressionBuilder` exists only for use by `Query` and is effectively private. private_constant :IndexExpressionBuilder - # Steep is complaining that it can't find some `Query` but they are not in this file... - # @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, with, to_datastore_msearch_header_and_body + # Steep can't find implementations of these `DatastoreQuery` methods because they're defined in `datastore_query.rb`, not in this file. + # @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, narrowed_search_index_definitions, with, to_datastore_msearch_header_and_body end end end diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb index 8cdc00d3b..8755e8655 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb @@ -53,8 +53,8 @@ def extract_eligible_routing_values(filter_hashes, routing_field_paths) # `Query::RoutingPicker` exists only for use by `Query` and is effectively private. private_constant :RoutingPicker - # Steep is complaining that it can't find some `Query` but they are not in this file... - # @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, with, to_datastore_msearch_header_and_body + # Steep can't find implementations of these `DatastoreQuery` methods because they're defined in `datastore_query.rb`, not in this file. + # @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, narrowed_search_index_definitions, with, to_datastore_msearch_header_and_body end end end diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/typename_filter.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/typename_filter.rb new file mode 100644 index 000000000..17f609204 --- /dev/null +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/typename_filter.rb @@ -0,0 +1,38 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/graphql/filtering/filter_value_set_extractor" + +module ElasticGraph + class GraphQL + module Filtering + # Responsible for extracting a constrained set of concrete type names from query filters, + # based on a `__typename` filter. + class TypenameFilter + def initialize(filter_node_interpreter:, schema_names:, known_type_names:) + @extractor = FilterValueSetExtractor.for_equality(filter_node_interpreter, schema_names) + @known_type_names = known_type_names + end + + # Returns the subset of `known_type_names` that satisfy any `__typename` filter in + # `filter_hashes`. Returns `nil` if the filters place no constraint on `__typename`, + # meaning all type names are potentially matched. + def filtered_type_names(filter_hashes) + typename_set = @extractor.extract_filter_value_set(filter_hashes, ["__typename"]) + return nil unless typename_set + + if typename_set.inclusive? + typename_set.values.to_a + else + @known_type_names - typename_set.values.to_a + end + end + end + end + end +end diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query.rbs index 27a6f2628..1c3f08bbf 100644 --- a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query.rbs +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query.rbs @@ -2,6 +2,7 @@ module ElasticGraph class GraphQL class DatastoreQuerySupertype attr_reader initial_search_index_definitions: ::Array[DatastoreCore::_IndexDefinition] + attr_reader index_definitions_by_type_name: ::Hash[::String, ::Array[DatastoreCore::_IndexDefinition]] attr_reader aggregations: ::Hash[::String, Aggregation::Query] attr_reader document_paginator: DatastoreQuery::DocumentPaginator attr_reader total_document_count_needed: bool @@ -9,6 +10,7 @@ module ElasticGraph # Note: this is a partial signature definition class DatastoreQuery < DatastoreQuerySupertype + def narrowed_search_index_definitions: () -> ::Array[DatastoreCore::_IndexDefinition] def shard_routing_values: () -> ::Array[::String]? def merge_with: (**untyped) -> DatastoreQuery def search_index_expression: () -> ::String @@ -21,6 +23,7 @@ module ElasticGraph def self.new: ( runtime_metadata: SchemaArtifacts::RuntimeMetadata::Schema, logger: ::Logger, + index_definitions_by_type_name: ::Hash[::String, ::Array[DatastoreCore::_IndexDefinition]], **untyped ) -> Builder diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/typename_filter.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/typename_filter.rbs new file mode 100644 index 000000000..b2f0b9742 --- /dev/null +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/typename_filter.rbs @@ -0,0 +1,22 @@ +module ElasticGraph + class GraphQL + module Filtering + class TypenameFilter + def initialize: ( + filter_node_interpreter: FilterNodeInterpreter, + schema_names: SchemaArtifacts::RuntimeMetadata::SchemaElementNames, + known_type_names: ::Array[::String] + ) -> void + + def filtered_type_names: ( + ::Array[::Hash[::String, untyped]] + ) -> ::Array[::String]? + + private + + @extractor: FilterValueSetExtractor[EqualityValueSet] + @known_type_names: ::Array[::String] + end + end + end +end diff --git a/elasticgraph-graphql/spec/acceptance/search_spec.rb b/elasticgraph-graphql/spec/acceptance/search_spec.rb index 7c19ab980..efa9a3e40 100644 --- a/elasticgraph-graphql/spec/acceptance/search_spec.rb +++ b/elasticgraph-graphql/spec/acceptance/search_spec.rb @@ -701,6 +701,9 @@ module ElasticGraph it "correctly scopes results to the queried interface level across a multi-level type hierarchy", :expect_search_routing do established_on_asc = :"#{case_correctly("established_on")}_ASC" id_desc = :"#{case_correctly("id")}_DESC" + dc_index = index_definition_name_for("distribution_channels") + ps_index = index_definition_name_for("physical_stores") + both_indices = "#{dc_index},#{ps_index}" # The DistributionChannel hierarchy has two branches: # DistributionChannel (index: distribution_channels) @@ -738,26 +741,31 @@ module ElasticGraph expect(channels.map { |c| c["__typename"] }).to contain_exactly( *expected_store_typenames, "DirectWholesaler", "BrokerWholesaler" ) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Querying at the Retail interface excludes wholesalers, even though they live in # the same distribution_channels index. retailers = list_retailers_with(*store_fragments) expect(retailers.map { |r| r["__typename"] }).to contain_exactly(*expected_store_typenames) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Querying at the Store interface likewise excludes wholesalers. stores = list_stores_with(*store_fragments) expect(stores.map { |s| s["__typename"] }).to contain_exactly(*expected_store_typenames) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Using `nodes` (instead of `edges { node }`) also works. This exercises the `nodes` # code path where the field type is list-wrapped (e.g. `[Store!]!`). stores_via_nodes = list_stores_via_nodes_with(*store_fragments) expect(stores_via_nodes.map { |s| s["__typename"] }).to contain_exactly(*expected_store_typenames) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Filters apply within the correct scope at each level. # At distribution_channels: active=false matches only wholesaler2. inactive = list_distribution_channels_with(*all_channel_fragments, filter: {active: {equal_to_any_of: [false]}}) expect(inactive.map { |c| c["__typename"] }).to contain_exactly("BrokerWholesaler") expect(inactive.map { |c| c["id"] }).to contain_exactly(wholesaler2.fetch(:id)) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # At retailers: established_on filter applies, and wholesalers are still excluded. retailers_after_2020 = list_retailers_with(*store_fragments, filter: {established_on: {gte: "2020-01-01"}}) @@ -765,6 +773,7 @@ module ElasticGraph online_store1.fetch(:id), online_store2.fetch(:id), physical_store2.fetch(:id) ) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Sort by established_on at the stores level spans both indices correctly. stores_sorted = list_stores_with(*store_fragments, order_by: [established_on_asc]) @@ -774,6 +783,7 @@ module ElasticGraph online_store2.fetch(:id), physical_store2.fetch(:id) ]) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Pagination at the distribution_channels level covers all types. channels_page, page_info = list_distribution_channels_and_page_info_with( @@ -783,6 +793,7 @@ module ElasticGraph ) expect(channels_page.size).to eq(4) expect(page_info).to include(case_correctly("has_next_page") => true) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Filter by ID spans indices and respects the __typename scope at each query level. stores_by_id = list_stores_with( @@ -793,11 +804,13 @@ module ElasticGraph [physical_store1.fetch(:id), "PhysicalStore"], [online_store1.fetch(:id), "OnlineStore"] ) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # Aggregations respect the same __typename scoping as document queries. store_agg_count = call_graphql_query("query { #{case_correctly("store_aggregations")} { nodes { #{case_correctly("count")} } } }") .dig("data", case_correctly("store_aggregations"), "nodes", 0, case_correctly("count")) expect(store_agg_count).to eq(expected_store_typenames.size) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # `_typename` filter allows querying by concrete subtype across multiple indexes and # branches of the type hierarchy. `DirectWholesaler` is in the shared `distribution_channels` @@ -808,6 +821,23 @@ module ElasticGraph filter: {typename_key => {equal_to_any_of: ["DirectWholesaler", "PhysicalStore"]}} ) expect(wholesaler_or_physical.map { |c| c["__typename"] }).to contain_exactly("DirectWholesaler", "PhysicalStore", "PhysicalStore") + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] + + # A __typename filter targeting only PhysicalStore narrows to just the physical_stores index. + physical_stores_only = list_distribution_channels_with( + *all_channel_fragments, + filter: {typename_key => {equal_to_any_of: ["PhysicalStore"]}} + ) + expect(physical_stores_only.map { |c| c["__typename"] }).to contain_exactly("PhysicalStore", "PhysicalStore") + expect(index_search_expressions_from_queries("main").last(1)).to eq [ps_index] + + # A __typename filter targeting both wholesaler types narrows to just the distribution_channels index. + wholesalers_only = list_distribution_channels_with( + *all_channel_fragments, + filter: {typename_key => {equal_to_any_of: ["DirectWholesaler", "BrokerWholesaler"]}} + ) + expect(wholesalers_only.map { |c| c["__typename"] }).to contain_exactly("DirectWholesaler", "BrokerWholesaler") + expect(index_search_expressions_from_queries("main").last(1)).to eq [dc_index] # `_typename` filter interacts correctly with automatic `__typename` scoping at a sub-interface level. # Filtering `retailers` to `OnlineStore OR PhysicalStore` returns all retailers (the full set), @@ -817,6 +847,7 @@ module ElasticGraph filter: {typename_key => {equal_to_any_of: ["OnlineStore", "PhysicalStore"]}} ) expect(all_retailers.map { |r| r["__typename"] }).to contain_exactly(*expected_store_typenames) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # `_typename` filter also works on aggregations, including across indexes. wholesaler_or_physical_agg_count = call_graphql_query(<<~QUERY) @@ -830,6 +861,7 @@ module ElasticGraph QUERY .dig("data", case_correctly("distribution_channel_aggregations"), "nodes", 0, case_correctly("count")) expect(wholesaler_or_physical_agg_count).to eq(wholesaler_or_physical.size) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] # all_highlights resolves against the concrete type (OnlineStore), not the abstract root # (DistributionChannel). OnlineStore.name is absent from DistributionChannel — without @@ -842,6 +874,7 @@ module ElasticGraph {"path" => ["name"], "snippets" => ["Example Marketplace"]} ] }) + expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices] end it "supports querying a type that is both indexed (via interface inheritance) and embedded as a field on another type" do diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/merge_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/merge_spec.rb index 4696e1b64..6c39bdb00 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/merge_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/merge_spec.rb @@ -18,25 +18,26 @@ class GraphQL include_context "DatastoreQueryUnitSupport" before(:context) do - # These are derived from app state and don't vary in two different queries for the same app, - # so we don't have to deal with merging them. - app_level_attributes = %i[ - logger filter_interpreter routing_picker index_expression_builder - default_page_size max_page_size schema_element_names - ] - - @attributes_needing_merge_test_coverage = (DatastoreQuery.members - app_level_attributes).to_set + @attributes_needing_merge_test_coverage = DatastoreQuery::Builder.instance_method(:new_query).parameters.map(&:last).to_set + @attributes_covered = ::Set.new end before(:example) do |ex| Array(ex.metadata[:covers]).each do |attribute| - @attributes_needing_merge_test_coverage.delete(attribute) + if @attributes_needing_merge_test_coverage.include?(attribute) + @attributes_covered << attribute + else + # :nocov: -- only executed when a test has a typo in its `covers:` metadata + raise "Attribute `#{attribute}` (from `covers: :#{attribute}`) does not appear to need coverage. Did you misspell it?" + # :nocov: + end end end after(:context) do - expect(@attributes_needing_merge_test_coverage).to be_empty, "`#merge` tests are expected to cover all attributes, " \ - "but the following do not appear to have coverage: #{@attributes_needing_merge_test_coverage}" + untested_attribute = @attributes_needing_merge_test_coverage - @attributes_covered + expect(untested_attribute).to be_empty, "`#merge` tests are expected to cover all attributes, " \ + "but the following do not appear to have coverage: #{untested_attribute}" end it "does not allow `initial_search_index_definitions` to be overridden", covers: :initial_search_index_definitions do diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/search_index_expression_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/search_index_expression_spec.rb index e7612a82b..79aff4868 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/search_index_expression_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/search_index_expression_spec.rb @@ -548,7 +548,7 @@ def search_index_expression_parts_for(filter_or_filters, aggregations: {}) end index_def = graphql.datastore_core.index_definitions_by_name.fetch("widgets") - builder.new_query(initial_search_index_definitions: [index_def], aggregations: aggregations, **options).search_index_expression.split(",") + graphql.datastore_query_builder.new_query(initial_search_index_definitions: [index_def], aggregations: aggregations, **options).search_index_expression.split(",") end # The search index expression parts should be the same regardless of whether a client or internal filter is used.