diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb index 5910dd234..8cdc00d3b 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb @@ -14,20 +14,7 @@ class DatastoreQuery # Responsible for picking routing values for a specific query based on the filters. class RoutingPicker def initialize(filter_node_interpreter:, schema_names:) - all_values_set = RoutingValueSet::ALL - empty_set = RoutingValueSet::EMPTY - - @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new( - filter_node_interpreter, - schema_names, - all_values_set, - empty_set - ) do |operator, filter_value| - if operator == :equal_to_any_of - # This calls `.compact` to remove `nil` filter_value values - RoutingValueSet.of(filter_value.compact) - end - end + @filter_value_set_extractor = Filtering::FilterValueSetExtractor.for_equality(filter_node_interpreter, schema_names) end # Given a list of `filter_hashes` and a list of `routing_field_paths`, returns a list of @@ -53,119 +40,18 @@ def initialize(filter_node_interpreter:, schema_names:) # end # ``` def extract_eligible_routing_values(filter_hashes, routing_field_paths) - @filter_value_set_extractor.extract_filter_value_set(filter_hashes, routing_field_paths)&.to_return_value - end - end - - class RoutingValueSet < Data.define(:type, :routing_values) - def self.of(routing_values) - new(:inclusive, routing_values.to_set) - end - - def self.of_all_except(routing_values) - new(:exclusive, routing_values.to_set) - end - - ALL = of_all_except([]) - EMPTY = of([]) - - def intersection(other) - if inclusive? && other.inclusive? - # Since both sets are inclusive, we can just delegate to `Set#intersection` here. - RoutingValueSet.of(routing_values.intersection(other.routing_values)) - elsif exclusive? 
&& other.exclusive? - # Since both sets are exclusive, we need to return an exclusive set of the union of the - # excluded values. For example, when dealing with positive integers: - # - # s1 = RoutingValueSet.of_all_except([1, 2, 3]) # > 3 - # s2 = RoutingValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 - # - # s3 = s1.intersection(s2) - # - # Here s3 would be all values > 5 (the same as `RoutingValueSet.of_all_except([1, 2, 3, 4, 5])`) - RoutingValueSet.of_all_except(routing_values.union(other.routing_values)) - else - # Since one set is inclusive and one set is exclusive, we need to return an inclusive set of - # `included_values - excluded_values`. For example, when dealing with positive integers: - # - # s1 = RoutingValueSet.of([1, 2, 3]) # 1, 2, 3 - # s2 = RoutingValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 - # - # s3 = s1.intersection(s2) - # - # Here s3 would be just `1, 2`. - included_values, excluded_values = get_included_and_excluded_values(other) - RoutingValueSet.of(included_values - excluded_values) - end - end - - def union(other) - if inclusive? && other.inclusive? - # Since both sets are inclusive, we can just delegate to `Set#union` here. - RoutingValueSet.of(routing_values.union(other.routing_values)) - elsif exclusive? && other.exclusive? - # Since both sets are exclusive, we need to return an exclusive set of the intersection of the - # excluded values. For example, when dealing with positive integers: - # - # s1 = RoutingValueSet.of_all_except([1, 2, 3]) # > 3 - # s2 = RoutingValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 - # - # s3 = s1.union(s2) - # - # Here s3 would be all 1, 2, > 3 (the same as `RoutingValueSet.of_all_except([3])`) - RoutingValueSet.of_all_except(routing_values.intersection(other.routing_values)) - else - # Since one set is inclusive and one set is exclusive, we need to return an exclusive set of - # `excluded_values - included_values`. 
For example, when dealing with positive integers: - # - # s1 = RoutingValueSet.of([1, 2, 3]) # 1, 2, 3 - # s2 = RoutingValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 - # - # s3 = s1.union(s2) - # - # Here s3 would be 1, 2, 3, > 5 (the same as `RoutingValueSet.of_all_except([4, 5])`) - included_values, excluded_values = get_included_and_excluded_values(other) - RoutingValueSet.of_all_except(excluded_values - included_values) - end - end - - def negate - with(type: INVERTED_TYPES.fetch(type)) - end - - INVERTED_TYPES = {inclusive: :exclusive, exclusive: :inclusive} - - def to_return_value + result = @filter_value_set_extractor.extract_filter_value_set(filter_hashes, routing_field_paths) # Elasticsearch/OpenSearch have no routing value syntax to tell it to avoid searching a specific shard # (and the fact that we are excluding a routing value doesn't mean that other documents that # live on the same shard with different routing values can't match!) so we return `nil` to # force the datastore to search all shards. - return nil if exclusive? - - routing_values.to_a - end - - protected - - def inclusive? - type == :inclusive - end - - def exclusive? - type == :exclusive - end - - private - - def get_included_and_excluded_values(other) - inclusive? ? [routing_values, other.routing_values] : [other.routing_values, routing_values] + return nil if result.nil? || result.exclusive? + result.values.to_a end end # `Query::RoutingPicker` exists only for use by `Query` and is effectively private. private_constant :RoutingPicker - # `RoutingValueSet` exists only for use here and is effectively private. - private_constant :RoutingValueSet # Steep is complaining that it can't find some `Query` but they are not in this file... 
# @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, with, to_datastore_msearch_header_and_body diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/equality_value_set.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/equality_value_set.rb new file mode 100644 index 000000000..149c3a95c --- /dev/null +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/equality_value_set.rb @@ -0,0 +1,113 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + class GraphQL + module Filtering + # A set that can represent either a specific list of values or all values except a specific + # list, with support for common set operations (union, intersection, negation). In contrast + # to other set implementations that work with `FilterValueSetExtractor`, this only works with + # `equal_to_any_of` filtering (hence the `EqualityValueSet` name). + class EqualityValueSet < Data.define(:type, :values) + # `Data.define` provides the following methods: + # @dynamic initialize, type, values, with + + def self.of(values) + new(:inclusive, values.to_set) + end + + def self.of_all_except(values) + new(:exclusive, values.to_set) + end + + ALL = of_all_except([]) + EMPTY = of([]) + + def intersection(other) + if inclusive? && other.inclusive? + # Since both sets are inclusive, we can just delegate to `Set#intersection` here. + EqualityValueSet.of(values.intersection(other.values)) + elsif exclusive? && other.exclusive? + # Since both sets are exclusive, we need to return an exclusive set of the union of the + # excluded values. 
For example, when dealing with positive integers: + # + # s1 = EqualityValueSet.of_all_except([1, 2, 3]) # > 3 + # s2 = EqualityValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 + # + # s3 = s1.intersection(s2) + # + # Here s3 would be all values > 5 (the same as `EqualityValueSet.of_all_except([1, 2, 3, 4, 5])`) + EqualityValueSet.of_all_except(values.union(other.values)) + else + # Since one set is inclusive and one set is exclusive, we need to return an inclusive set of + # `included_values - excluded_values`. For example, when dealing with positive integers: + # + # s1 = EqualityValueSet.of([1, 2, 3]) # 1, 2, 3 + # s2 = EqualityValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 + # + # s3 = s1.intersection(s2) + # + # Here s3 would be just `1, 2`. + included_values, excluded_values = get_included_and_excluded_values(other) + EqualityValueSet.of(included_values - excluded_values) + end + end + + def union(other) + if inclusive? && other.inclusive? + # Since both sets are inclusive, we can just delegate to `Set#union` here. + EqualityValueSet.of(values.union(other.values)) + elsif exclusive? && other.exclusive? + # Since both sets are exclusive, we need to return an exclusive set of the intersection of the + # excluded values. For example, when dealing with positive integers: + # + # s1 = EqualityValueSet.of_all_except([1, 2, 3]) # > 3 + # s2 = EqualityValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 + # + # s3 = s1.union(s2) + # + # Here s3 would be 1, 2, > 3 (the same as `EqualityValueSet.of_all_except([3])`) + EqualityValueSet.of_all_except(values.intersection(other.values)) + else + # Since one set is inclusive and one set is exclusive, we need to return an exclusive set of + # `excluded_values - included_values`. 
For example, when dealing with positive integers: + # + # s1 = EqualityValueSet.of([1, 2, 3]) # 1, 2, 3 + # s2 = EqualityValueSet.of_all_except([3, 4, 5]) # 1, 2, > 5 + # + # s3 = s1.union(s2) + # + # Here s3 would be 1, 2, 3, > 5 (the same as `EqualityValueSet.of_all_except([4, 5])`) + included_values, excluded_values = get_included_and_excluded_values(other) + EqualityValueSet.of_all_except(excluded_values - included_values) + end + end + + def negate + with(type: INVERTED_TYPES.fetch(type)) + end + + INVERTED_TYPES = {inclusive: :exclusive, exclusive: :inclusive} + + def inclusive? + type == :inclusive + end + + def exclusive? + type == :exclusive + end + + private + + def get_included_and_excluded_values(other) + inclusive? ? [values, other.values] : [other.values, values] + end + end + end + end +end diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb index 8d91a76db..d2312e99e 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb @@ -6,12 +6,25 @@ # # frozen_string_literal: true +require "elastic_graph/graphql/filtering/equality_value_set" + module ElasticGraph class GraphQL module Filtering # Responsible for extracting a set of values from query filters, based on a using a custom # set type that is able to efficiently model the "all values" case. class FilterValueSetExtractor + # Factory method for building a `FilterValueSetExtractor` that uses `EqualityValueSet` to + # extract the set of values matched by `equal_to_any_of` filters on the target fields. 
+ def self.for_equality(filter_node_interpreter, schema_names) + new(filter_node_interpreter, schema_names, EqualityValueSet::ALL, EqualityValueSet::EMPTY) do |operator, filter_value| + if operator == :equal_to_any_of + # This calls `.compact` to remove `nil` filter_value values + EqualityValueSet.of(filter_value.compact) + end + end + end + def initialize(filter_node_interpreter, schema_names, all_values_set, empty_set, &build_set_for_filter) @filter_node_interpreter = filter_node_interpreter @schema_names = schema_names diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs index 2a3807148..f718510f9 100644 --- a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query/routing_picker.rbs @@ -14,49 +14,10 @@ module ElasticGraph private - @filter_value_set_extractor: Filtering::FilterValueSetExtractor[RoutingValueSet] + @filter_value_set_extractor: Filtering::FilterValueSetExtractor[Filtering::EqualityValueSet] end type routingValue = untyped - - type routingValueSetType = :inclusive | :exclusive - - class RoutingValueSetSupertype - attr_reader type: routingValueSetType - attr_reader routing_values: ::Set[routingValue] - - def initialize: (routingValueSetType, ::Set[routingValue]) -> void - def self.with: ( - type: routingValueSetType, - routing_values: ::Set[routingValue] - ) -> RoutingValueSet - - def with: ( - ?type: routingValueSetType, - ?routing_values: ::Set[routingValue] - ) -> RoutingValueSet - end - - class RoutingValueSet < RoutingValueSetSupertype - include Support::_NegatableSet[RoutingValueSet] - def self.of: (::Enumerable[routingValue]) -> RoutingValueSet - def self.of_all_except: (::Enumerable[routingValue]) -> RoutingValueSet - - ALL: RoutingValueSet - EMPTY: RoutingValueSet - INVERTED_TYPES: ::Hash[routingValueSetType, 
routingValueSetType] - - def to_return_value: () -> ::Array[routingValue]? - - def inclusive?: () -> bool - def exclusive?: () -> bool - - private - - def get_included_and_excluded_values: ( - RoutingValueSet - ) -> [::Set[routingValue], ::Set[routingValue]] - end end end end diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/equality_value_set.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/equality_value_set.rbs new file mode 100644 index 000000000..106049c08 --- /dev/null +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/equality_value_set.rbs @@ -0,0 +1,39 @@ +module ElasticGraph + class GraphQL + module Filtering + type equalityValueSetType = :inclusive | :exclusive + + class EqualityValueSet + include Support::_NegatableSet[EqualityValueSet] + + attr_reader type: equalityValueSetType + attr_reader values: ::Set[untyped] + + def initialize: (equalityValueSetType, ::Set[untyped]) -> void + + def self.new: + (equalityValueSetType, ::Set[untyped]) -> instance + | (type: equalityValueSetType, values: ::Set[untyped]) -> instance + + def with: ( + ?type: equalityValueSetType, + ?values: ::Set[untyped] + ) -> EqualityValueSet + + def self.of: (::Enumerable[untyped]) -> EqualityValueSet + def self.of_all_except: (::Enumerable[untyped]) -> EqualityValueSet + + ALL: EqualityValueSet + EMPTY: EqualityValueSet + INVERTED_TYPES: ::Hash[equalityValueSetType, equalityValueSetType] + + def inclusive?: () -> bool + def exclusive?: () -> bool + + private + + def get_included_and_excluded_values: (EqualityValueSet) -> [::Set[untyped], ::Set[untyped]] + end + end + end +end diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs index fd036d296..96ca39ab0 100644 --- a/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs +++ 
b/elasticgraph-graphql/sig/elastic_graph/graphql/filtering/filter_value_set_extractor.rbs @@ -4,6 +4,11 @@ module ElasticGraph class FilterValueSetExtractor[S < Support::_NegatableSet[S]] type setType[out S] = S | singleton(UnboundedSetWithExclusions) + def self.for_equality: ( + FilterNodeInterpreter, + SchemaArtifacts::RuntimeMetadata::SchemaElementNames + ) -> FilterValueSetExtractor[EqualityValueSet] + def initialize: ( FilterNodeInterpreter, SchemaArtifacts::RuntimeMetadata::SchemaElementNames,