@@ -24,7 +24,7 @@ struct BaseEdgeOpSpec {
2424 pub edge_anno : Option < EdgeAnnoSearchSpec > ,
2525 pub is_reflexive : bool ,
2626 pub op_str : Option < String > ,
27- pub check_cost_for_inverse_operator : bool ,
27+ pub inverse_operator_needs_cost_check : bool ,
2828}
2929
3030struct BaseEdgeOp {
@@ -45,34 +45,7 @@ impl BaseEdgeOp {
4545 gs. push ( gs_for_component) ;
4646 }
4747
48- let all_part_of_components = spec
49- . components
50- . iter ( )
51- . all ( |c| c. get_type ( ) == AnnotationComponentType :: PartOf ) ;
52-
53- let max_nodes_estimate = if all_part_of_components && gs. len ( ) == 1 {
54- // PartOf components have a very skewed distribution of root nodes
55- // vs. the actual possible targets, thus do not use all nodes as
56- // population but only the non-roots.
57- if let Some ( stats) = gs[ 0 ] . get_statistics ( ) {
58- stats. nodes - stats. root_nodes
59- } else {
60- // Fallback to guessing by using the node type
61- db. get_node_annos ( ) . guess_max_count (
62- Some ( & NODE_TYPE_KEY . ns ) ,
63- & NODE_TYPE_KEY . name ,
64- "corpus" ,
65- "datasource" ,
66- ) ?
67- }
68- } else {
69- db. get_node_annos ( ) . guess_max_count (
70- Some ( & NODE_TYPE_KEY . ns ) ,
71- & NODE_TYPE_KEY . name ,
72- "node" ,
73- "node" ,
74- ) ?
75- } ;
48+ let max_nodes_estimate = calculate_max_node_estimate ( db, & spec, & gs, false ) ?;
7649 Ok ( BaseEdgeOp {
7750 gs,
7851 spec,
@@ -82,6 +55,48 @@ impl BaseEdgeOp {
8255 }
8356}
8457
58+ fn calculate_max_node_estimate (
59+ db : & AnnotationGraph ,
60+ spec : & BaseEdgeOpSpec ,
61+ gs : & [ Arc < dyn GraphStorage > ] ,
62+ inverse : bool ,
63+ ) -> Result < usize > {
64+ let all_components_are_partof = spec
65+ . components
66+ . iter ( )
67+ . all ( |c| c. get_type ( ) == AnnotationComponentType :: PartOf ) ;
68+ let max_nodes_estimate = if all_components_are_partof && gs. len ( ) == 1 {
69+ // PartOf components have a very skewed distribution of root nodes vs.
70+ // the actual possible targets, thus do not use all nodes as population
71+ // but only the non-roots. We can only use this formula for the actual
72+ // @* operator, but not the inverted one.
73+ if !inverse && let Some ( stats) = gs[ 0 ] . get_statistics ( ) {
74+ stats. nodes - stats. root_nodes
75+ } else {
76+ // Fallback to guessing how many nodes have the node type "corpus"
77+ // or "datasource" and thus could be reachable as RHS in a worst case
78+ // scenario. Since a node can't be part of itself, subtract 1 for
79+ // the node on the LHS.
80+ db. get_node_annos ( )
81+ . guess_max_count (
82+ Some ( & NODE_TYPE_KEY . ns ) ,
83+ & NODE_TYPE_KEY . name ,
84+ "corpus" ,
85+ "datasource" ,
86+ ) ?
87+ . saturating_sub ( 1 )
88+ }
89+ } else {
90+ db. get_node_annos ( ) . guess_max_count (
91+ Some ( & NODE_TYPE_KEY . ns ) ,
92+ & NODE_TYPE_KEY . name ,
93+ "node" ,
94+ "node" ,
95+ ) ?
96+ } ;
97+ Ok ( max_nodes_estimate)
98+ }
99+
85100impl BinaryOperatorSpec for BaseEdgeOpSpec {
86101 fn necessary_components (
87102 & self ,
@@ -286,13 +301,15 @@ impl BinaryOperatorBase for BaseEdgeOp {
286301
287302 fn get_inverse_operator < ' a > (
288303 & self ,
289- _graph : & ' a AnnotationGraph ,
304+ graph : & ' a AnnotationGraph ,
290305 ) -> Result < Option < BinaryOperator < ' a > > > {
306+ let inverse = !self . inverse ;
307+
291308 // Check if all graph storages have the same inverse cost. If not, we
292309 // don't provide an inverse operator, because the plans would not
293310 // account for the different costs
294311 for g in & self . gs {
295- if self . spec . check_cost_for_inverse_operator && !g. inverse_has_same_cost ( ) {
312+ if self . spec . inverse_operator_needs_cost_check && !g. inverse_has_same_cost ( ) {
296313 return Ok ( None ) ;
297314 }
298315 if let Some ( stat) = g. get_statistics ( ) {
@@ -302,11 +319,12 @@ impl BinaryOperatorBase for BaseEdgeOp {
302319 }
303320 }
304321 }
322+ let max_nodes_estimate = calculate_max_node_estimate ( graph, & self . spec , & self . gs , inverse) ?;
305323 let edge_op = BaseEdgeOp {
306324 gs : self . gs . clone ( ) ,
307325 spec : self . spec . clone ( ) ,
308- max_nodes_estimate : self . max_nodes_estimate ,
309- inverse : ! self . inverse ,
326+ max_nodes_estimate,
327+ inverse,
310328 } ;
311329 Ok ( Some ( BinaryOperator :: Index ( Box :: new ( edge_op) ) ) )
312330 }
@@ -317,7 +335,11 @@ impl BinaryOperatorBase for BaseEdgeOp {
317335 return Ok ( EstimationType :: Selectivity ( 0.0 ) ) ;
318336 }
319337
320- let max_nodes: f64 = self . max_nodes_estimate as f64 ;
338+ let mut max_nodes: f64 = self . max_nodes_estimate as f64 ;
339+ // Avoid division by 0
340+ if max_nodes == 0.0 {
341+ max_nodes = 1.0 ;
342+ }
321343
322344 let mut worst_sel: f64 = 0.0 ;
323345
@@ -624,7 +646,7 @@ impl BinaryOperatorSpec for DominanceSpec {
624646 dist : self . dist . clone ( ) ,
625647 edge_anno : self . edge_anno . clone ( ) ,
626648 is_reflexive : true ,
627- check_cost_for_inverse_operator : true ,
649+ inverse_operator_needs_cost_check : true ,
628650 } ;
629651 base. create_operator ( db, cost_estimate)
630652 }
@@ -676,7 +698,7 @@ impl BinaryOperatorSpec for PointingSpec {
676698 edge_anno : self . edge_anno . clone ( ) ,
677699 is_reflexive : true ,
678700 op_str : Some ( op_str) ,
679- check_cost_for_inverse_operator : true ,
701+ inverse_operator_needs_cost_check : true ,
680702 } ;
681703 base. create_operator ( db, cost_estimate)
682704 }
@@ -721,7 +743,7 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec {
721743 ANNIS_NS . into( ) ,
722744 "" . into( ) ,
723745 ) ] ;
724- let check_cost_for_inverse_operator = if let Some ( ( _, rhs) ) = cost_estimate {
746+ let inverse_operator_needs_cost_check = if let Some ( ( _, rhs) ) = cost_estimate {
725747 // Only ignore different cost and risk a nested loop join if the RHS
726748 // has an estimated output size of 1 and thus a nested loop is not
727749 // as costly.
@@ -735,7 +757,7 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec {
735757 dist : self . dist . clone ( ) ,
736758 edge_anno : None ,
737759 is_reflexive : false ,
738- check_cost_for_inverse_operator ,
760+ inverse_operator_needs_cost_check ,
739761 } ;
740762
741763 base. create_operator ( db, cost_estimate)
@@ -751,3 +773,6 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec {
751773 self
752774 }
753775}
776+
777+ #[ cfg( test) ]
778+ mod tests;
0 commit comments