Skip to content

Commit 1a6a11f

Browse files
committed
assume all annotations can be matched in regex search without a prefix
1 parent 973b6ab commit 1a6a11f

6 files changed

Lines changed: 24 additions & 16 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ exist.
2727

2828
- Less frequent corpus cache status updates in log. Before, every corpus access
2929
could trigger a log entry, which is undesirable under heavy load.
30+
- Improve query execution planning by assuming all annotations can be matched in
31+
regular expressions without a prefix.
3032

3133
## [3.7.1] - 2025-04-14
3234

core/src/annostorage/inmemory.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,10 @@ where
752752
fn guess_max_count_regex(&self, ns: Option<&str>, name: &str, pattern: &str) -> Result<usize> {
753753
let full_match_pattern = util::regex_full_match(pattern);
754754

755+
// Get the total number of annotations with the namespace/name. We
756+
// can't get larger than this number
757+
let total = self.number_of_annotations_by_name(ns, name)?;
758+
755759
// Try to parse the regular expression
756760
let parsed = regex_syntax::Parser::new().parse(&full_match_pattern);
757761
if let Ok(parsed) = parsed {
@@ -770,11 +774,10 @@ where
770774
guessed_count += self.guess_max_count(ns, name, lower_val, &upper_val)?;
771775
}
772776
}
777+
} else {
778+
guessed_count = total;
773779
}
774780

775-
// Get the total number of annotations with the namespace/name. We
776-
// can't get larger than this number
777-
let total = self.number_of_annotations_by_name(ns, name)?;
778781
Ok(guessed_count.min(total))
779782
} else {
780783
Ok(0)

core/src/annostorage/ondisk.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,10 @@ where
895895
fn guess_max_count_regex(&self, ns: Option<&str>, name: &str, pattern: &str) -> Result<usize> {
896896
let full_match_pattern = util::regex_full_match(pattern);
897897

898+
// Get the total number of annotations with the namespace/name. We
899+
// can't get larger than this number
900+
let total = self.number_of_annotations_by_name(ns, name)?;
901+
898902
// Try to parse the regular expression
899903
let parsed = Parser::new().parse(&full_match_pattern);
900904
if let Ok(parsed) = parsed {
@@ -913,11 +917,10 @@ where
913917
guessed_count += self.guess_max_count(ns, name, lower_val, &upper_val)?;
914918
}
915919
}
920+
} else {
921+
guessed_count = total;
916922
}
917923

918-
// Get the total number of annotations with the namespace/name. We
919-
// can't get larger than this number
920-
let total = self.number_of_annotations_by_name(ns, name)?;
921924
Ok(guessed_count.min(total))
922925
} else {
923926
Ok(0)

graphannis/src/annis/db/aql/conjunction.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ fn get_cost_estimates<'a>(
119119
}
120120
}
121121

122+
/// Returns true if switching the operand order of a join is estimated to be beneficial.
122123
fn should_switch_operand_order(
123124
op_spec: &BinaryOperatorSpecEntry,
124125
node2cost: &BTreeMap<usize, CostEstimate>,

graphannis/src/annis/db/exec/mod.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ pub struct CostEstimate {
1616
pub output: usize,
1717
/// Sum of all processed tuples including the ones of the sub-steps.
1818
pub intermediate_sum: usize,
19-
/// The estimated number of tuples that are processed in a join in this
20-
/// execution step.
19+
/// Simplistic estimate of the number of tuples that are processed in a join.
2120
pub processed_in_step: usize,
2221
}
2322

@@ -58,13 +57,13 @@ impl ExecutionNodeDesc {
5857
pub fn empty_with_fragment(
5958
node_nr: usize,
6059
query_fragment: String,
61-
est_size: Option<usize>,
60+
estimated_output: usize,
6261
) -> ExecutionNodeDesc {
6362
let mut node_pos = BTreeMap::new();
6463
node_pos.insert(node_nr, 0);
6564

66-
let cost = est_size.map(|output| CostEstimate {
67-
output,
65+
let cost = Some(CostEstimate {
66+
output: estimated_output,
6867
intermediate_sum: 0,
6968
processed_in_step: 0,
7069
});

graphannis/src/annis/db/exec/nodesearch.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ impl<'a> NodeSearch<'a> {
569569
desc: Some(ExecutionNodeDesc::empty_with_fragment(
570570
common_args.node_nr,
571571
common_args.query_fragment,
572-
Some(est_output),
572+
est_output,
573573
)),
574574
node_search_desc: Arc::new(NodeSearchDesc {
575575
qname: (
@@ -676,7 +676,7 @@ impl<'a> NodeSearch<'a> {
676676
desc: Some(ExecutionNodeDesc::empty_with_fragment(
677677
common_args.node_nr,
678678
common_args.query_fragment.clone(),
679-
Some(est_output),
679+
est_output,
680680
)),
681681
node_search_desc: Arc::new(NodeSearchDesc {
682682
qname: (qname.0, Some(qname.1)),
@@ -772,7 +772,7 @@ impl<'a> NodeSearch<'a> {
772772
desc: Some(ExecutionNodeDesc::empty_with_fragment(
773773
common_args.node_nr,
774774
common_args.query_fragment,
775-
Some(est_output),
775+
est_output,
776776
)),
777777
node_search_desc: Arc::new(NodeSearchDesc {
778778
qname: (qname.0, Some(qname.1)),
@@ -920,7 +920,7 @@ impl<'a> NodeSearch<'a> {
920920
desc: Some(ExecutionNodeDesc::empty_with_fragment(
921921
common_args.node_nr,
922922
common_args.query_fragment.clone(),
923-
Some(est_output),
923+
est_output,
924924
)),
925925
node_search_desc: Arc::new(NodeSearchDesc {
926926
qname: (
@@ -978,7 +978,7 @@ impl<'a> NodeSearch<'a> {
978978
desc: Some(ExecutionNodeDesc::empty_with_fragment(
979979
common_args.node_nr,
980980
common_args.query_fragment.clone(),
981-
Some(est_output),
981+
est_output,
982982
)),
983983
node_search_desc: Arc::new(NodeSearchDesc {
984984
qname: (

0 commit comments

Comments
 (0)