Skip to content

Commit 6576872

Browse files
authored
Merge pull request #299 from korpling/feature/optional-node-check-tests
Add fix for handling of optional nodes that are not at the end of the query
2 parents cf153d5 + f87bd0d commit 6576872

8 files changed

Lines changed: 116 additions & 52 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
55

66
## [Unreleased]
77

8+
### Fixed
9+
10+
- When optional nodes where located not at the end but somewhere in between the
11+
query, the output of the `find` query could include the wrong node ID.
12+
813
## [3.3.0] - 2024-05-27
914

1015
### Changed

graphannis/src/annis/db/aql/conjunction.rs

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -292,14 +292,15 @@ impl Conjunction {
292292
} else {
293293
(idx + 1).to_string()
294294
};
295+
self.variables.insert(variable.clone(), self.nodes.len());
296+
295297
self.nodes.push(NodeSearchSpecEntry {
296298
var: variable.clone(),
297299
spec: node,
298300
optional,
299301
location: location.clone(),
300302
});
301303

302-
self.variables.insert(variable.clone(), idx);
303304
if included_in_output && !optional {
304305
self.include_in_output.insert(variable.clone());
305306
}
@@ -315,9 +316,11 @@ impl Conjunction {
315316
var: &str,
316317
location: Option<LineColumnRange>,
317318
) -> Result<()> {
318-
if let Some(idx) = self.variables.get(var) {
319-
self.unary_operators
320-
.push(UnaryOperatorSpecEntry { op, idx: *idx });
319+
if let Some(local_idx) = self.variables.get(var) {
320+
self.unary_operators.push(UnaryOperatorSpecEntry {
321+
op,
322+
idx: *local_idx,
323+
});
321324
Ok(())
322325
} else {
323326
Err(GraphAnnisError::AQLSemanticError(AQLError {
@@ -370,34 +373,28 @@ impl Conjunction {
370373
location: Option<LineColumnRange>,
371374
) -> Result<usize> {
372375
if let Some(pos) = self.variables.get(variable) {
373-
return Ok(*pos);
376+
return Ok(pos + self.var_idx_offset);
374377
}
375378
Err(GraphAnnisError::AQLSemanticError(AQLError {
376379
desc: format!("Operand \"#{}\" not found", variable),
377380
location,
378381
}))
379382
}
380383

384+
/// Return true if the given query node variable is supposed to be included
385+
/// in a match output. This is not the case if it is an optional node or a
386+
/// legacy `meta::` query reference.
381387
pub fn is_included_in_output(&self, variable: &str) -> bool {
382388
self.include_in_output.contains(variable)
383389
}
384390

385-
/// Return the variable name for a given position in the match output list.
386-
///
387-
/// Optional nodes that are not part of the output are ignored. If there are
388-
/// no optional nodes, this corresponds to the index of the node in the
389-
/// query.
390-
pub fn get_variable_by_pos(&self, pos: usize) -> Option<String> {
391-
let mut output_pos = 0;
392-
for n in self.nodes.iter() {
393-
if self.is_included_in_output(&n.var) {
394-
if output_pos == pos {
395-
return Some(n.var.clone());
396-
}
397-
output_pos += 1;
398-
}
399-
}
400-
None
391+
/// Return the variable name for a node number. The node number is the
392+
/// position of an node description in the query. In case of a query with
393+
/// multiple alternatives, this refers to the node number of the whole query and
394+
/// not only the conjunction.
395+
pub fn get_variable_by_node_nr(&self, node_nr: usize) -> Option<String> {
396+
let pos = node_nr.checked_sub(self.var_idx_offset)?;
397+
self.nodes.get(pos).map(|spec| spec.var.clone())
401398
}
402399

403400
pub fn resolve_variable(
@@ -725,7 +722,7 @@ impl Conjunction {
725722
.ok_or(GraphAnnisError::NoComponentForNode(spec_idx_right + 1))?);
726723

727724
// get the original execution node
728-
let exec_left: Box<dyn ExecutionNode<Item = Result<MatchGroup>> + 'a> = component2exec
725+
let exec_left = component2exec
729726
.remove(&component_left)
730727
.ok_or(GraphAnnisError::NoExecutionNode(component_left))?;
731728

@@ -821,7 +818,7 @@ impl Conjunction {
821818

822819
// 2. add unary operators as filter to the existing node search
823820
for op_spec_entry in self.unary_operators.iter() {
824-
let child_exec: Box<dyn ExecutionNode<Item = Result<MatchGroup>> + 'a> = component2exec
821+
let child_exec = component2exec
825822
.remove(&op_spec_entry.idx)
826823
.ok_or(GraphAnnisError::NoExecutionNode(op_spec_entry.idx))?;
827824

graphannis/src/annis/db/aql/disjunction.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,23 @@ impl Disjunction {
3636
None
3737
}
3838

39-
/// Return the variable name for a given position in the match output list.
40-
///
41-
/// Optional nodes that are not part of the output are ignored. If there are
42-
/// no optional nodes, this corresponds to the index of the node in the
43-
/// query.
44-
pub(crate) fn get_variable_by_pos(&self, pos: usize) -> Option<String> {
39+
/// Return the variable name for a node number. The node number is the
40+
/// position of an AQL query node in the disjunction.
41+
pub(crate) fn get_variable_by_node_nr(&self, node_nr: usize) -> Option<String> {
4542
for alt in &self.alternatives {
46-
if let Some(var) = alt.get_variable_by_pos(pos) {
43+
if let Some(var) = alt.get_variable_by_node_nr(node_nr) {
4744
return Some(var);
4845
}
4946
}
5047
None
5148
}
49+
50+
pub(crate) fn is_included_in_output(&self, variable: &str) -> bool {
51+
for alt in &self.alternatives {
52+
if alt.is_included_in_output(variable) {
53+
return true;
54+
}
55+
}
56+
false
57+
}
5258
}

graphannis/src/annis/db/aql/mod.rs

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -359,31 +359,30 @@ fn add_node_specs_by_start(
359359
fn add_legacy_metadata_constraints(
360360
q: &mut Conjunction,
361361
legacy_meta_search: Vec<(NodeSearchSpec, ast::Pos)>,
362-
first_node_pos: Option<String>,
362+
first_node_var: Option<String>,
363363
) -> Result<()> {
364364
{
365-
let mut first_meta_idx: Option<String> = None;
366-
// TODO: add warning to the user not to use this construct anymore
365+
let mut first_meta_var: Option<String> = None;
367366
for (spec, _pos) in legacy_meta_search {
368367
// add an artificial node that describes the document/corpus node
369-
let meta_node_idx = q.add_node_from_query(spec, None, None, false, false);
370-
if let Some(first_meta_idx) = first_meta_idx.clone() {
368+
let meta_node_var = q.add_node_from_query(spec, None, None, false, false);
369+
if let Some(first_meta_var) = first_meta_var.clone() {
371370
// avoid nested loops by joining additional meta nodes with a "identical node"
372371
q.add_operator(
373372
Arc::new(IdenticalNodeSpec {}),
374-
&first_meta_idx,
375-
&meta_node_idx,
373+
&first_meta_var,
374+
&meta_node_var,
376375
true,
377376
)?;
378-
} else if let Some(first_node_pos) = first_node_pos.clone() {
379-
first_meta_idx = Some(meta_node_idx.clone());
377+
} else if let Some(first_node_var) = first_node_var.clone() {
378+
first_meta_var = Some(meta_node_var.clone());
380379
// add a special join to the first node of the query
381380
q.add_operator(
382381
Arc::new(PartOfSubCorpusSpec {
383382
dist: RangeSpec::Unbound,
384383
}),
385-
&first_node_pos,
386-
&meta_node_idx,
384+
&first_node_var,
385+
&meta_node_var,
387386
true,
388387
)?;
389388
// Also make sure the matched node is actually a document
@@ -402,7 +401,7 @@ fn add_legacy_metadata_constraints(
402401
);
403402
q.add_operator(
404403
Arc::new(IdenticalNodeSpec {}),
405-
&meta_node_idx,
404+
&meta_node_var,
406405
&doc_anno_idx,
407406
true,
408407
)?;
@@ -501,12 +500,12 @@ pub fn parse(query_as_aql: &str, quirks_mode: bool) -> Result<Disjunction> {
501500
let mut mapped = map_conjunction(c, &offsets, var_idx_offset, quirks_mode)?;
502501

503502
if quirks_mode {
504-
// apply the meta constraints from all conjunctions to conjunctions
505-
let first_node_pos = mapped.get_variable_by_pos(0);
503+
// apply the meta constraints to first node of all conjunctions
504+
let first_node_var = mapped.get_variable_by_node_nr(0);
506505
add_legacy_metadata_constraints(
507506
&mut mapped,
508507
legacy_meta_search.clone(),
509-
first_node_pos,
508+
first_node_var,
510509
)?;
511510
}
512511
var_idx_offset += mapped.num_of_nodes();

graphannis/src/annis/db/corpusstorage.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,14 +1908,20 @@ impl CorpusStorage {
19081908
let m = m?;
19091909
let mut match_desc = String::new();
19101910

1911-
for (i, singlematch) in m.iter().enumerate() {
1911+
let mut any_nodes_added = false;
1912+
1913+
for (node_nr, singlematch) in m.iter().enumerate() {
19121914
// check if query node actually should be included
1913-
let include_in_output = prep.query.get_variable_by_pos(i).is_some();
1915+
let include_in_output = prep
1916+
.query
1917+
.get_variable_by_node_nr(node_nr)
1918+
.map_or(false, |var| prep.query.is_included_in_output(&var));
19141919

19151920
if include_in_output {
1916-
if i > 0 {
1921+
if any_nodes_added {
19171922
match_desc.push(' ');
19181923
}
1924+
any_nodes_added = true;
19191925

19201926
let singlematch_anno_key = &singlematch.anno_key;
19211927
if singlematch_anno_key.ns != ANNIS_NS || singlematch_anno_key.name != NODE_TYPE

graphannis/src/annis/db/plan.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,11 @@ impl<'a> ExecutionPlan<'a> {
9191
})
9292
}
9393

94+
/// Re-orders the match vector from the top execution node to match the
95+
/// requested query node order. If query nodes are not part of the result,
96+
/// they are still included in the vector but you can not use the node ID at
97+
/// this position.
9498
fn reorder_match(&self, tmp: MatchGroup) -> MatchGroup {
95-
if tmp.len() <= 1 {
96-
// nothing to reorder
97-
return tmp;
98-
}
9999
if let Some(ref inverse_node_pos) = self.inverse_node_pos[self.current_plan] {
100100
// re-order the matched nodes by the original node position of the query
101101
let mut result = MatchGroup::new();

graphannis/tests/searchtest.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,54 @@ fn meta_node_output_standard() {
267267
);
268268
}
269269

270+
#[ignore]
271+
#[test]
272+
fn exclude_optional_node_in_between() {
273+
let cs = CORPUS_STORAGE.as_ref().unwrap();
274+
275+
let q = SearchQuery {
276+
corpus_names: &["GUM"],
277+
query: r#"entity="person" !_o_ q? & infstat="giv" & #1 _r_ #3"#,
278+
query_language: QueryLanguage::AQL,
279+
timeout: None,
280+
};
281+
let result = cs
282+
.find(
283+
q.clone(),
284+
0,
285+
Some(1),
286+
graphannis::corpusstorage::ResultOrder::Normal,
287+
)
288+
.unwrap();
289+
290+
// Only node #1 and #3 should be part of the output
291+
assert_eq!(vec!["ref::entity::GUM/GUM_interview_ants#referent_291 ref::infstat::GUM/GUM_interview_ants#referent_321"], result);
292+
}
293+
294+
#[ignore]
295+
#[test]
296+
fn exclude_optional_node_at_end() {
297+
let cs = CORPUS_STORAGE.as_ref().unwrap();
298+
299+
let q = SearchQuery {
300+
corpus_names: &["GUM"],
301+
query: r#"entity="person" _r_ infstat="giv" & q? & #1 !_o_ #3"#,
302+
query_language: QueryLanguage::AQL,
303+
timeout: None,
304+
};
305+
let result = cs
306+
.find(
307+
q.clone(),
308+
0,
309+
Some(1),
310+
graphannis::corpusstorage::ResultOrder::Normal,
311+
)
312+
.unwrap();
313+
314+
// Only node #1 and #2 should be part of the output
315+
assert_eq!(vec!["ref::entity::GUM/GUM_interview_ants#referent_291 ref::infstat::GUM/GUM_interview_ants#referent_321"], result);
316+
}
317+
270318
#[ignore]
271319
#[test]
272320
fn token_search_loads_components_for_leaf_filter() {

graphannis/tests/searchtest_queries.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ NonExistingRight,"quote & /""/? & #1 !_r_ #2",GUM,19
4747
NonExistingRightInv,"quote & /""/? & #2 !_r_ #1",GUM,19
4848
InvalidNonExisting,"tok !. tok? !_o_ node?",GUM,0
4949
NotDocument,"tok @* doc!=""maz-11299""",pcc2.1,34709
50+
OptionalQueryNodeAtEnd,"entity=""person"" _r_ infstat=""giv"" & q? & #1 !_o_ #3",GUM,2404
51+
OptionalQueryNodeInBetween,"entity=""person"" !_o_ q? & infstat=""giv"" & #1 _r_ #3",GUM,2404
52+
OptionalQueryAlternative,"(entity=""person"" !_o_ q? & infstat=""giv"" & #1 _r_ #3) | (entity=""organization"" !_o_ q? & infstat=""new"" & #4 _r_ #6)",GUM,2702
5053
StructureInclusionSeed,"cat=""S"" _i_ cat=""AP""",pcc2.1,726
5154
IsConnectedRange,"""Jugendlichen"" .3,10 ""Musikcafé""",pcc2.1,1
5255
Regex,cat=/.P/ >* /A.*/,pcc2.1,1130

0 commit comments

Comments
 (0)