Skip to content

Commit cf70fcd

Browse files
committed
Add support for coverage edges between spans and segmentation nodes
1 parent 781dd89 commit cf70fcd

6 files changed

Lines changed: 69 additions & 53 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
55

66
## [Unreleased]
77

8+
### Added
9+
10+
- Added support for coverage edges between span nodes and segmentation nodes when
11+
calculating the AQL model index.
12+
13+
### Fixed
14+
15+
- Do not use recursion to calculate the indirect coverage edges in the model
16+
index, since this could fail for deeply nested structures.
17+
818
## [3.3.3] - 2024-07-12
919

1020
### Fixed

cli/src/bin/annis.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ impl AnnisRunner {
175175
let readline = rl.readline(&prompt);
176176
match readline {
177177
Ok(line) => {
178-
rl.add_history_entry(&line.clone());
178+
rl.add_history_entry(line.clone());
179179
if !self.exec(&line) {
180180
break;
181181
}

graphannis/src/annis/db/aql/model.rs

Lines changed: 55 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -112,60 +112,66 @@ fn calculate_inherited_coverage_edges(
112112
graph: &mut AnnotationGraph,
113113
n: NodeID,
114114
all_cov_components: &[AnnotationComponent],
115-
all_dom_gs: &[Arc<dyn GraphStorage>],
115+
all_dom_components: &[AnnotationComponent],
116116
) -> std::result::Result<FxHashSet<NodeID>, ComponentTypeError> {
117-
let mut directly_covered_token = FxHashSet::default();
118-
119-
for c in all_cov_components.iter() {
120-
if let Some(gs) = graph.get_graphstorage_as_ref(c) {
121-
let out: Result<Vec<u64>, graphannis_core::errors::GraphAnnisCoreError> =
122-
gs.get_outgoing_edges(n).collect();
123-
directly_covered_token.extend(out?);
124-
}
125-
}
126-
127-
if directly_covered_token.is_empty() {
128-
let has_token_anno = graph
129-
.get_node_annos()
130-
.get_value_for_item(&n, &TOKEN_KEY)?
131-
.is_some();
132-
if has_token_anno {
133-
// Even if technically a token does not cover itself, if we need to abort the recursion
134-
// with the basic case
135-
directly_covered_token.insert(n);
117+
// Iterate over all nodes that are somehow covered (by coverage or
118+
// dominance edges) starting from the given node.
119+
let all_text_coverage_components: Vec<AnnotationComponent> =
120+
[all_cov_components, all_dom_components].concat();
121+
122+
let all_text_coverage_gs: Vec<_> = all_text_coverage_components
123+
.iter()
124+
.filter_map(|c| graph.get_graphstorage_as_ref(c))
125+
.map(|gs| gs.as_edgecontainer())
126+
.collect();
127+
let combined_gs = UnionEdgeContainer::new(all_text_coverage_gs);
128+
129+
// Remember the non-token and token nodes, so we can connect all non-token
130+
// nodes to the covered token.
131+
let mut indirectly_covered_nodes = FxHashSet::default();
132+
indirectly_covered_nodes.insert(n);
133+
let mut covered_token = FxHashSet::default();
134+
135+
let tok_helper = TokenHelper::new(graph)?;
136+
137+
for step in CycleSafeDFS::new(&combined_gs, n, 1, usize::MAX) {
138+
let step = step?;
139+
if tok_helper.is_token(step.node)? {
140+
covered_token.insert(step.node);
141+
} else {
142+
indirectly_covered_nodes.insert(step.node);
136143
}
137144
}
138145

139-
let mut indirectly_covered_token = FxHashSet::default();
140-
// recursivly get the covered token from all children connected by a dominance relation
141-
for dom_gs in all_dom_gs {
142-
for out in dom_gs.get_outgoing_edges(n) {
143-
let out = out?;
144-
indirectly_covered_token.extend(calculate_inherited_coverage_edges(
145-
graph,
146-
out,
147-
all_cov_components,
148-
all_dom_gs,
149-
)?);
150-
}
151-
}
146+
let coverage_gs = tok_helper.get_gs_coverage().clone();
152147

153148
if let Ok(gs_cov) = graph.get_or_create_writable(&AnnotationComponent::new(
154149
AnnotationComponentType::Coverage,
155150
ANNIS_NS.into(),
156151
"inherited-coverage".into(),
157152
)) {
158-
// Ignore all already directly covered token when creating the inherited coverage edges
159-
for t in indirectly_covered_token.difference(&directly_covered_token) {
160-
gs_cov.add_edge(Edge {
161-
source: n,
162-
target: *t,
163-
})?;
153+
// Connect all non-token nodes to the covered token nodes if no such direct coverage already exists
154+
for source in indirectly_covered_nodes {
155+
for target in &covered_token {
156+
let mut needs_edge = true;
157+
for gs in coverage_gs.iter() {
158+
if gs.is_connected(source, *target, 1, std::ops::Bound::Included(1))? {
159+
needs_edge = false;
160+
break;
161+
}
162+
}
163+
164+
if needs_edge {
165+
gs_cov.add_edge(Edge {
166+
source,
167+
target: *target,
168+
})?;
169+
}
170+
}
164171
}
165172
}
166173

167-
directly_covered_token.extend(indirectly_covered_token);
168-
Ok(directly_covered_token)
174+
Ok(covered_token)
169175
}
170176

171177
pub struct AQLUpdateGraphIndex {
@@ -276,17 +282,18 @@ impl AQLUpdateGraphIndex {
276282

277283
let all_cov_components =
278284
graph.get_all_components(Some(AnnotationComponentType::Coverage), None);
279-
let all_dom_gs: Vec<Arc<dyn GraphStorage>> = graph
280-
.get_all_components(Some(AnnotationComponentType::Dominance), Some(""))
281-
.into_iter()
282-
.filter_map(|c| graph.get_graphstorage(&c))
283-
.collect();
285+
let all_dom_components =
286+
graph.get_all_components(Some(AnnotationComponentType::Dominance), Some(""));
284287

285288
// go over each node and calculate the left-most and right-most token
286289
for invalid in self.invalid_nodes.iter()? {
287290
let (n, _) = invalid?;
288-
let covered_token =
289-
calculate_inherited_coverage_edges(graph, n, &all_cov_components, &all_dom_gs)?;
291+
let covered_token = calculate_inherited_coverage_edges(
292+
graph,
293+
n,
294+
&all_cov_components,
295+
&all_dom_components,
296+
)?;
290297
self.calculate_token_alignment(
291298
graph,
292299
n,

graphannis/src/annis/db/corpusstorage.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1078,7 +1078,7 @@ impl CorpusStorage {
10781078
for node in file_nodes? {
10791079
// Get the linked file for this node
10801080
if let Some(original_path) = node_annos.get_value_for_item(&node, &linked_file_key)? {
1081-
let original_path = old_base_path.join(&PathBuf::from(original_path.as_ref()));
1081+
let original_path = old_base_path.join(PathBuf::from(original_path.as_ref()));
10821082
if original_path.is_file() {
10831083
if let Some(node_name) = node_annos.get_value_for_item(&node, &NODE_NAME_KEY)? {
10841084
// Create a new file name based on the node name and copy the file
@@ -1583,6 +1583,7 @@ impl CorpusStorage {
15831583

15841584
/// Count the number of results for a `query`.
15851585
/// - `query` - The search query definition.
1586+
///
15861587
/// Returns the count as number.
15871588
pub fn count<S: AsRef<str>>(&self, query: SearchQuery<S>) -> Result<u64> {
15881589
let timeout = TimeoutCheck::new(query.timeout);

graphannis/src/annis/util/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ impl From<SearchDefRaw> for SearchDef {
7373
/// Returns a vector over all query definitions defined in a CSV file.
7474
/// - `file` - The CSV file path.
7575
/// - `panic_on_invalid` - If true, an invalid query definition will trigger a panic, otherwise it will be ignored.
76+
///
7677
/// Can be used if this query is called in a test case to fail the test.
7778
pub fn get_queries_from_csv(file: &Path, panic_on_invalid: bool) -> Vec<SearchDef> {
7879
if let Ok(mut reader) = csv::Reader::from_path(file) {

graphannis/src/lib.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ extern crate lazy_static;
2222
#[macro_use]
2323
extern crate lalrpop_util;
2424

25-
#[cfg(feature = "c-api")]
26-
extern crate simplelog;
27-
2825
mod annis;
2926

3027
pub use crate::annis::db::corpusstorage::CorpusStorage;

0 commit comments

Comments
 (0)