Skip to content

Commit b249354

Browse files
committed
Add tests and only add the inherited edges for the invalid node itself.
All covered nodes of an invalidated node will also be invalidated, so we don't need to implicitly add their edges when collecting the covered tokens of the invalid node.
1 parent 700b11b commit b249354

4 files changed

Lines changed: 516 additions & 42 deletions

File tree

graphannis/src/annis/db/aql/model.rs

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -126,47 +126,53 @@ fn calculate_inherited_coverage_edges(
126126
.collect();
127127
let combined_gs = UnionEdgeContainer::new(all_text_coverage_gs);
128128

129-
// Remember the non-token and token nodes, so we can connect all non-token
130-
// nodes to the covered token.
131-
let mut indirectly_covered_nodes = FxHashSet::default();
132-
indirectly_covered_nodes.insert(n);
133129
let mut covered_token = FxHashSet::default();
134130

135-
let tok_helper = TokenHelper::new(graph)?;
136-
137-
for step in CycleSafeDFS::new(&combined_gs, n, 1, usize::MAX) {
138-
let step = step?;
139-
if tok_helper.is_token(step.node)? {
140-
covered_token.insert(step.node);
141-
} else {
142-
indirectly_covered_nodes.insert(step.node);
143-
}
144-
}
145-
146-
let coverage_gs = tok_helper.get_gs_coverage().clone();
147-
148-
if let Ok(gs_cov) = graph.get_or_create_writable(&AnnotationComponent::new(
131+
let inherited_cov_component = AnnotationComponent::new(
149132
AnnotationComponentType::Coverage,
150133
ANNIS_NS.into(),
151134
"inherited-coverage".into(),
152-
)) {
153-
// Connect all non-token nodes to the covered token nodes if no such direct coverage already exists
154-
for source in indirectly_covered_nodes {
155-
for target in &covered_token {
156-
let mut needs_edge = true;
157-
for gs in coverage_gs.iter() {
158-
if gs.is_connected(source, *target, 1, std::ops::Bound::Included(1))? {
159-
needs_edge = false;
160-
break;
161-
}
162-
}
135+
);
163136

164-
if needs_edge {
165-
gs_cov.add_edge(Edge {
166-
source,
167-
target: *target,
168-
})?;
169-
}
137+
{
138+
let tok_helper = TokenHelper::new(graph)?;
139+
for step in CycleSafeDFS::new(&combined_gs, n, 1, usize::MAX) {
140+
let step = step?;
141+
if tok_helper.is_token(step.node)? {
142+
covered_token.insert(step.node);
143+
}
144+
}
145+
};
146+
let other_coverage_gs: Vec<Arc<dyn GraphStorage>> = graph
147+
.get_all_components(Some(AnnotationComponentType::Coverage), None)
148+
.into_iter()
149+
.filter(|c| c != &inherited_cov_component)
150+
.filter_map(|c| graph.get_graphstorage(&c))
151+
.filter(|gs| {
152+
if let Some(stats) = gs.get_statistics() {
153+
stats.nodes > 0
154+
} else {
155+
true
156+
}
157+
})
158+
.collect();
159+
160+
// Connect all non-token nodes to the covered token nodes if no such direct coverage already exists
161+
let mut direct_coverage_targets = FxHashSet::default();
162+
for gs in other_coverage_gs.iter() {
163+
for target in gs.get_outgoing_edges(n) {
164+
direct_coverage_targets.insert(target?);
165+
}
166+
}
167+
let gs_cov = graph.get_or_create_writable(&inherited_cov_component)?;
168+
169+
for target in &covered_token {
170+
if n != *target {
171+
if !direct_coverage_targets.contains(target) {
172+
gs_cov.add_edge(Edge {
173+
source: n,
174+
target: *target,
175+
})?;
170176
}
171177
}
172178
}
@@ -283,7 +289,7 @@ impl AQLUpdateGraphIndex {
283289
let all_cov_components =
284290
graph.get_all_components(Some(AnnotationComponentType::Coverage), None);
285291
let all_dom_components =
286-
graph.get_all_components(Some(AnnotationComponentType::Dominance), Some(""));
292+
graph.get_all_components(Some(AnnotationComponentType::Dominance), None);
287293

288294
// go over each node and calculate the left-most and right-most token
289295
for invalid in self.invalid_nodes.iter()? {

graphannis/src/annis/db/aql/model/tests.rs

Lines changed: 254 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
11
use std::{fs::File, path::PathBuf};
22

3-
use crate::{annis::db::aql::model::CorpusSize, AnnotationGraph};
3+
use crate::{
4+
annis::db::{aql::model::CorpusSize, example_generator},
5+
model::AnnotationComponent,
6+
AnnotationGraph,
7+
};
48
use assert_matches::assert_matches;
9+
use graphannis_core::graph::{
10+
storage::GraphStorage,
11+
update::{GraphUpdate, UpdateEvent},
12+
NODE_NAME_KEY,
13+
};
14+
use itertools::Itertools;
15+
16+
use super::AnnotationComponentType::Coverage;
517

618
#[test]
719
fn global_stats_token_count() {
@@ -21,3 +33,244 @@ fn global_stats_token_count() {
2133
&& *segmentation_count.get("diplomatic").unwrap() == 11
2234
&& *segmentation_count.get("norm").unwrap() == 13);
2335
}
36+
37+
#[test]
38+
fn inherited_cov_edges_simple_tokenization() {
39+
// Add a simple dominance node structure above the example sentence.
40+
let mut u = GraphUpdate::new();
41+
example_generator::create_corpus_structure_simple(&mut u);
42+
example_generator::create_tokens(&mut u, Some("root/doc1"), Some("root/doc1"));
43+
example_generator::make_span(
44+
&mut u,
45+
"root/doc1#span1",
46+
&["root/doc1#tok1", "root/doc1#tok2", "root/doc1#tok3"],
47+
true,
48+
);
49+
example_generator::make_span(
50+
&mut u,
51+
"root/doc1#span2",
52+
&["root/doc1#tok4", "root/doc1#tok5"],
53+
true,
54+
);
55+
u.add_event(UpdateEvent::AddNode {
56+
node_name: "root/doc1#struct1".to_string(),
57+
node_type: "node".to_string(),
58+
})
59+
.unwrap();
60+
u.add_event(UpdateEvent::AddNodeLabel {
61+
node_name: "root/doc1#struct1".to_string(),
62+
anno_ns: "test".to_string(),
63+
anno_name: "cat".to_string(),
64+
anno_value: "P".to_string(),
65+
})
66+
.unwrap();
67+
u.add_event(UpdateEvent::AddEdge {
68+
source_node: "root/doc1#struct1".to_string(),
69+
target_node: "root/doc1#span1".to_string(),
70+
layer: "test".to_string(),
71+
component_type: "Dominance".to_string(),
72+
component_name: "edge".to_string(),
73+
})
74+
.unwrap();
75+
u.add_event(UpdateEvent::AddEdge {
76+
source_node: "root/doc1#struct1".to_string(),
77+
target_node: "root/doc1#span2".to_string(),
78+
layer: "test".to_string(),
79+
component_type: "Dominance".to_string(),
80+
component_name: "edge".to_string(),
81+
})
82+
.unwrap();
83+
u.add_event(UpdateEvent::AddNode {
84+
node_name: "root/doc1#struct2".to_string(),
85+
node_type: "node".to_string(),
86+
})
87+
.unwrap();
88+
u.add_event(UpdateEvent::AddNodeLabel {
89+
node_name: "root/doc1#struct2".to_string(),
90+
anno_ns: "test".to_string(),
91+
anno_name: "cat".to_string(),
92+
anno_value: "ROOT".to_string(),
93+
})
94+
.unwrap();
95+
u.add_event(UpdateEvent::AddEdge {
96+
source_node: "root/doc1#struct2".to_string(),
97+
target_node: "root/doc1#struct1".to_string(),
98+
layer: "test".to_string(),
99+
component_type: "Dominance".to_string(),
100+
component_name: "edge".to_string(),
101+
})
102+
.unwrap();
103+
104+
let mut g = AnnotationGraph::with_default_graphstorages(false).unwrap();
105+
g.apply_update(&mut u, |_| {}).unwrap();
106+
107+
// Check that the inherited coverage edges have been created
108+
let gs = g
109+
.get_graphstorage_as_ref(&AnnotationComponent::new(
110+
Coverage,
111+
"annis".into(),
112+
"inherited-coverage".into(),
113+
))
114+
.unwrap();
115+
let sources: Vec<_> = gs
116+
.source_nodes()
117+
.map(|n| {
118+
g.get_node_annos()
119+
.get_value_for_item(&n.unwrap(), &NODE_NAME_KEY)
120+
.unwrap()
121+
.unwrap()
122+
.to_string()
123+
})
124+
.sorted()
125+
.collect();
126+
assert_eq!(sources, vec!["root/doc1#struct1", "root/doc1#struct2"]);
127+
128+
// Also check that the edges target the right token
129+
assert_out_edges(
130+
&g,
131+
gs,
132+
"root/doc1#struct1",
133+
&[
134+
"root/doc1#tok1",
135+
"root/doc1#tok2",
136+
"root/doc1#tok3",
137+
"root/doc1#tok4",
138+
"root/doc1#tok5",
139+
],
140+
);
141+
assert_out_edges(
142+
&g,
143+
gs,
144+
"root/doc1#struct2",
145+
&[
146+
"root/doc1#tok1",
147+
"root/doc1#tok2",
148+
"root/doc1#tok3",
149+
"root/doc1#tok4",
150+
"root/doc1#tok5",
151+
],
152+
);
153+
}
154+
155+
#[test]
156+
fn inherited_cov_edges_multiple_segmentation() {
157+
let mut u = GraphUpdate::new();
158+
example_generator::create_corpus_structure_simple(&mut u);
159+
example_generator::create_multiple_segmentations(&mut u, "root/doc1");
160+
// Add a simple dominance node structure above the "a" segmentation
161+
example_generator::make_span(
162+
&mut u,
163+
"root/doc1#span1",
164+
&["root/doc1#a1", "root/doc1#a2", "root/doc1#a3"],
165+
true,
166+
);
167+
example_generator::make_span(&mut u, "root/doc1#span2", &["root/doc1#a4"], true);
168+
u.add_event(UpdateEvent::AddNode {
169+
node_name: "root/doc1#struct1".to_string(),
170+
node_type: "node".to_string(),
171+
})
172+
.unwrap();
173+
u.add_event(UpdateEvent::AddNodeLabel {
174+
node_name: "root/doc1#struct1".to_string(),
175+
anno_ns: "test".to_string(),
176+
anno_name: "cat".to_string(),
177+
anno_value: "ROOT".to_string(),
178+
})
179+
.unwrap();
180+
u.add_event(UpdateEvent::AddEdge {
181+
source_node: "root/doc1#struct1".to_string(),
182+
target_node: "root/doc1#span1".to_string(),
183+
layer: "test".to_string(),
184+
component_type: "Dominance".to_string(),
185+
component_name: "edge".to_string(),
186+
})
187+
.unwrap();
188+
u.add_event(UpdateEvent::AddEdge {
189+
source_node: "root/doc1#struct1".to_string(),
190+
target_node: "root/doc1#span2".to_string(),
191+
layer: "test".to_string(),
192+
component_type: "Dominance".to_string(),
193+
component_name: "edge".to_string(),
194+
})
195+
.unwrap();
196+
197+
let mut g = AnnotationGraph::with_default_graphstorages(false).unwrap();
198+
g.apply_update(&mut u, |_| {}).unwrap();
199+
200+
// TODO Check that the inherited coverage edges have been created
201+
let gs = g
202+
.get_graphstorage_as_ref(&AnnotationComponent::new(
203+
Coverage,
204+
"annis".into(),
205+
"inherited-coverage".into(),
206+
))
207+
.unwrap();
208+
209+
let sources: Vec<_> = gs
210+
.source_nodes()
211+
.map(|n| {
212+
g.get_node_annos()
213+
.get_value_for_item(&n.unwrap(), &NODE_NAME_KEY)
214+
.unwrap()
215+
.unwrap()
216+
.to_string()
217+
})
218+
.sorted()
219+
.collect();
220+
assert_eq!(
221+
sources,
222+
vec!["root/doc1#span1", "root/doc1#span2", "root/doc1#struct1"]
223+
);
224+
225+
// Also check that the edges target the right timeline items (and not the segmentation nodes)
226+
assert_out_edges(
227+
&g,
228+
gs,
229+
"root/doc1#span1",
230+
&[
231+
"root/doc1#tli1",
232+
"root/doc1#tli2",
233+
"root/doc1#tli3",
234+
"root/doc1#tli4",
235+
],
236+
);
237+
assert_out_edges(&g, gs, "root/doc1#span2", &["root/doc1#tli5"]);
238+
assert_out_edges(
239+
&g,
240+
gs,
241+
"root/doc1#struct1",
242+
&[
243+
"root/doc1#tli1",
244+
"root/doc1#tli2",
245+
"root/doc1#tli3",
246+
"root/doc1#tli4",
247+
"root/doc1#tli5",
248+
],
249+
);
250+
}
251+
252+
fn assert_out_edges(
253+
graph: &AnnotationGraph,
254+
gs: &dyn GraphStorage,
255+
source: &str,
256+
expected: &[&str],
257+
) {
258+
let out: Vec<_> = gs
259+
.get_outgoing_edges(
260+
graph
261+
.get_node_annos()
262+
.get_node_id_from_name(source)
263+
.unwrap()
264+
.unwrap(),
265+
)
266+
.map(|t| {
267+
graph
268+
.get_node_annos()
269+
.get_value_for_item(&t.unwrap(), &NODE_NAME_KEY)
270+
.unwrap()
271+
.unwrap()
272+
.to_string()
273+
})
274+
.collect();
275+
assert_eq!(out, expected);
276+
}

graphannis/src/annis/db/corpusstorage/tests.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ fn subgraph_with_segmentation() {
676676
];
677677
for (i, t) in seg_tokens.iter().enumerate() {
678678
let node_name = format!("root/doc1#seg{}", i);
679-
example_generator::create_token_node(&mut g, &node_name, t, Some("root/doc1"));
679+
example_generator::create_token_node(&mut g, &node_name, t, None, None, Some("root/doc1"));
680680
g.add_event(UpdateEvent::AddNodeLabel {
681681
node_name,
682682
anno_ns: "default_ns".to_string(),
@@ -1488,6 +1488,8 @@ fn reoptimize_corpussizeconfig() {
14881488
&mut u,
14891489
"rootCorpus/subCorpus1/doc1#sTok12",
14901490
"!",
1491+
None,
1492+
None,
14911493
Some("rootCorpus/subCorpus1/doc1#sText1"),
14921494
);
14931495
u.add_event(UpdateEvent::AddEdge {

0 commit comments

Comments
 (0)