Skip to content

Commit 540a817

Browse files
committed
Only assume non-root nodes as population for PartOf searches, not any other base edge operator searches
1 parent 1a6a11f commit 540a817

2 files changed

Lines changed: 17 additions & 18 deletions

File tree

cli/src/bin/annis.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ impl AnnisRunner {
321321
format = ExportFormat::GraphMLZip;
322322
} else if file_ext.to_string_lossy() == ".graphml" && self.current_corpus.len() != 1 {
323323
bail!(
324-
r##"You need to select a *single* corpus first with the \"corpus\" command when exporting to a GraphML file.
324+
r##"You need to select a *single* corpus first with the \"corpus\" command when exporting to a GraphML file.
325325
To export multiple corpora, select a directory as output or a ZIP file (ending with .zip)"##
326326
);
327327
}
@@ -442,7 +442,7 @@ impl AnnisRunner {
442442
"unsorted" => ResultOrder::NotSorted,
443443
_ => {
444444
return Err(anyhow!(
445-
"Non-existing order with name {}.
445+
"Non-existing order with name {}.
446446
Must be one of \"normal\", \"inverted\", \"random\", \"unsorted\"",
447447
args
448448
));

graphannis/src/annis/db/aql/operators/edge_op.rs

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,29 +54,28 @@ impl BaseEdgeOp {
5454
.components
5555
.iter()
5656
.all(|c| c.get_type() == AnnotationComponentType::PartOf);
57-
// Use the single graph storage to get an estimate of population of nodes that can be found.
58-
let max_nodes_estimate = if gs.len() == 1 {
59-
gs[0]
60-
.get_statistics()
61-
.map(|s| s.nodes.saturating_sub(s.root_nodes))
62-
.unwrap_or(any_node_count)
63-
} else {
64-
// Use all nodes regardless of the component as population estimate
65-
if all_part_of_components {
57+
let max_nodes_estimate = if all_part_of_components {
58+
// PartOf components have a very skewed distribution of root nodes
59+
// vs. the actual possible targets, thus do not use all nodes as
60+
// population but only the non-roots.
61+
if gs.len() == 1 {
62+
gs[0]
63+
.get_statistics()
64+
.map(|s| s.nodes.saturating_sub(s.root_nodes))
65+
.unwrap_or(any_node_count)
66+
} else {
67+
// If multiple PartOf graph storages are combined, we can guess
68+
// the non-root nodes by estimating the number of nodes in the
69+
// corpus graph.
6670
db.get_node_annos().guess_max_count(
6771
Some(&NODE_TYPE_KEY.ns),
6872
&NODE_TYPE_KEY.name,
6973
"corpus",
7074
"text",
7175
)?
72-
} else {
73-
db.get_node_annos().guess_max_count(
74-
Some(&NODE_TYPE_KEY.ns),
75-
&NODE_TYPE_KEY.name,
76-
"node",
77-
"node",
78-
)?
7976
}
77+
} else {
78+
any_node_count
8079
};
8180
Ok(BaseEdgeOp {
8281
gs,

0 commit comments

Comments
 (0)