Skip to content

Commit 822054f

Browse files
committed
Fix timeout check for frequency queries with large results but small tuple frequency count
1 parent e14a4ef commit 822054f

3 files changed

Lines changed: 12 additions & 12 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
- Queries with `@` could have extremely slow execution plans when the query
1414
planner introduced an inverted `@` operator and miscalculated the cost
1515
compared to the non-inverted version.
16+
- Frequency queries now execute the (additional) timeout check after a certain
17+
number of matches are processed, not if a specific tuple value has reached a
18+
threshold.
1619

1720
## [4.0.0] - 2025-08-20
1821

core/src/util/disk_collections.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,6 @@ const BLOCK_MAX_SIZE: usize = 4 * KB;
2121
/// Uses a cache for each disk table with 8 MB capacity.
2222
pub const DEFAULT_BLOCK_CACHE_CAPACITY: usize = 8 * MB;
2323

24-
#[derive(Serialize, Deserialize)]
25-
struct Entry<K, V>
26-
where
27-
K: Ord,
28-
{
29-
key: K,
30-
value: V,
31-
}
32-
3324
pub enum EvictionStrategy {
3425
MaximumItems(usize),
3526
}

graphannis/src/annis/db/corpusstorage.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ mod subgraph;
6969
#[cfg(test)]
7070
mod tests;
7171

72+
/// Number of produced tuples after which the timeout check is manually triggered (in case the underlying join did not already check the timeout).
73+
const TIMEOUT_CHECK_TUPLE_COUNT: u64 = 1_000;
74+
7275
enum CacheEntry {
7376
Loaded(AnnotationGraph),
7477
NotLoaded,
@@ -1602,7 +1605,7 @@ impl CorpusStorage {
16021605

16031606
for _ in plan {
16041607
total_count += 1;
1605-
if total_count % 1_000 == 0 {
1608+
if total_count.is_multiple_of(TIMEOUT_CHECK_TUPLE_COUNT) {
16061609
timeout.check()?;
16071610
}
16081611
}
@@ -1666,7 +1669,7 @@ impl CorpusStorage {
16661669
}
16671670
match_count += 1;
16681671

1669-
if match_count % 1_000 == 0 {
1672+
if match_count.is_multiple_of(TIMEOUT_CHECK_TUPLE_COUNT) {
16701673
timeout.check()?;
16711674
}
16721675
}
@@ -2307,9 +2310,11 @@ impl CorpusStorage {
23072310

23082311
let plan =
23092312
ExecutionPlan::from_disjunction(&prep.query, db, &self.query_config, timeout)?;
2313+
let mut total_count: u64 = 0;
23102314

23112315
for mgroup in plan {
23122316
let mgroup = mgroup?;
2317+
23132318
// for each match, extract the defined annotation (by its key) from the result node
23142319
let mut tuple: Vec<String> = Vec::with_capacity(annokeys.len());
23152320
for (node_ref, anno_keys) in &annokeys {
@@ -2328,7 +2333,8 @@ impl CorpusStorage {
23282333
let tuple_count: &mut usize = tuple_frequency.entry(tuple).or_insert(0);
23292334
*tuple_count += 1;
23302335

2331-
if *tuple_count % 1_000 == 0 {
2336+
total_count += 1;
2337+
if total_count.is_multiple_of(TIMEOUT_CHECK_TUPLE_COUNT) {
23322338
timeout.check()?;
23332339
}
23342340
}

0 commit comments

Comments
 (0)