Skip to content

Commit 2c656d5

Browse files
authored
Merge pull request #326 from korpling/fix-large-files-in-zip
Fix exporting large corpora to ZIP files
2 parents 9f5aeaa + ad3f790 commit 2c656d5

4 files changed

Lines changed: 55 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
55

66
## [Unreleased]
77

8+
### Fixed
9+
10+
- Export to ZIP failed when the contained GraphML file was too large, with the error
11+
`Error: I/O error: Large file option has not been set`. The export now uses the ZIP64 extension
12+
(which should be supported in most current tools and libraries) to write the ZIP
13+
file.
14+
815
## [3.8.1] - 2025-05-22
916

1017
### Fixed

cli/tests/cli.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use assert_cmd::prelude::*;
22
use insta::Settings;
33
use insta_cmd::assert_cmd_snapshot;
44
use serial_test::serial;
5-
use std::process::Command;
5+
use std::{path::Path, process::Command};
66

77
fn standard_filter() -> Settings {
88
let mut settings = insta::Settings::clone_current();
@@ -88,3 +88,25 @@ fn list_corpora_partially_loaded() -> Result<(), Box<dyn std::error::Error>> {
8888

8989
Ok(())
9090
}
91+
92+
/// Runs the `annis` binary, selects the `sample-disk-based-3.3` corpus and
/// exports it to `sample-disk-based-3.3.zip`, then checks both the command
/// output (via snapshot) and that the ZIP file exists afterwards.
///
/// The exported file is removed again when the test finishes, regardless of
/// whether the assertions passed.
///
/// # Errors
///
/// Returns an error if the `annis` binary cannot be located.
#[test]
#[serial]
fn export_to_zip_file() -> Result<(), Box<dyn std::error::Error>> {
    /// Deletes the wrapped path when dropped, so the exported file is also
    /// removed when one of the assertions below panics.
    struct RemoveOnDrop<'a>(&'a Path);

    impl Drop for RemoveOnDrop<'_> {
        fn drop(&mut self) {
            // Best effort only: the file does not exist if the export failed.
            let _ = std::fs::remove_file(self.0);
        }
    }

    // The export target is given as a relative path, so it is looked up
    // relative to the working directory of the test.
    let p = Path::new("sample-disk-based-3.3.zip");

    // A left-over file from an aborted earlier run must not satisfy the
    // existence check below. Ignore the error if there is nothing to remove.
    let _ = std::fs::remove_file(p);
    let _cleanup = RemoveOnDrop(p);

    let mut cmd = Command::cargo_bin("annis")?;

    cmd.arg("../graphannis/tests/data/")
        .arg("-c")
        .arg("corpus sample-disk-based-3.3")
        .arg("-c")
        .arg("export sample-disk-based-3.3.zip");

    let settings = standard_filter();
    settings.bind(|| assert_cmd_snapshot!(cmd));

    // Check that the file has been created
    assert!(p.is_file(), "export did not create {}", p.display());
    Ok(())
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
---
2+
source: cli/tests/cli.rs
3+
info:
4+
program: annis
5+
args:
6+
- "../graphannis/tests/data/"
7+
- "-c"
8+
- corpus sample-disk-based-3.3
9+
- "-c"
10+
- export sample-disk-based-3.3.zip
11+
---
12+
success: true
13+
exit_code: 0
14+
----- stdout -----
15+
12:00:00[INFO] Loaded corpus sample-disk-based-3.3
16+
12:00:00[INFO] exporting all available annotation keys
17+
12:00:00[INFO] exporting nodes
18+
12:00:00[INFO] exporting edges
19+
12:00:00[INFO] exported corpora ["sample-disk-based-3.3"] in 0s
20+
graphANNIS says good-bye!
21+
22+
----- stderr -----

graphannis/src/annis/db/corpusstorage.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,8 +1245,9 @@ impl CorpusStorage {
12451245
W: Write + Seek,
12461246
F: Fn(&str),
12471247
{
1248-
let options =
1249-
zip::write::FileOptions::default().compression_method(zip::CompressionMethod::Deflated);
1248+
let options = zip::write::FileOptions::default()
1249+
.compression_method(zip::CompressionMethod::Deflated)
1250+
.large_file(true);
12501251

12511252
let mut base_path = PathBuf::default();
12521253
if use_corpus_subdirectory {

0 commit comments

Comments
 (0)