diff --git a/datasketches/tests/countmin_serialization_test.rs b/datasketches/tests/countmin_serialization_test.rs index 4492a55..6c2c5a5 100644 --- a/datasketches/tests/countmin_serialization_test.rs +++ b/datasketches/tests/countmin_serialization_test.rs @@ -53,17 +53,17 @@ fn assert_cpp_snapshot( #[test] fn test_deserialize_cpp_empty_snapshot() { - assert_cpp_snapshot("count_min-empty.bin", 9001, 1, 5, 0); + assert_cpp_snapshot("count_min_empty_cpp.sk", 9001, 1, 5, 0); } #[test] fn test_deserialize_cpp_non_empty_snapshot() { - assert_cpp_snapshot("count_min-non-empty.bin", 9001, 3, 1024, 2850); + assert_cpp_snapshot("count_min_non_empty_cpp.sk", 9001, 3, 1024, 2850); } #[test] fn test_deserialize_cpp_snapshot_with_wrong_seed() { - let path = serialization_test_data("cpp_generated_files", "count_min-non-empty.bin"); + let path = serialization_test_data("cpp_generated_files", "count_min_non_empty_cpp.sk"); let bytes = fs::read(&path).unwrap(); let err = CountMinSketch::::deserialize_with_seed(&bytes, 9000).unwrap_err(); diff --git a/tools/generate_serialization_test_data.py b/tools/generate_serialization_test_data.py index 28314b1..3bc9e8d 100755 --- a/tools/generate_serialization_test_data.py +++ b/tools/generate_serialization_test_data.py @@ -71,7 +71,7 @@ def generate_java_files(workspace_dir, project_dir): # 4. Clone repository repo_url = "https://github.com/apache/datasketches-java.git" - branch = "9.0.0" # FIXME: temporarily use fixed branch until mvn issue is resolved + branch = "9.0.0" run_command([ "git", "clone", "--depth", "1", @@ -133,21 +133,15 @@ def generate_cpp_files(workspace_dir, project_root): # 4. Clone repository repo_url = "https://github.com/apache/datasketches-cpp.git" - branch = "master" - # Temporary e2e checkout for apache/datasketches-cpp#505. After that PR is - # merged, pin this to the merged master commit and remove the extra fetch. - commit = "af4436280bdab53e0063268e92ff29b3fdcb1b07" - fetch_ref = "refs/pull/505/head" + commit = "401423367055acdf7502e8ed3126730a08039d91" run_command([ "git", "clone", "--depth", "1", - "--branch", branch, + "--revision", commit, "--single-branch", repo_url, str(temp_dir) ]) - run_command(["git", "fetch", "--depth", "1", "origin", fetch_ref], cwd=temp_dir) - run_command(["git", "checkout", "--detach", commit], cwd=temp_dir) # 5. Build and Run CMake build_dir = temp_dir / "build" @@ -173,14 +167,14 @@ def generate_cpp_files(workspace_dir, project_root): files_copied = 0 - for pattern in ("*_cpp.sk", "count_min-*.bin"): - for file_path in build_dir.rglob(pattern): - shutil.copy2(file_path, output_dir) - print(f"Copied: {file_path.name}") - files_copied += 1 + for file_path in build_dir.rglob("*_cpp.sk"): + shutil.copy2(file_path, output_dir) + print(f"Copied: {file_path.name}") + files_copied += 1 + if files_copied == 0: - print("Warning: No C++ serialization snapshots were found to copy.") + print("Warning: No *_cpp.sk files were found to copy.") else: print(f"Successfully copied {files_copied} files.")