This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 774fa42  build: generate snapshots on the fly  (#29)
774fa42 is described below

commit 774fa42a2f5e0ff563ec6518b6ee47c39246c7f1
Author: tison <[email protected]>
AuthorDate: Fri Dec 19 13:04:37 2025 +0800

    build: generate snapshots on the fly  (#29)
    
    * build: generate snapshots on the fly
    
    Signed-off-by: tison <[email protected]>
    
    * impl with docker
    
    Signed-off-by: tison <[email protected]>
    
    * use a python script
    
    Signed-off-by: tison <[email protected]>
    
    * for cpp snapshots
    
    Signed-off-by: tison <[email protected]>
    
    * tidy
    
    Signed-off-by: tison <[email protected]>
    
    * Add tool to generate serialization test data for Java and C++
    
    * setup jdk 25
    
    Signed-off-by: tison <[email protected]>
    
    * Enhance snapshot generation script: Windows support and output capturing
    
    * Stream command output to stdout for real-time CI logs
    
    * fine tune
    
    Signed-off-by: tison <[email protected]>
    
    ---------
    
    Signed-off-by: tison <[email protected]>
---
 .github/workflows/ci.yml                           |   8 +
 .gitignore                                         |   4 +
 tests/.gitignore                                   |   1 +
 tests/hll_serialization_test.rs                    |  20 ++-
 .../cpp_generated_files/hll4_n0_cpp.sk             | Bin 8 -> 0 bytes
 .../cpp_generated_files/hll4_n1000000_cpp.sk       | Bin 2092 -> 0 bytes
 .../cpp_generated_files/hll4_n100000_cpp.sk        | Bin 2092 -> 0 bytes
 .../cpp_generated_files/hll4_n10000_cpp.sk         | Bin 2088 -> 0 bytes
 .../cpp_generated_files/hll4_n1000_cpp.sk          | Bin 2088 -> 0 bytes
 .../cpp_generated_files/hll4_n100_cpp.sk           | Bin 412 -> 0 bytes
 .../cpp_generated_files/hll4_n10_cpp.sk            | Bin 52 -> 0 bytes
 .../cpp_generated_files/hll4_n1_cpp.sk             | Bin 12 -> 0 bytes
 .../cpp_generated_files/hll6_n0_cpp.sk             | Bin 8 -> 0 bytes
 .../cpp_generated_files/hll6_n1000000_cpp.sk       | Bin 3113 -> 0 bytes
 .../cpp_generated_files/hll6_n100000_cpp.sk        | Bin 3113 -> 0 bytes
 .../cpp_generated_files/hll6_n10000_cpp.sk         | Bin 3113 -> 0 bytes
 .../cpp_generated_files/hll6_n1000_cpp.sk          | Bin 3113 -> 0 bytes
 .../cpp_generated_files/hll6_n100_cpp.sk           | Bin 412 -> 0 bytes
 .../cpp_generated_files/hll6_n10_cpp.sk            | Bin 52 -> 0 bytes
 .../cpp_generated_files/hll6_n1_cpp.sk             |   1 -
 .../cpp_generated_files/hll8_n0_cpp.sk             | Bin 8 -> 0 bytes
 .../cpp_generated_files/hll8_n1000000_cpp.sk       | Bin 4136 -> 0 bytes
 .../cpp_generated_files/hll8_n100000_cpp.sk        | Bin 4136 -> 0 bytes
 .../cpp_generated_files/hll8_n10000_cpp.sk         | Bin 4136 -> 0 bytes
 .../cpp_generated_files/hll8_n1000_cpp.sk          | Bin 4136 -> 0 bytes
 .../cpp_generated_files/hll8_n100_cpp.sk           | Bin 412 -> 0 bytes
 .../cpp_generated_files/hll8_n10_cpp.sk            | Bin 52 -> 0 bytes
 .../cpp_generated_files/hll8_n1_cpp.sk             |   1 -
 .../java_generated_files/hll4_n0_java.sk           | Bin 8 -> 0 bytes
 .../java_generated_files/hll4_n1000000_java.sk     | Bin 2092 -> 0 bytes
 .../java_generated_files/hll4_n100000_java.sk      | Bin 2092 -> 0 bytes
 .../java_generated_files/hll4_n10000_java.sk       | Bin 2088 -> 0 bytes
 .../java_generated_files/hll4_n1000_java.sk        | Bin 2088 -> 0 bytes
 .../java_generated_files/hll4_n100_java.sk         | Bin 412 -> 0 bytes
 .../java_generated_files/hll4_n10_java.sk          | Bin 52 -> 0 bytes
 .../java_generated_files/hll4_n1_java.sk           | Bin 12 -> 0 bytes
 .../java_generated_files/hll6_n0_java.sk           | Bin 8 -> 0 bytes
 .../java_generated_files/hll6_n1000000_java.sk     | Bin 3113 -> 0 bytes
 .../java_generated_files/hll6_n100000_java.sk      | Bin 3113 -> 0 bytes
 .../java_generated_files/hll6_n10000_java.sk       | Bin 3113 -> 0 bytes
 .../java_generated_files/hll6_n1000_java.sk        | Bin 3113 -> 0 bytes
 .../java_generated_files/hll6_n100_java.sk         | Bin 412 -> 0 bytes
 .../java_generated_files/hll6_n10_java.sk          | Bin 52 -> 0 bytes
 .../java_generated_files/hll6_n1_java.sk           |   1 -
 .../java_generated_files/hll8_n0_java.sk           | Bin 8 -> 0 bytes
 .../java_generated_files/hll8_n1000000_java.sk     | Bin 4136 -> 0 bytes
 .../java_generated_files/hll8_n100000_java.sk      | Bin 4136 -> 0 bytes
 .../java_generated_files/hll8_n10000_java.sk       | Bin 4136 -> 0 bytes
 .../java_generated_files/hll8_n1000_java.sk        | Bin 4136 -> 0 bytes
 .../java_generated_files/hll8_n100_java.sk         | Bin 412 -> 0 bytes
 .../java_generated_files/hll8_n10_java.sk          | Bin 52 -> 0 bytes
 .../java_generated_files/hll8_n1_java.sk           |   1 -
 tools/generate_serialization_test_data.py          | 188 +++++++++++++++++++++
 53 files changed, 219 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 643e96e..6ce091d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -86,6 +86,14 @@ jobs:
         run: |
           rustup toolchain install ${{ matrix.rust-version }}
           rustup default ${{ matrix.rust-version }}
+      - name: Setup Java
+        uses: actions/setup-java@v5
+        with:
+          java-version: '25'
+          distribution: 'temurin'
+      - name: Prepare test data
+        shell: bash
+        run: ./tools/generate_serialization_test_data.py
       - name: Build
         run: cargo build --workspace --all-features --bins --tests --examples 
--benches --lib
       - name: Run unit tests
diff --git a/.gitignore b/.gitignore
index 36d5e44..3bd854a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,7 @@
 
 # Build artifacts
 **/target
+
+# Temporary files
+tmp_datasketches_java/
+tmp_datasketches_cpp/
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..2c1198d
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1 @@
+serialization_test_data
diff --git a/tests/hll_serialization_test.rs b/tests/hll_serialization_test.rs
index 23e9973..a3c397b 100644
--- a/tests/hll_serialization_test.rs
+++ b/tests/hll_serialization_test.rs
@@ -32,10 +32,26 @@ use datasketches::hll::HllSketch;
 const TEST_DATA_DIR: &str = "tests/serialization_test_data";
 
 fn get_test_data_path(sub_dir: &str, name: &str) -> PathBuf {
-    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
         .join(TEST_DATA_DIR)
         .join(sub_dir)
-        .join(name)
+        .join(name);
+
+    if !path.exists() {
+        panic!(
+            r#"serialization test data file not found: {}
+
+            Please ensure test data files are present in the repository. 
Generally, you can
+            run the following commands from the project root to regenerate the 
test data files
+            if they are missing:
+
+            $ ./tools/generate_serialization_test_data.py
+        "#,
+            path.display(),
+        );
+    }
+
+    path
 }
 
 fn test_sketch_file(path: PathBuf, expected_cardinality: usize, expected_lg_k: 
u8) {
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n0_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n0_cpp.sk
deleted file mode 100644
index 074868c..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll4_n0_cpp.sk 
and /dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n1000000_cpp.sk
deleted file mode 100644
index 9a8c626..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll4_n100000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n100000_cpp.sk
deleted file mode 100644
index 2b4b4dc..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll4_n100000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll4_n10000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n10000_cpp.sk
deleted file mode 100644
index e97b181..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll4_n10000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n1000_cpp.sk
deleted file mode 100644
index a6b5f9a..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll4_n1000_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n100_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n100_cpp.sk
deleted file mode 100644
index 0e12ff4..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll4_n100_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n10_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n10_cpp.sk
deleted file mode 100644
index e4ed655..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll4_n10_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll4_n1_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll4_n1_cpp.sk
deleted file mode 100644
index 1f4f6ee..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll4_n1_cpp.sk 
and /dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n0_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n0_cpp.sk
deleted file mode 100644
index c31bd26..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll6_n0_cpp.sk 
and /dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n1000000_cpp.sk
deleted file mode 100644
index a51d1e0..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll6_n100000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n100000_cpp.sk
deleted file mode 100644
index aba532b..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll6_n100000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll6_n10000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n10000_cpp.sk
deleted file mode 100644
index cd49aad..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll6_n10000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n1000_cpp.sk
deleted file mode 100644
index e3d5f00..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll6_n1000_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n100_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n100_cpp.sk
deleted file mode 100644
index 71fc293..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll6_n100_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n10_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n10_cpp.sk
deleted file mode 100644
index 2a22d91..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll6_n10_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll6_n1_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll6_n1_cpp.sk
deleted file mode 100644
index 3f2f545..0000000
--- a/tests/serialization_test_data/cpp_generated_files/hll6_n1_cpp.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n0_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n0_cpp.sk
deleted file mode 100644
index 5ecf871..0000000
Binary files a/tests/serialization_test_data/cpp_generated_files/hll8_n0_cpp.sk 
and /dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n1000000_cpp.sk
deleted file mode 100644
index 67ec831..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll8_n100000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n100000_cpp.sk
deleted file mode 100644
index 0210961..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll8_n100000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll8_n10000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n10000_cpp.sk
deleted file mode 100644
index a5c8a15..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll8_n10000_cpp.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n1000_cpp.sk
deleted file mode 100644
index dd703ec..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll8_n1000_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n100_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n100_cpp.sk
deleted file mode 100644
index c2bfa48..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll8_n100_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n10_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n10_cpp.sk
deleted file mode 100644
index 86277b3..0000000
Binary files 
a/tests/serialization_test_data/cpp_generated_files/hll8_n10_cpp.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/cpp_generated_files/hll8_n1_cpp.sk 
b/tests/serialization_test_data/cpp_generated_files/hll8_n1_cpp.sk
deleted file mode 100644
index fd3a973..0000000
--- a/tests/serialization_test_data/cpp_generated_files/hll8_n1_cpp.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tests/serialization_test_data/java_generated_files/hll4_n0_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n0_java.sk
deleted file mode 100644
index 074868c..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n0_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll4_n1000000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n1000000_java.sk
deleted file mode 100644
index 9a8c626..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n1000000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll4_n100000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n100000_java.sk
deleted file mode 100644
index 2b4b4dc..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n100000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll4_n10000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n10000_java.sk
deleted file mode 100644
index e97b181..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n10000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll4_n1000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n1000_java.sk
deleted file mode 100644
index a6b5f9a..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n1000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll4_n100_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n100_java.sk
deleted file mode 100644
index 0e12ff4..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n100_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll4_n10_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n10_java.sk
deleted file mode 100644
index e4ed655..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n10_java.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll4_n1_java.sk 
b/tests/serialization_test_data/java_generated_files/hll4_n1_java.sk
deleted file mode 100644
index 1f4f6ee..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll4_n1_java.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll6_n0_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n0_java.sk
deleted file mode 100644
index c31bd26..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll6_n0_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll6_n1000000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n1000000_java.sk
deleted file mode 100644
index 42462d2..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll6_n1000000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll6_n100000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n100000_java.sk
deleted file mode 100644
index 767f24f..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll6_n100000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll6_n10000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n10000_java.sk
deleted file mode 100644
index 258da08..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll6_n10000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll6_n1000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n1000_java.sk
deleted file mode 100644
index 274e8b7..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll6_n1000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll6_n100_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n100_java.sk
deleted file mode 100644
index 71fc293..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll6_n100_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll6_n10_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n10_java.sk
deleted file mode 100644
index 2a22d91..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll6_n10_java.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll6_n1_java.sk 
b/tests/serialization_test_data/java_generated_files/hll6_n1_java.sk
deleted file mode 100644
index 3f2f545..0000000
--- a/tests/serialization_test_data/java_generated_files/hll6_n1_java.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tests/serialization_test_data/java_generated_files/hll8_n0_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n0_java.sk
deleted file mode 100644
index 5ecf871..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll8_n0_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll8_n1000000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n1000000_java.sk
deleted file mode 100644
index e7f5f48..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll8_n1000000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll8_n100000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n100000_java.sk
deleted file mode 100644
index c634e49..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll8_n100000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll8_n10000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n10000_java.sk
deleted file mode 100644
index f8dfdb0..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll8_n10000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll8_n1000_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n1000_java.sk
deleted file mode 100644
index 761477d..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll8_n1000_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll8_n100_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n100_java.sk
deleted file mode 100644
index c2bfa48..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll8_n100_java.sk and 
/dev/null differ
diff --git 
a/tests/serialization_test_data/java_generated_files/hll8_n10_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n10_java.sk
deleted file mode 100644
index 86277b3..0000000
Binary files 
a/tests/serialization_test_data/java_generated_files/hll8_n10_java.sk and 
/dev/null differ
diff --git a/tests/serialization_test_data/java_generated_files/hll8_n1_java.sk 
b/tests/serialization_test_data/java_generated_files/hll8_n1_java.sk
deleted file mode 100644
index fd3a973..0000000
--- a/tests/serialization_test_data/java_generated_files/hll8_n1_java.sk
+++ /dev/null
@@ -1 +0,0 @@
-���
\ No newline at end of file
diff --git a/tools/generate_serialization_test_data.py 
b/tools/generate_serialization_test_data.py
new file mode 100755
index 0000000..512c02f
--- /dev/null
+++ b/tools/generate_serialization_test_data.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import subprocess
+import sys
+import shutil
+import argparse
+from pathlib import Path
+
+def check_command_installed(command):
+    """Exit with status 1 unless ``command`` can be found on the PATH.
+
+    Args:
+        command: Name of the executable to look up with ``shutil.which``.
+    """
+    if shutil.which(command) is not None:
+        return
+
+    print(f"Error: '{command}' is not installed or not in PATH.")
+    sys.exit(1)
+
+
+def run_command(command, cwd=None, shell=False):
+    """Run a command, streaming its output, and exit the script on failure.
+
+    Args:
+        command: Argument list (or a single string) to execute.
+        cwd: Working directory for the child process; None inherits ours.
+        shell: Forwarded unchanged to ``subprocess.check_call``.
+
+    The child's stderr is merged into its stdout and nothing is captured,
+    so the output appears live in CI logs. If the command exits with a
+    non-zero status, or cannot be started at all, an error is printed and
+    the script terminates with exit status 1.
+    """
+    cmd_str = ' '.join(command) if isinstance(command, list) else command
+    print(f"Running: {cmd_str}")
+    sys.stdout.flush()  # Ensure 'Running' message appears before command output
+    try:
+        # Don't capture output; let it stream straight to our stdout
+        subprocess.check_call(
+            command, cwd=cwd, stderr=subprocess.STDOUT, shell=shell)
+    except subprocess.CalledProcessError as e:
+        # Output was streamed rather than captured, so e.stdout is always
+        # None here; point at the log instead of printing "None".
+        print(f"Error running command: {e}")
+        print("See the command output above for details.")
+        sys.exit(1)
+    except OSError as e:
+        # Raised when the executable or the working directory is missing
+        print(f"Error starting command '{cmd_str}': {e}")
+        sys.exit(1)
+
+
+def generate_java_files(project_root):
+    """Regenerate the Java-produced sketch snapshots used by the tests.
+
+    Clones apache/datasketches-java into ``tmp_datasketches_java`` under
+    ``project_root``, runs ``mvn test -P generate-java-files`` in the clone,
+    and copies every ``*.sk`` file it produced into
+    ``tests/serialization_test_data/java_generated_files``.
+
+    Args:
+        project_root: ``pathlib.Path`` of the repository root.
+
+    Exits with status 1 when git, java or Maven is missing, when a command
+    fails, or when the expected output directory was not created. Finding
+    no ``.sk`` files is reported as a warning only.
+    """
+    print("--- Generating Java Test Data ---")
+
+    on_windows = os.name == 'nt'
+    # Windows uses the mvn.cmd launcher and runs it through the shell
+    maven = "mvn.cmd" if on_windows else "mvn"
+
+    for tool in ("git", "java", maven):
+        check_command_installed(tool)
+
+    workspace = project_root / "tmp_datasketches_java"
+    data_root = project_root / "tests" / "serialization_test_data"
+    output_dir = data_root / "java_generated_files"
+
+    # Always start from a fresh clone
+    if workspace.exists():
+        print(f"Removing existing temporary directory: {workspace}")
+        shutil.rmtree(workspace)
+    workspace.mkdir()
+
+    run_command([
+        "git", "clone",
+        "https://github.com/apache/datasketches-java.git",
+        str(workspace),
+    ])
+
+    # Run Maven to generate the files
+    run_command(
+        [maven, "test", "-P", "generate-java-files"],
+        cwd=workspace,
+        shell=on_windows,
+    )
+
+    produced_dir = workspace / "serialization_test_data" / "java_generated_files"
+    if not produced_dir.exists():
+        print(f"Error: Expected generated files directory not found at {produced_dir}")
+        sys.exit(1)
+
+    print(f"Copying files from {produced_dir} to {output_dir}")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    copied = 0
+    for snapshot in produced_dir.glob("*.sk"):
+        shutil.copy2(snapshot, output_dir)
+        print(f"Copied: {snapshot.name}")
+        copied += 1
+
+    if copied:
+        print(f"Successfully copied {copied} files.")
+    else:
+        print("Warning: No .sk files were found to copy.")
+
+
+def generate_cpp_files(project_root):
+    """Regenerate the C++-produced sketch snapshots used by the tests.
+
+    Clones apache/datasketches-cpp into ``tmp_datasketches_cpp`` under
+    ``project_root``, configures it with ``-DGENERATE=true``, builds the
+    Release configuration, runs ctest, and copies every ``*_cpp.sk`` file
+    found anywhere below the build directory into
+    ``tests/serialization_test_data/cpp_generated_files``.
+
+    Args:
+        project_root: ``pathlib.Path`` of the repository root.
+
+    Exits with status 1 when git, cmake or ctest is missing or when a
+    command fails. Finding no snapshot files is reported as a warning only.
+    """
+    print("--- Generating C++ Test Data ---")
+
+    for tool in ("git", "cmake", "ctest"):
+        check_command_installed(tool)
+
+    workspace = project_root / "tmp_datasketches_cpp"
+    data_root = project_root / "tests" / "serialization_test_data"
+    output_dir = data_root / "cpp_generated_files"
+
+    # Always start from a fresh clone
+    if workspace.exists():
+        print(f"Removing existing temporary directory: {workspace}")
+        shutil.rmtree(workspace)
+    workspace.mkdir()
+
+    run_command([
+        "git", "clone",
+        "https://github.com/apache/datasketches-cpp.git",
+        str(workspace),
+    ])
+
+    build_dir = workspace / "build"
+    build_dir.mkdir(exist_ok=True)
+
+    build_steps = (
+        # Configure: CMAKE_BUILD_TYPE is for single-config generators
+        # (Ninja/Make)
+        ["cmake", "..", "-DGENERATE=true", "-DCMAKE_BUILD_TYPE=Release"],
+        # Build: Release config
+        ["cmake", "--build", ".", "--config", "Release"],
+        # Test: ctest is more portable than 'cmake --target test' (VS uses
+        # RUN_TESTS); --output-on-failure helps debug a failing test
+        ["ctest", "-C", "Release", "--output-on-failure"],
+    )
+    for step in build_steps:
+        run_command(step, cwd=build_dir)
+
+    # Where the snapshots land (build/test/ or a subdirectory) may depend
+    # on the generator, so search the whole build tree.
+    print(f"Copying files to {output_dir}")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    copied = 0
+    for snapshot in build_dir.rglob("*_cpp.sk"):
+        shutil.copy2(snapshot, output_dir)
+        print(f"Copied: {snapshot.name}")
+        copied += 1
+
+    if copied:
+        print(f"Successfully copied {copied} files.")
+    else:
+        print("Warning: No *_cpp.sk files were found to copy.")
+
+
+def main():
+    """Parse the command-line flags and run the selected generators.
+
+    ``--java`` and ``--cpp`` select one generator each; ``--all`` (also the
+    default when no flag is given) runs both, Java first.
+    """
+    parser = argparse.ArgumentParser(description="Generate serialization test data for Java and/or C++.")
+    for flag, help_text in (
+        ("--java", "Generate Java test data"),
+        ("--cpp", "Generate C++ test data"),
+        ("--all", "Generate both Java and C++ test data"),
+    ):
+        parser.add_argument(flag, action="store_true", help=help_text)
+    options = parser.parse_args()
+
+    # No flag at all means the same as --all
+    everything = options.all or not (options.java or options.cpp)
+
+    # The project root is the parent of the directory holding this script
+    project_root = Path(__file__).resolve().parent.parent
+
+    if everything or options.java:
+        generate_java_files(project_root)
+    if everything or options.cpp:
+        generate_cpp_files(project_root)
+
+
+if __name__ == "__main__":
+    main()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to