This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 5913c424f9 [SYSTEMDS-3929] Additional tests for existing HDF5 readers
5913c424f9 is described below

commit 5913c424f950de5cd574ac97eeaffd7527f366bd
Author: Lucca Di Benedetto <[email protected]>
AuthorDate: Fri Jan 16 12:56:51 2026 +0100

    [SYSTEMDS-3929] Additional tests for existing HDF5 readers
    
    Closes #2369.
---
 .../sysds/test/functions/io/hdf5/ReadHDF5Test.java | 133 +++++++++--
 .../test/functions/io/hdf5/ReadHDF5Test1.java      |  38 ----
 .../test/functions/io/hdf5/ReadHDF5Test2.java      |  38 ----
 .../test/functions/io/hdf5/ReadHDF5Test3.java      |  38 ----
 .../{ReadHDF5Test_3.dml => ReadHDF5_Default.dml}   |   0
 .../scripts/functions/io/hdf5/ReadHDF5_Verify.R    |  18 +-
 ...ReadHDF5Test_2.dml => ReadHDF5_WithDataset.dml} |   0
 ...est_1.dml => ReadHDF5_WithFormatAndDataset.dml} |   0
 .../scripts/functions/io/hdf5/gen_HDF5_testdata.R  | 247 +++++++++++++++++++++
 9 files changed, 379 insertions(+), 133 deletions(-)

diff --git 
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java 
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
index c20294cd85..eb29ef1523 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
@@ -19,7 +19,18 @@
 
 package org.apache.sysds.test.functions.io.hdf5;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.io.File;
+import org.apache.commons.io.FileUtils;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 
 import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.common.Types.ExecMode;
@@ -27,30 +38,54 @@ import org.apache.sysds.conf.CompilerConfig;
 import org.apache.sysds.runtime.matrix.data.MatrixValue;
 import org.apache.sysds.test.TestConfiguration;
 import org.apache.sysds.test.TestUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
 import org.junit.Test;
 
-public abstract class ReadHDF5Test extends ReadHDF5TestBase {
+@Ignore
+public class ReadHDF5Test extends ReadHDF5TestBase {
 
-       protected abstract int getId();
+       private static final double eps = 1e-9;
+       private static final String TEST_NAME = "ReadHDF5Test";
 
-       protected String getInputHDF5FileName() {
-               return "transfusion_" + getId() + ".h5";
+       private static final List<Hdf5TestCase> TEST_CASES = 
Collections.unmodifiableList(
+               Arrays.asList(new Hdf5TestCase("test_single_dataset.h5", 
"data", DmlVariant.FORMAT_AND_DATASET),
+                       new Hdf5TestCase("test_multiple_datasets.h5", 
"matrix_2d", DmlVariant.DATASET_ONLY),
+                       new Hdf5TestCase("test_multiple_datasets.h5", 
"matrix_3d", DmlVariant.DATASET_ONLY),
+                       new Hdf5TestCase("test_multi_tensor_samples.h5", 
"label", DmlVariant.DATASET_ONLY),
+                       new Hdf5TestCase("test_multi_tensor_samples.h5", 
"sen1", DmlVariant.DATASET_ONLY),
+                       new Hdf5TestCase("test_nested_groups.h5", 
"group1/subgroup/data2", DmlVariant.FORMAT_AND_DATASET)));
+
+       @Override
+       protected String getTestName() {
+               return TEST_NAME;
        }
 
-       private final static double eps = 1e-9;
+       @Override
+       protected String getTestClassDir() {
+               return TEST_CLASS_DIR;
+       }
 
-       @Test
-       public void testHDF51_Seq_CP() {
-               runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, false);
+       @BeforeClass
+       public static void setUpClass() {
+               Path scriptDir = Paths.get(SCRIPT_DIR + TEST_DIR);
+               generateHdf5Data(scriptDir);
        }
 
        @Test
-       public void testHDF51_Parallel_CP() {
-               runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, true);
+       public void testReadSequential() {
+               for(Hdf5TestCase tc : TEST_CASES)
+                       runReadHDF5Test(tc, ExecMode.SINGLE_NODE, false);
        }
 
-       protected void runReadHDF5Test(int testNumber, ExecMode platform, 
boolean parallel) {
+       @Test
+       public void testReadSequentialParallelIO() {
+               for(Hdf5TestCase tc : TEST_CASES)
+                       runReadHDF5Test(tc, ExecMode.SINGLE_NODE, true);
+       }
 
+       protected void runReadHDF5Test(Hdf5TestCase testCase, ExecMode 
platform, boolean parallel) {
                ExecMode oldPlatform = rtplatform;
                rtplatform = platform;
 
@@ -61,21 +96,28 @@ public abstract class ReadHDF5Test extends ReadHDF5TestBase 
{
                boolean oldpar = CompilerConfig.FLAG_PARREADWRITE_TEXT;
 
                try {
-
                        CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
 
                        TestConfiguration config = 
getTestConfiguration(getTestName());
                        loadTestConfiguration(config);
 
                        String HOME = SCRIPT_DIR + TEST_DIR;
-                       String inputMatrixName = HOME + INPUT_DIR + 
getInputHDF5FileName(); // always read the same data
-                       String datasetName = "DATASET_1";
+                       String inputMatrixName = HOME + INPUT_DIR + 
testCase.hdf5File;
+
+                       fullDMLScriptName = HOME + 
testCase.variant.getScriptName();
+                       programArgs = new String[] {"-args", inputMatrixName, 
testCase.dataset, output("Y")};
 
-                       fullDMLScriptName = HOME + getTestName() + "_" + 
testNumber + ".dml";
-                       programArgs = new String[] {"-args", inputMatrixName, 
datasetName, output("Y")};
+                       // Clean per-case output/expected to avoid reusing 
stale metadata between looped cases
+                       String outY = output("Y");
+                       String expY = expected("Y");
+                       FileUtils.deleteQuietly(new File(outY));
+                       FileUtils.deleteQuietly(new File(outY + ".mtd"));
+                       FileUtils.deleteQuietly(new File(expY));
+                       FileUtils.deleteQuietly(new File(expY + ".mtd"));
 
                        fullRScriptName = HOME + "ReadHDF5_Verify.R";
-                       rCmd = "Rscript" + " " + fullRScriptName + " " + 
inputMatrixName + " " + datasetName + " " + expectedDir();
+                       rCmd = "Rscript" + " " + fullRScriptName + " " + 
inputMatrixName + " " + testCase.dataset + " "
+                               + expectedDir();
 
                        runTest(true, false, null, -1);
                        runRScript(true);
@@ -90,4 +132,61 @@ public abstract class ReadHDF5Test extends ReadHDF5TestBase 
{
                        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
                }
        }
+
+       private static void generateHdf5Data(Path scriptDir) {
+               ProcessBuilder processBuilder = new ProcessBuilder("Rscript", 
"gen_HDF5_testdata.R");
+               processBuilder.directory(scriptDir.toFile());
+               processBuilder.redirectErrorStream(true);
+
+               try {
+                       Process process = processBuilder.start();
+                       StringBuilder output = new StringBuilder();
+                       try(BufferedReader reader = new BufferedReader(
+                               new InputStreamReader(process.getInputStream(), 
StandardCharsets.UTF_8))) {
+                               reader.lines().forEach(line -> 
output.append(line).append(System.lineSeparator()));
+                       }
+                       int exitCode = process.waitFor();
+                       if(exitCode != 0)
+                               Assert.fail("Failed to execute 
gen_HDF5_testdata.R (exit " + exitCode + "):\n" + output);
+               }
+               catch(IOException e) {
+                       Assert.fail("Unable to execute gen_HDF5_testdata.R: " + 
e.getMessage());
+               }
+               catch(InterruptedException e) {
+                       Thread.currentThread().interrupt();
+                       Assert.fail("Interrupted while generating HDF5 test 
data.");
+               }
+       }
+
+       private enum DmlVariant {
+               FORMAT_AND_DATASET("ReadHDF5_WithFormatAndDataset.dml"), 
DATASET_ONLY("ReadHDF5_WithDataset.dml"),
+               DEFAULT("ReadHDF5_Default.dml");
+
+               private final String scriptName;
+
+               DmlVariant(String scriptName) {
+                       this.scriptName = scriptName;
+               }
+
+               public String getScriptName() {
+                       return scriptName;
+               }
+       }
+
+       private static final class Hdf5TestCase {
+               private final String hdf5File;
+               private final String dataset;
+               private final DmlVariant variant;
+
+               private Hdf5TestCase(String hdf5File, String dataset, 
DmlVariant variant) {
+                       this.hdf5File = hdf5File;
+                       this.dataset = dataset;
+                       this.variant = variant;
+               }
+
+               @Override
+               public String toString() {
+                       return hdf5File + "::" + dataset;
+               }
+       }
 }
diff --git 
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java 
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java
deleted file mode 100644
index b0fff7a639..0000000000
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.functions.io.hdf5;
-
-public class ReadHDF5Test1 extends ReadHDF5Test {
-
-       private final static String TEST_NAME = "ReadHDF5Test";
-       public final static String TEST_CLASS_DIR = TEST_DIR + 
ReadHDF5Test1.class.getSimpleName() + "/";
-
-       protected String getTestName() {
-               return TEST_NAME;
-       }
-
-       protected String getTestClassDir() {
-               return TEST_CLASS_DIR;
-       }
-
-       protected int getId() {
-               return 1;
-       }
-}
diff --git 
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java 
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java
deleted file mode 100644
index d6a4c763c3..0000000000
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.functions.io.hdf5;
-
-public class ReadHDF5Test2 extends ReadHDF5Test {
-
-       private final static String TEST_NAME = "ReadHDF5Test";
-       private final static String TEST_CLASS_DIR = TEST_DIR + 
ReadHDF5Test2.class.getSimpleName() + "/";
-
-       protected String getTestName() {
-               return TEST_NAME;
-       }
-
-       protected String getTestClassDir() {
-               return TEST_CLASS_DIR;
-       }
-
-       protected int getId() {
-               return 2;
-       }
-}
diff --git 
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java 
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java
deleted file mode 100644
index 71a6b1762e..0000000000
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.functions.io.hdf5;
-
-public class ReadHDF5Test3 extends ReadHDF5Test {
-
-       private final static String TEST_NAME = "ReadHDF5Test";
-       private final static String TEST_CLASS_DIR = TEST_DIR + 
ReadHDF5Test3.class.getSimpleName() + "/";
-
-       protected String getTestName() {
-               return TEST_NAME;
-       }
-
-       protected String getTestClassDir() {
-               return TEST_CLASS_DIR;
-       }
-
-       protected int getId() {
-               return 3;
-       }
-}
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_3.dml 
b/src/test/scripts/functions/io/hdf5/ReadHDF5_Default.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_3.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_Default.dml
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R 
b/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
index 2b977007dd..925e092f72 100644
--- a/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
+++ b/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
@@ -26,5 +26,19 @@ options(digits=22)
 
 library("rhdf5")
 
-Y = h5read(args[1],args[2],native = TRUE)
-writeMM(as(Y, "CsparseMatrix"), paste(args[3], "Y", sep=""))
+Y = h5read(args[1], args[2], native = TRUE)
+dims = dim(Y)
+
+if(length(dims) == 1) {
+  # convert to a column matrix
+  Y_mat = matrix(Y, ncol = 1)
+} else if(length(dims) > 2) {
+  # flatten everything beyond the first dimension into columns
+  perm = c(1, rev(seq(2, length(dims))))
+  Y_mat = matrix(aperm(Y, perm), nrow = dims[1], ncol = prod(dims[-1]))
+} else {
+  # for 2D, SystemDS treats it the same
+  Y_mat = Y
+}
+
+writeMM(as(Y_mat, "CsparseMatrix"), paste(args[3], "Y", sep=""))
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_2.dml 
b/src/test/scripts/functions/io/hdf5/ReadHDF5_WithDataset.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_2.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_WithDataset.dml
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_1.dml 
b/src/test/scripts/functions/io/hdf5/ReadHDF5_WithFormatAndDataset.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_1.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_WithFormatAndDataset.dml
diff --git a/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R 
b/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R
new file mode 100644
index 0000000000..fb9fed140a
--- /dev/null
+++ b/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R
@@ -0,0 +1,247 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# Generate various HDF5 test files with different formats.
+# Creates test files in the 'in' directory.
+
+if (!require("rhdf5", quietly = TRUE)) {
+  cat("Error: rhdf5 is not installed.\n")
+  quit(status = 1)
+}
+
+SMALL_MATRIX_2D <- c(200, 40)
+SMALL_MATRIX_3D <- c(15, 15, 5)
+SMALL_TENSOR_4D_A <- c(120, 16, 16, 4)
+SMALL_TENSOR_4D_B <- c(120, 16, 16, 5)
+SMALL_LABEL_MATRIX <- c(120, 12)
+
+VECTOR_LENGTH <- 200
+STRING_ARRAY_LENGTH <- 30
+
+CHUNK_SHAPE <- c(100, 20)
+
+write_matrix <- function(file_path, dataset_name, shape, generator = 
function(n) rnorm(n), storage.mode = "double", H5type = NULL) {
+  values <- generator(prod(shape))
+  h5createDataset(
+    file_path,
+    dataset_name,
+    dims = rev(shape),
+    chunk = NULL,
+    filter = "NONE", # contiguous, uncompressed layout
+    level = 0,
+    shuffle = FALSE,
+    storage.mode = storage.mode,
+    H5type = H5type,
+    native = TRUE # use R column-major order, same in h5read(..., native=TRUE) 
in tests.
+  )
+  h5write(array(values, dim = shape), file_path, dataset_name, native = TRUE)
+}
+
+generate_test_file_single_dataset <- function(dir) {
+  file_path <- file.path(dir, "test_single_dataset.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "data", SMALL_MATRIX_2D)
+  cat("Created test_single_dataset.h5 (single 2D dataset)\n")
+}
+
+generate_test_file_multiple_datasets <- function(dir) {
+  file_path <- file.path(dir, "test_multiple_datasets.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "matrix_2d", SMALL_MATRIX_2D)
+  # Create 1D vector without compression/filters
+  h5createDataset(file_path, "vector_1d", dims = VECTOR_LENGTH, chunk = NULL, 
filter = "NONE", level = 0, shuffle = FALSE)
+  h5write(rnorm(VECTOR_LENGTH), file_path, "vector_1d", native = TRUE)
+  write_matrix(file_path, "matrix_3d", SMALL_MATRIX_3D)
+  cat("Created test_multiple_datasets.h5 (1D/2D/3D datasets)\n")
+}
+
+generate_test_file_different_dtypes <- function(dir) {
+  file_path <- file.path(dir, "test_different_dtypes.h5")
+  h5createFile(file_path)
+  # H5T_IEEE_F64LE (64-bit float)
+  write_matrix(file_path, "double_primary", SMALL_MATRIX_2D, storage.mode = 
"double")
+  # H5T_IEEE_F32LE (32-bit float)
+  write_matrix(file_path, "float32", SMALL_MATRIX_2D, H5type = 
"H5T_IEEE_F32LE")
+  # H5T_STD_I32LE (32-bit integer)
+  write_matrix(
+    file_path,
+    "int32",
+    SMALL_MATRIX_2D,
+    generator = function(n) as.integer(sample(-100:100, n, replace = TRUE)),
+    storage.mode = "integer"
+  )
+  # H5T_STD_I64LE (64-bit integer)
+  write_matrix(
+    file_path,
+    "int64",
+    SMALL_MATRIX_2D,
+    generator = function(n) as.integer(sample(-100:100, n, replace = TRUE)),
+    H5type = "H5T_STD_I64LE"
+  )
+  cat("Created test_different_dtypes.h5 (double/float/int32/int64 datasets)\n")
+}
+
+# 
https://support.hdfgroup.org/documentation/hdf5-docs/advanced_topics/chunking_in_hdf5.html
+generate_test_file_chunked <- function(dir) {
+  file_path <- file.path(dir, "test_chunked.h5")
+  h5createFile(file_path)
+
+  data <- array(rnorm(prod(SMALL_MATRIX_2D)), dim = SMALL_MATRIX_2D)
+
+  h5createDataset(file_path, "chunked_data", dims = SMALL_MATRIX_2D, chunk = 
CHUNK_SHAPE,
+                  filter = "NONE", level = 0, shuffle = FALSE)
+  h5write(data, file_path, "chunked_data", native = TRUE)
+
+  write_matrix(file_path, "non_chunked_data", SMALL_MATRIX_2D)
+  cat("Created test_chunked.h5 (chunked dataset)\n")
+}
+
+generate_test_file_compressed <- function(dir) {
+  file_path <- file.path(dir, "test_compressed.h5")
+  h5createFile(file_path)
+  data <- array(rnorm(prod(SMALL_MATRIX_2D)), dim = SMALL_MATRIX_2D)
+  h5createDataset(file_path, "gzip_compressed_9", dims = SMALL_MATRIX_2D,
+                  chunk = SMALL_MATRIX_2D, level = 9)
+  h5write(data, file_path, "gzip_compressed_9", native = TRUE)
+  h5createDataset(file_path, "gzip_compressed_1", dims = SMALL_MATRIX_2D,
+                  chunk = SMALL_MATRIX_2D, level = 1)
+  h5write(data, file_path, "gzip_compressed_1", native = TRUE)
+  cat("Created test_compressed.h5 (gzip compression)\n")
+}
+
+generate_test_file_multi_tensor_samples <- function(dir) {
+  file_path <- file.path(dir, "test_multi_tensor_samples.h5")
+  h5createFile(file_path)
+  write_matrix(
+    file_path,
+    "sen1",
+    SMALL_TENSOR_4D_A
+  )
+  write_matrix(
+    file_path,
+    "sen2",
+    SMALL_TENSOR_4D_B
+  )
+  write_matrix(
+    file_path,
+    "label",
+    SMALL_LABEL_MATRIX,
+    generator = function(n) as.integer(sample(0:1, n, replace = TRUE))
+  )
+  cat("Created test_multi_tensor_samples.h5 (multi-input tensors)\n")
+}
+
+generate_test_file_nested_groups <- function(dir) {
+  file_path <- file.path(dir, "test_nested_groups.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "root_data", SMALL_MATRIX_2D)
+  h5createGroup(file_path, "group1")
+  write_matrix(file_path, "group1/data1", SMALL_MATRIX_2D)
+  h5createGroup(file_path, "group1/subgroup")
+  write_matrix(file_path, "group1/subgroup/data2", SMALL_MATRIX_2D)
+  cat("Created test_nested_groups.h5 (nested group hierarchy)\n")
+}
+
+generate_test_file_with_attributes <- function(dir) {
+  file_path <- file.path(dir, "test_with_attributes.h5")
+  h5createFile(file_path)
+  write_matrix(file_path, "data", SMALL_MATRIX_2D)
+
+  fid <- H5Fopen(file_path)
+  did <- H5Dopen(fid, "data")
+  h5writeAttribute("Test dataset with attributes", did, "description")
+  h5writeAttribute(1.0, did, "version")
+  h5writeAttribute(SMALL_MATRIX_2D, did, "shape")
+  H5Dclose(did)
+
+  h5writeAttribute("2025-11-26", fid, "file_created")
+  h5writeAttribute("attributes", fid, "test_type")
+  H5Fclose(fid)
+  cat("Created test_with_attributes.h5 (dataset + file attributes)\n")
+}
+
+generate_test_file_empty_datasets <- function(dir) {
+  file_path <- file.path(dir, "test_empty_datasets.h5")
+  h5createFile(file_path)
+  h5createDataset(file_path, "empty", dims = c(0, SMALL_MATRIX_2D[2]),
+                  filter = "NONE", level = 0, shuffle = FALSE)
+
+  h5createDataset(file_path, "scalar", dims = 1,
+                  filter = "NONE", level = 0, shuffle = FALSE, chunk = 1)
+  h5write(1.0, file_path, "scalar", native = TRUE)
+  h5createDataset(file_path, "vector", dims = VECTOR_LENGTH,
+                  filter = "NONE", level = 0, shuffle = FALSE, chunk = 
VECTOR_LENGTH)
+  h5write(rnorm(VECTOR_LENGTH), file_path, "vector", native = TRUE)
+  cat("Created test_empty_datasets.h5 (empty/scalar/vector)\n")
+}
+
+generate_test_file_string_datasets <- function(dir) {
+  file_path <- file.path(dir, "test_string_datasets.h5")
+  h5createFile(file_path)
+  strings <- paste0("string_", 0:(STRING_ARRAY_LENGTH - 1))
+  # Create string dataset without compression/filters
+  h5createDataset(file_path, "string_array", dims = STRING_ARRAY_LENGTH,
+                  storage.mode = "character", filter = "NONE", level = 0,
+                  shuffle = FALSE, chunk = STRING_ARRAY_LENGTH)
+  h5write(strings, file_path, "string_array", native = TRUE)
+  cat("Created test_string_datasets.h5 (string datasets)\n")
+}
+
+main <- function() {
+  if (basename(getwd()) != "hdf5") {
+    cat("You must execute this script from the 'hdf5' directory\n")
+    quit(status = 1)
+  }
+
+  testdir <- "in"
+  if (!dir.exists(testdir)) {
+    dir.create(testdir)
+  }
+
+  test_functions <- list(
+    generate_test_file_single_dataset,
+    generate_test_file_multiple_datasets,
+    generate_test_file_different_dtypes,
+    generate_test_file_chunked,
+    generate_test_file_compressed,
+    generate_test_file_multi_tensor_samples,
+    generate_test_file_nested_groups,
+    generate_test_file_with_attributes,
+    generate_test_file_empty_datasets,
+    generate_test_file_string_datasets
+  )
+
+  for (test_func in test_functions) {
+    tryCatch({
+      test_func(testdir)
+    }, error = function(e) {
+      cat(sprintf("  ✗ Error: %s\n", conditionMessage(e)))
+    })
+  }
+
+  files <- sort(list.files(testdir, pattern = "\\.h5$", full.names = TRUE))
+  cat(sprintf("\nGenerated %d HDF5 test files in %s\n", length(files), 
normalizePath(testdir)))
+}
+
+if (!interactive()) {
+  main()
+}

Reply via email to