This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 5913c424f9 [SYSTEMDS-3929] Additional tests for existing HDF5 readers
5913c424f9 is described below
commit 5913c424f950de5cd574ac97eeaffd7527f366bd
Author: Lucca Di Benedetto <[email protected]>
AuthorDate: Fri Jan 16 12:56:51 2026 +0100
[SYSTEMDS-3929] Additional tests for existing HDF5 readers
Closes #2369.
---
.../sysds/test/functions/io/hdf5/ReadHDF5Test.java | 133 +++++++++--
.../test/functions/io/hdf5/ReadHDF5Test1.java | 38 ----
.../test/functions/io/hdf5/ReadHDF5Test2.java | 38 ----
.../test/functions/io/hdf5/ReadHDF5Test3.java | 38 ----
.../{ReadHDF5Test_3.dml => ReadHDF5_Default.dml} | 0
.../scripts/functions/io/hdf5/ReadHDF5_Verify.R | 18 +-
...ReadHDF5Test_2.dml => ReadHDF5_WithDataset.dml} | 0
...est_1.dml => ReadHDF5_WithFormatAndDataset.dml} | 0
.../scripts/functions/io/hdf5/gen_HDF5_testdata.R | 247 +++++++++++++++++++++
9 files changed, 379 insertions(+), 133 deletions(-)
diff --git
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
index c20294cd85..eb29ef1523 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test.java
@@ -19,7 +19,18 @@
package org.apache.sysds.test.functions.io.hdf5;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.io.File;
+import org.apache.commons.io.FileUtils;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Types.ExecMode;
@@ -27,30 +38,54 @@ import org.apache.sysds.conf.CompilerConfig;
import org.apache.sysds.runtime.matrix.data.MatrixValue;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
import org.junit.Test;
-public abstract class ReadHDF5Test extends ReadHDF5TestBase {
+@Ignore
+public class ReadHDF5Test extends ReadHDF5TestBase {
- protected abstract int getId();
+ private static final double eps = 1e-9;
+ private static final String TEST_NAME = "ReadHDF5Test";
- protected String getInputHDF5FileName() {
- return "transfusion_" + getId() + ".h5";
+ private static final List<Hdf5TestCase> TEST_CASES =
Collections.unmodifiableList(
+ Arrays.asList(new Hdf5TestCase("test_single_dataset.h5",
"data", DmlVariant.FORMAT_AND_DATASET),
+ new Hdf5TestCase("test_multiple_datasets.h5",
"matrix_2d", DmlVariant.DATASET_ONLY),
+ new Hdf5TestCase("test_multiple_datasets.h5",
"matrix_3d", DmlVariant.DATASET_ONLY),
+ new Hdf5TestCase("test_multi_tensor_samples.h5",
"label", DmlVariant.DATASET_ONLY),
+ new Hdf5TestCase("test_multi_tensor_samples.h5",
"sen1", DmlVariant.DATASET_ONLY),
+ new Hdf5TestCase("test_nested_groups.h5",
"group1/subgroup/data2", DmlVariant.FORMAT_AND_DATASET)));
+
+ @Override
+ protected String getTestName() {
+ return TEST_NAME;
}
- private final static double eps = 1e-9;
+ @Override
+ protected String getTestClassDir() {
+ return TEST_CLASS_DIR;
+ }
- @Test
- public void testHDF51_Seq_CP() {
- runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, false);
+ @BeforeClass
+ public static void setUpClass() {
+ Path scriptDir = Paths.get(SCRIPT_DIR + TEST_DIR);
+ generateHdf5Data(scriptDir);
}
@Test
- public void testHDF51_Parallel_CP() {
- runReadHDF5Test(getId(), ExecMode.SINGLE_NODE, true);
+ public void testReadSequential() {
+ for(Hdf5TestCase tc : TEST_CASES)
+ runReadHDF5Test(tc, ExecMode.SINGLE_NODE, false);
}
- protected void runReadHDF5Test(int testNumber, ExecMode platform,
boolean parallel) {
+ @Test
+ public void testReadSequentialParallelIO() {
+ for(Hdf5TestCase tc : TEST_CASES)
+ runReadHDF5Test(tc, ExecMode.SINGLE_NODE, true);
+ }
+ protected void runReadHDF5Test(Hdf5TestCase testCase, ExecMode
platform, boolean parallel) {
ExecMode oldPlatform = rtplatform;
rtplatform = platform;
@@ -61,21 +96,28 @@ public abstract class ReadHDF5Test extends ReadHDF5TestBase
{
boolean oldpar = CompilerConfig.FLAG_PARREADWRITE_TEXT;
try {
-
CompilerConfig.FLAG_PARREADWRITE_TEXT = parallel;
TestConfiguration config =
getTestConfiguration(getTestName());
loadTestConfiguration(config);
String HOME = SCRIPT_DIR + TEST_DIR;
- String inputMatrixName = HOME + INPUT_DIR +
getInputHDF5FileName(); // always read the same data
- String datasetName = "DATASET_1";
+ String inputMatrixName = HOME + INPUT_DIR +
testCase.hdf5File;
+
+ fullDMLScriptName = HOME +
testCase.variant.getScriptName();
+ programArgs = new String[] {"-args", inputMatrixName,
testCase.dataset, output("Y")};
- fullDMLScriptName = HOME + getTestName() + "_" +
testNumber + ".dml";
- programArgs = new String[] {"-args", inputMatrixName,
datasetName, output("Y")};
+ // Clean per-case output/expected to avoid reusing
stale metadata between looped cases
+ String outY = output("Y");
+ String expY = expected("Y");
+ FileUtils.deleteQuietly(new File(outY));
+ FileUtils.deleteQuietly(new File(outY + ".mtd"));
+ FileUtils.deleteQuietly(new File(expY));
+ FileUtils.deleteQuietly(new File(expY + ".mtd"));
fullRScriptName = HOME + "ReadHDF5_Verify.R";
- rCmd = "Rscript" + " " + fullRScriptName + " " +
inputMatrixName + " " + datasetName + " " + expectedDir();
+ rCmd = "Rscript" + " " + fullRScriptName + " " +
inputMatrixName + " " + testCase.dataset + " "
+ + expectedDir();
runTest(true, false, null, -1);
runRScript(true);
@@ -90,4 +132,61 @@ public abstract class ReadHDF5Test extends ReadHDF5TestBase
{
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
}
}
+
+ private static void generateHdf5Data(Path scriptDir) {
+ ProcessBuilder processBuilder = new ProcessBuilder("Rscript",
"gen_HDF5_testdata.R");
+ processBuilder.directory(scriptDir.toFile());
+ processBuilder.redirectErrorStream(true);
+
+ try {
+ Process process = processBuilder.start();
+ StringBuilder output = new StringBuilder();
+ try(BufferedReader reader = new BufferedReader(
+ new InputStreamReader(process.getInputStream(),
StandardCharsets.UTF_8))) {
+ reader.lines().forEach(line ->
output.append(line).append(System.lineSeparator()));
+ }
+ int exitCode = process.waitFor();
+ if(exitCode != 0)
+ Assert.fail("Failed to execute
gen_HDF5_testdata.R (exit " + exitCode + "):\n" + output);
+ }
+ catch(IOException e) {
+ Assert.fail("Unable to execute gen_HDF5_testdata.R: " +
e.getMessage());
+ }
+ catch(InterruptedException e) {
+ Thread.currentThread().interrupt();
+ Assert.fail("Interrupted while generating HDF5 test
data.");
+ }
+ }
+
+ private enum DmlVariant {
+ FORMAT_AND_DATASET("ReadHDF5_WithFormatAndDataset.dml"),
DATASET_ONLY("ReadHDF5_WithDataset.dml"),
+ DEFAULT("ReadHDF5_Default.dml");
+
+ private final String scriptName;
+
+ DmlVariant(String scriptName) {
+ this.scriptName = scriptName;
+ }
+
+ public String getScriptName() {
+ return scriptName;
+ }
+ }
+
+ private static final class Hdf5TestCase {
+ private final String hdf5File;
+ private final String dataset;
+ private final DmlVariant variant;
+
+ private Hdf5TestCase(String hdf5File, String dataset,
DmlVariant variant) {
+ this.hdf5File = hdf5File;
+ this.dataset = dataset;
+ this.variant = variant;
+ }
+
+ @Override
+ public String toString() {
+ return hdf5File + "::" + dataset;
+ }
+ }
}
diff --git
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java
deleted file mode 100644
index b0fff7a639..0000000000
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test1.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.functions.io.hdf5;
-
-public class ReadHDF5Test1 extends ReadHDF5Test {
-
- private final static String TEST_NAME = "ReadHDF5Test";
- public final static String TEST_CLASS_DIR = TEST_DIR +
ReadHDF5Test1.class.getSimpleName() + "/";
-
- protected String getTestName() {
- return TEST_NAME;
- }
-
- protected String getTestClassDir() {
- return TEST_CLASS_DIR;
- }
-
- protected int getId() {
- return 1;
- }
-}
diff --git
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java
deleted file mode 100644
index d6a4c763c3..0000000000
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test2.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.functions.io.hdf5;
-
-public class ReadHDF5Test2 extends ReadHDF5Test {
-
- private final static String TEST_NAME = "ReadHDF5Test";
- private final static String TEST_CLASS_DIR = TEST_DIR +
ReadHDF5Test2.class.getSimpleName() + "/";
-
- protected String getTestName() {
- return TEST_NAME;
- }
-
- protected String getTestClassDir() {
- return TEST_CLASS_DIR;
- }
-
- protected int getId() {
- return 2;
- }
-}
diff --git
a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java
b/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java
deleted file mode 100644
index 71a6b1762e..0000000000
--- a/src/test/java/org/apache/sysds/test/functions/io/hdf5/ReadHDF5Test3.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.functions.io.hdf5;
-
-public class ReadHDF5Test3 extends ReadHDF5Test {
-
- private final static String TEST_NAME = "ReadHDF5Test";
- private final static String TEST_CLASS_DIR = TEST_DIR +
ReadHDF5Test3.class.getSimpleName() + "/";
-
- protected String getTestName() {
- return TEST_NAME;
- }
-
- protected String getTestClassDir() {
- return TEST_CLASS_DIR;
- }
-
- protected int getId() {
- return 3;
- }
-}
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_3.dml
b/src/test/scripts/functions/io/hdf5/ReadHDF5_Default.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_3.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_Default.dml
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
b/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
index 2b977007dd..925e092f72 100644
--- a/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
+++ b/src/test/scripts/functions/io/hdf5/ReadHDF5_Verify.R
@@ -26,5 +26,19 @@ options(digits=22)
library("rhdf5")
-Y = h5read(args[1],args[2],native = TRUE)
-writeMM(as(Y, "CsparseMatrix"), paste(args[3], "Y", sep=""))
+Y = h5read(args[1], args[2], native = TRUE)
+dims = dim(Y)
+
+if(length(dims) == 1) {
+ # convert to a column matrix
+ Y_mat = matrix(Y, ncol = 1)
+} else if(length(dims) > 2) {
+ # flatten everything beyond the first dimension into columns
+ perm = c(1, rev(seq(2, length(dims))))
+ Y_mat = matrix(aperm(Y, perm), nrow = dims[1], ncol = prod(dims[-1]))
+} else {
+  # for 2D, SystemDS treats it the same
+ Y_mat = Y
+}
+
+writeMM(as(Y_mat, "CsparseMatrix"), paste(args[3], "Y", sep=""))
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_2.dml
b/src/test/scripts/functions/io/hdf5/ReadHDF5_WithDataset.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_2.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_WithDataset.dml
diff --git a/src/test/scripts/functions/io/hdf5/ReadHDF5Test_1.dml
b/src/test/scripts/functions/io/hdf5/ReadHDF5_WithFormatAndDataset.dml
similarity index 100%
rename from src/test/scripts/functions/io/hdf5/ReadHDF5Test_1.dml
rename to src/test/scripts/functions/io/hdf5/ReadHDF5_WithFormatAndDataset.dml
diff --git a/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R
b/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R
new file mode 100644
index 0000000000..fb9fed140a
--- /dev/null
+++ b/src/test/scripts/functions/io/hdf5/gen_HDF5_testdata.R
@@ -0,0 +1,247 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# Generate various HDF5 test files with different formats.
+# Creates test files in the 'in' directory.
+
+if (!require("rhdf5", quietly = TRUE)) {
+ cat("Error: rhdf5 is not installed.\n")
+ quit(status = 1)
+}
+
+SMALL_MATRIX_2D <- c(200, 40)
+SMALL_MATRIX_3D <- c(15, 15, 5)
+SMALL_TENSOR_4D_A <- c(120, 16, 16, 4)
+SMALL_TENSOR_4D_B <- c(120, 16, 16, 5)
+SMALL_LABEL_MATRIX <- c(120, 12)
+
+VECTOR_LENGTH <- 200
+STRING_ARRAY_LENGTH <- 30
+
+CHUNK_SHAPE <- c(100, 20)
+
+write_matrix <- function(file_path, dataset_name, shape, generator =
function(n) rnorm(n), storage.mode = "double", H5type = NULL) {
+ values <- generator(prod(shape))
+ h5createDataset(
+ file_path,
+ dataset_name,
+ dims = rev(shape),
+ chunk = NULL,
+ filter = "NONE", # contiguous, uncompressed layout
+ level = 0,
+ shuffle = FALSE,
+ storage.mode = storage.mode,
+ H5type = H5type,
+    native = TRUE # store in HDF5 native (row-major) order, matching h5read(..., native=TRUE) in the verification tests
+ )
+ h5write(array(values, dim = shape), file_path, dataset_name, native = TRUE)
+}
+
+generate_test_file_single_dataset <- function(dir) {
+ file_path <- file.path(dir, "test_single_dataset.h5")
+ h5createFile(file_path)
+ write_matrix(file_path, "data", SMALL_MATRIX_2D)
+ cat("Created test_single_dataset.h5 (single 2D dataset)\n")
+}
+
+generate_test_file_multiple_datasets <- function(dir) {
+ file_path <- file.path(dir, "test_multiple_datasets.h5")
+ h5createFile(file_path)
+ write_matrix(file_path, "matrix_2d", SMALL_MATRIX_2D)
+ # Create 1D vector without compression/filters
+ h5createDataset(file_path, "vector_1d", dims = VECTOR_LENGTH, chunk = NULL,
filter = "NONE", level = 0, shuffle = FALSE)
+ h5write(rnorm(VECTOR_LENGTH), file_path, "vector_1d", native = TRUE)
+ write_matrix(file_path, "matrix_3d", SMALL_MATRIX_3D)
+ cat("Created test_multiple_datasets.h5 (1D/2D/3D datasets)\n")
+}
+
+generate_test_file_different_dtypes <- function(dir) {
+ file_path <- file.path(dir, "test_different_dtypes.h5")
+ h5createFile(file_path)
+ # H5T_IEEE_F64LE (64-bit float)
+ write_matrix(file_path, "double_primary", SMALL_MATRIX_2D, storage.mode =
"double")
+ # H5T_IEEE_F32LE (32-bit float)
+ write_matrix(file_path, "float32", SMALL_MATRIX_2D, H5type =
"H5T_IEEE_F32LE")
+ # H5T_STD_I32LE (32-bit integer)
+ write_matrix(
+ file_path,
+ "int32",
+ SMALL_MATRIX_2D,
+ generator = function(n) as.integer(sample(-100:100, n, replace = TRUE)),
+ storage.mode = "integer"
+ )
+ # H5T_STD_I64LE (64-bit integer)
+ write_matrix(
+ file_path,
+ "int64",
+ SMALL_MATRIX_2D,
+ generator = function(n) as.integer(sample(-100:100, n, replace = TRUE)),
+ H5type = "H5T_STD_I64LE"
+ )
+ cat("Created test_different_dtypes.h5 (double/float/int32/int64 datasets)\n")
+}
+
+#
https://support.hdfgroup.org/documentation/hdf5-docs/advanced_topics/chunking_in_hdf5.html
+generate_test_file_chunked <- function(dir) {
+ file_path <- file.path(dir, "test_chunked.h5")
+ h5createFile(file_path)
+
+ data <- array(rnorm(prod(SMALL_MATRIX_2D)), dim = SMALL_MATRIX_2D)
+
+ h5createDataset(file_path, "chunked_data", dims = SMALL_MATRIX_2D, chunk =
CHUNK_SHAPE,
+ filter = "NONE", level = 0, shuffle = FALSE)
+ h5write(data, file_path, "chunked_data", native = TRUE)
+
+ write_matrix(file_path, "non_chunked_data", SMALL_MATRIX_2D)
+ cat("Created test_chunked.h5 (chunked dataset)\n")
+}
+
+generate_test_file_compressed <- function(dir) {
+ file_path <- file.path(dir, "test_compressed.h5")
+ h5createFile(file_path)
+ data <- array(rnorm(prod(SMALL_MATRIX_2D)), dim = SMALL_MATRIX_2D)
+ h5createDataset(file_path, "gzip_compressed_9", dims = SMALL_MATRIX_2D,
+ chunk = SMALL_MATRIX_2D, level = 9)
+ h5write(data, file_path, "gzip_compressed_9", native = TRUE)
+ h5createDataset(file_path, "gzip_compressed_1", dims = SMALL_MATRIX_2D,
+ chunk = SMALL_MATRIX_2D, level = 1)
+ h5write(data, file_path, "gzip_compressed_1", native = TRUE)
+ cat("Created test_compressed.h5 (gzip compression)\n")
+}
+
+generate_test_file_multi_tensor_samples <- function(dir) {
+ file_path <- file.path(dir, "test_multi_tensor_samples.h5")
+ h5createFile(file_path)
+ write_matrix(
+ file_path,
+ "sen1",
+ SMALL_TENSOR_4D_A
+ )
+ write_matrix(
+ file_path,
+ "sen2",
+ SMALL_TENSOR_4D_B
+ )
+ write_matrix(
+ file_path,
+ "label",
+ SMALL_LABEL_MATRIX,
+ generator = function(n) as.integer(sample(0:1, n, replace = TRUE))
+ )
+ cat("Created test_multi_tensor_samples.h5 (multi-input tensors)\n")
+}
+
+generate_test_file_nested_groups <- function(dir) {
+ file_path <- file.path(dir, "test_nested_groups.h5")
+ h5createFile(file_path)
+ write_matrix(file_path, "root_data", SMALL_MATRIX_2D)
+ h5createGroup(file_path, "group1")
+ write_matrix(file_path, "group1/data1", SMALL_MATRIX_2D)
+ h5createGroup(file_path, "group1/subgroup")
+ write_matrix(file_path, "group1/subgroup/data2", SMALL_MATRIX_2D)
+ cat("Created test_nested_groups.h5 (nested group hierarchy)\n")
+}
+
+generate_test_file_with_attributes <- function(dir) {
+ file_path <- file.path(dir, "test_with_attributes.h5")
+ h5createFile(file_path)
+ write_matrix(file_path, "data", SMALL_MATRIX_2D)
+
+ fid <- H5Fopen(file_path)
+ did <- H5Dopen(fid, "data")
+ h5writeAttribute("Test dataset with attributes", did, "description")
+ h5writeAttribute(1.0, did, "version")
+ h5writeAttribute(SMALL_MATRIX_2D, did, "shape")
+ H5Dclose(did)
+
+ h5writeAttribute("2025-11-26", fid, "file_created")
+ h5writeAttribute("attributes", fid, "test_type")
+ H5Fclose(fid)
+ cat("Created test_with_attributes.h5 (dataset + file attributes)\n")
+}
+
+generate_test_file_empty_datasets <- function(dir) {
+ file_path <- file.path(dir, "test_empty_datasets.h5")
+ h5createFile(file_path)
+ h5createDataset(file_path, "empty", dims = c(0, SMALL_MATRIX_2D[2]),
+ filter = "NONE", level = 0, shuffle = FALSE)
+
+ h5createDataset(file_path, "scalar", dims = 1,
+ filter = "NONE", level = 0, shuffle = FALSE, chunk = 1)
+ h5write(1.0, file_path, "scalar", native = TRUE)
+ h5createDataset(file_path, "vector", dims = VECTOR_LENGTH,
+ filter = "NONE", level = 0, shuffle = FALSE, chunk =
VECTOR_LENGTH)
+ h5write(rnorm(VECTOR_LENGTH), file_path, "vector", native = TRUE)
+ cat("Created test_empty_datasets.h5 (empty/scalar/vector)\n")
+}
+
+generate_test_file_string_datasets <- function(dir) {
+ file_path <- file.path(dir, "test_string_datasets.h5")
+ h5createFile(file_path)
+ strings <- paste0("string_", 0:(STRING_ARRAY_LENGTH - 1))
+ # Create string dataset without compression/filters
+ h5createDataset(file_path, "string_array", dims = STRING_ARRAY_LENGTH,
+ storage.mode = "character", filter = "NONE", level = 0,
+ shuffle = FALSE, chunk = STRING_ARRAY_LENGTH)
+ h5write(strings, file_path, "string_array", native = TRUE)
+ cat("Created test_string_datasets.h5 (string datasets)\n")
+}
+
+main <- function() {
+ if (basename(getwd()) != "hdf5") {
+ cat("You must execute this script from the 'hdf5' directory\n")
+ quit(status = 1)
+ }
+
+ testdir <- "in"
+ if (!dir.exists(testdir)) {
+ dir.create(testdir)
+ }
+
+ test_functions <- list(
+ generate_test_file_single_dataset,
+ generate_test_file_multiple_datasets,
+ generate_test_file_different_dtypes,
+ generate_test_file_chunked,
+ generate_test_file_compressed,
+ generate_test_file_multi_tensor_samples,
+ generate_test_file_nested_groups,
+ generate_test_file_with_attributes,
+ generate_test_file_empty_datasets,
+ generate_test_file_string_datasets
+ )
+
+ for (test_func in test_functions) {
+ tryCatch({
+ test_func(testdir)
+ }, error = function(e) {
+ cat(sprintf(" ✗ Error: %s\n", conditionMessage(e)))
+ })
+ }
+
+ files <- sort(list.files(testdir, pattern = "\\.h5$", full.names = TRUE))
+ cat(sprintf("\nGenerated %d HDF5 test files in %s\n", length(files),
normalizePath(testdir)))
+}
+
+if (!interactive()) {
+ main()
+}