Hi, I think I've come across a performance issue with H5Dread when reading non-contiguous hyperslab selections. The use case in my software is a bit complicated, so instead I came up with a small example that shows the same issue. Please let me know if I'm missing something here, it's possible that a different approach could be much better.
In my example I write a 2D native int chunked dataset to an HDF5 file (adapted from the h5_extend example, now writes a 229 MB file). I then construct a hyperslab selection of the dataset and read it back using a single call to H5Dread. When I use a stride of 1 (so all elements of the selection are contiguous) the read is very fast. However, when I set the stride to 2 the read time slows down significantly, on the order of 15 times slower. The dataset has a chunk shape of 1000x500, and the 0th dimension is the one being tested with a stride of 1 and 2. Is this a typical slowdown seen with a stride of 2? Since the chunk size along that dimension is 1000, strides of 1 and 2 should still require reading the same chunks (and thus the same amount of data) from disk, so I would expect similar performance. I've run the stride-of-2 scenario under Valgrind (using the callgrind tool) for profiling, and it shows that 95% of the time is being spent in H5S_select_iterate (I can share the callgrind output if it helps), which makes this program CPU-bound and nowhere near I/O-bound. Any ideas on how to optimize this function or otherwise increase the performance of this use case? Thanks, Chris LeBlanc
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Copyright by The HDF Group. * * Copyright by the Board of Trustees of the University of Illinois. * * All rights reserved. * * * * This file is part of HDF5. The full HDF5 copyright notice, including * * terms governing use, modification, and redistribution, is contained in * * the files COPYING and Copyright.html. COPYING can be found at the root * * of the source code distribution tree; Copyright.html can be found at the * * root level of an installed copy of the electronic HDF5 document set and * * is linked from the top-level documents page. It can also be found at * * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * * access to either file, you may request a copy from [email protected]. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* * This example how to work with extendible datasets. The dataset * must be chunked in order to be extendible. * * It is used in the HDF5 Tutorial. */ // Modified example of h5_extend.c to show performance difference between reading with a stride of 1 vs 2: #include <stdlib.h> #include <stdio.h> #include <time.h> #include "hdf5.h" #define FILE "extend.h5" #define DATASETNAME "ExtendibleArray" #define RANK 2 void write_file() { hid_t file; /* handles */ hid_t dataspace, dataset; hid_t filespace, memspace; hid_t cparms; hsize_t dims[2] = {20000, 3000}; /* dataset dimensions at creation time */ hsize_t maxdims[2] = {H5S_UNLIMITED, H5S_UNLIMITED}; herr_t status; hsize_t chunk_dims[2] = {1000, 500}; int *data = calloc(dims[0]*dims[1], sizeof(int)); /* Variables used in reading data back */ hsize_t chunk_dimsr[2]; hsize_t dimsr[2]; hsize_t i, j; int *datar = calloc(dims[0]*dims[1], sizeof(int)); herr_t status_n; int rank, rank_chunk; /* Create the data space with unlimited dimensions. */ dataspace = H5Screate_simple (RANK, dims, maxdims); /* Create a new file. 
If file exists its contents will be overwritten. */ file = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); /* Modify dataset creation properties, i.e. enable chunking */ cparms = H5Pcreate (H5P_DATASET_CREATE); status = H5Pset_chunk (cparms, RANK, chunk_dims); /* Create a new dataset within the file using cparms creation properties. */ dataset = H5Dcreate2 (file, DATASETNAME, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, cparms, H5P_DEFAULT); status = H5Sclose (dataspace); /* Write data to dataset */ status = H5Dwrite (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); /* Close resources */ status = H5Dclose (dataset); status = H5Fclose (file); status = H5Pclose (cparms); free(data); } void read_file(hsize_t dim1_stride, hsize_t dim2_stride) { /* Variables used in reading data back */ hid_t file; hid_t dataspace, dataset; hid_t filespace, memspace; hsize_t chunk_dimsr[2]; hsize_t dimsr[2]; hsize_t memspace_dims[2]; hsize_t i, j; int *datar; hsize_t mem_offsets[2] = {0, 0}; hsize_t strides[2] = {dim1_stride, dim2_stride}; hsize_t count[2]; herr_t status_n; int rank_chunk; file = H5Fopen (FILE, H5F_ACC_RDONLY, H5P_DEFAULT); dataset = H5Dopen2 (file, DATASETNAME, H5P_DEFAULT); filespace = H5Dget_space (dataset); //rank = H5Sget_simple_extent_ndims (filespace); status_n = H5Sget_simple_extent_dims (filespace, dimsr, NULL); memspace_dims[0] = dimsr[0] / strides[0]; memspace_dims[1] = dimsr[1]; memspace = H5Screate_simple (RANK, memspace_dims, NULL); count[0] = dimsr[0] / strides[0]; count[1] = dimsr[1]; // core of this test: a hyperslab with varying stride: H5Sselect_hyperslab( filespace, H5S_SELECT_SET, mem_offsets, strides, count, NULL ); datar = calloc(memspace_dims[0]*memspace_dims[1], sizeof(int)); printf("reading with stride = %d, memspace_dims: %d %d, count: %d %d\n", (int) strides[0], (int) memspace_dims[0], (int) memspace_dims[1], (int) count[0], (int) count[1]); time_t t1 = time(NULL); int status = H5Dread (dataset, H5T_NATIVE_INT, memspace, 
filespace, H5P_DEFAULT, datar); time_t t2 = time(NULL); printf("done reading with stride = %d, time = %d (nearest sec)\n", (int) strides[1], (int) (t2-t1) ); status = H5Dclose (dataset); status = H5Sclose (filespace); status = H5Sclose (memspace); status = H5Fclose (file); free(datar); } int main (void) { write_file(); read_file(1, 1); read_file(2, 1); }
_______________________________________________ Hdf-forum is for HDF software users discussion. [email protected] http://mail.hdfgroup.org/mailman/listinfo/hdf-forum_hdfgroup.org
