All,

I am writing to update you on the HDF5 1.8.6 release.

The release is on hold due to test failures in the parallel HDF5 library. We
still do not know whether the problem is in HDF5, in the version of the MPI I/O
library we are using, or in both.

We now have a standalone HDF5 program (attached) that consistently reproduces
the problem on NCSA's Abe with Lustre and MVAPICH2: the data in the created
file is not correct. There is no problem when OpenMPI is used.

While we are working on the MPI counterpart of the program, we would greatly
appreciate it if you could test the program with the parallel HDF5 libraries
installed on your systems and report:

- testing results
- version of HDF5 (any of the 1.8.* series is OK to use)
- version of the MPI I/O library
- version of the OS and file system
- versions of the compilers used to build MPI I/O and HDF5

You should be able to compile the program with the h5pcc compiler script
(usually found in the bin directory under the HDF5 installation directory).
The program should be run on several nodes, with the number of processes
greater than the number of nodes, for example, 4 nodes and 6 processes.
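For example, assuming the attached source is saved as hsss_test.c (the file
name here is just a placeholder) and your MPI provides mpiexec, something
along these lines should work:

  h5pcc -o hsss_test hsss_test.c
  mpiexec -n 6 ./hsss_test

Run it from a directory on the parallel file system, since the program creates
its test file, hsss.h5, in the current working directory, and use whatever
hostfile or batch-system options your site requires to spread the 6 processes
over 4 nodes.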
Thanks a lot for your help!
Elena on behalf of The HDF Team
#include <mpi.h>
#include <hdf5.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

int nerrors = 0;

#define VRFY(val, mesg) do {                                          \
    if(!(val)) {                                                      \
        printf("Proc %d: ", mpi_rank);                                \
        printf("*** PHDF5 ERROR ***\n");                              \
        printf("    Assertion (%s) failed at line %4d in %s\n",       \
               mesg, (int)__LINE__, __FILE__);                        \
        ++nerrors;                                                    \
        fflush(stdout);                                               \
        printf("aborting MPI process\n");                             \
        MPI_Finalize();                                               \
        exit(nerrors);                                                \
    }                                                                 \
} while(0)

#define EDGE_SIZE 10
#define RANK      2
/*-------------------------------------------------------------------------
 * Function:    hyperslab_dr_pio_setup__run_test()
 *
 * Purpose:     Tests the setup code for
 *              contig_hyperslab_dr_pio_test__run_test and
 *              checker_board_hyperslab_dr_pio_test__run_test.
 *
 * Return:      void
 *
 * Programmer:  NAF -- 12/2/09
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
#define PAR_SS_DR_MAX_RANK 5
static void
hyperslab_dr_pio_setup__run_test(const hid_t dset_type)
{
    hbool_t     mis_match = 0;
    int         i;
    int         mrc;
    int         mpi_size = -1;
    int         mpi_rank = -1;
    const int   test_max_rank = 5;  /* must update code if this changes */
    uint32_t    expected_value;
    uint32_t   *large_ds_buf_0 = NULL;
    uint32_t   *large_ds_buf_1 = NULL;
    uint32_t   *ptr_0;
    uint32_t   *ptr_1;
    MPI_Comm    mpi_comm = MPI_COMM_NULL;
    MPI_Info    mpi_info = MPI_INFO_NULL;
    hid_t       fid;                /* HDF5 file ID */
    hid_t       acc_tpl;            /* File access templates */
    hid_t       xfer_plist = H5P_DEFAULT;
    hid_t       full_mem_large_ds_sid;
    hid_t       full_file_large_ds_sid;
    hid_t       mem_large_ds_sid;
    hid_t       file_large_ds_sid;
    hid_t       large_ds_dcpl_id = H5P_DEFAULT;
    hid_t       large_dataset;      /* Dataset ID */
    size_t      large_ds_size = 1;
    size_t      buf_size = (size_t)EDGE_SIZE;
    hsize_t     dims[PAR_SS_DR_MAX_RANK];
    hsize_t     start[PAR_SS_DR_MAX_RANK];
    hsize_t     stride[PAR_SS_DR_MAX_RANK];
    hsize_t     count[PAR_SS_DR_MAX_RANK];
    hsize_t     block[PAR_SS_DR_MAX_RANK];
    herr_t      ret;                /* Generic return value */

    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    assert( mpi_size >= 1 );

    mpi_comm = MPI_COMM_WORLD;
    mpi_info = MPI_INFO_NULL;

    for ( i = 0; i < RANK - 1; i++ ) {
        large_ds_size *= (size_t)EDGE_SIZE;
    }
    large_ds_size *= (size_t)(mpi_size + 1);

    /* rank 0 writes two rows (its own row plus the extra last row), so its
     * write buffer is twice as large as that of the other ranks.
     */
    if(mpi_rank == 0)
        buf_size *= 2;

    large_ds_buf_0 = (uint32_t *)malloc(sizeof(uint32_t) * buf_size);
    VRFY((large_ds_buf_0 != NULL), "malloc of large_ds_buf_0 succeeded");

    large_ds_buf_1 = (uint32_t *)malloc(sizeof(uint32_t) * large_ds_size);
    VRFY((large_ds_buf_1 != NULL), "malloc of large_ds_buf_1 succeeded");

    /* initialize the write buffer: rank i fills in the values for row i,
     * and rank 0 also fills in the values for the last row (row mpi_size).
     */
    ptr_0 = large_ds_buf_0;
    for(i = 0; i < EDGE_SIZE; i++)
        *ptr_0++ = (uint32_t)i + (EDGE_SIZE * mpi_rank);
    if(mpi_rank == 0)
        for(i = 0; i < EDGE_SIZE; i++)
            *ptr_0++ = (uint32_t)i + (EDGE_SIZE * mpi_size);

    memset(large_ds_buf_1, 0, sizeof(uint32_t) * large_ds_size);
    /* ----------------------------------------
     * CREATE AN HDF5 FILE WITH PARALLEL ACCESS
     * ---------------------------------------*/

    /* setup file access template */
    acc_tpl = H5Pcreate(H5P_FILE_ACCESS);
    VRFY((acc_tpl >= 0), "H5Pcreate(H5P_FILE_ACCESS) succeeded");

    ret = H5Pset_fapl_mpio(acc_tpl, mpi_comm, mpi_info);
    VRFY((ret >= 0), "H5Pset_fapl_mpio() succeeded");

    ret = H5Pset_fclose_degree(acc_tpl, H5F_CLOSE_SEMI);
    VRFY((ret >= 0), "H5Pset_fclose_degree() succeeded");

    /* create the file collectively */
    fid = H5Fcreate("hsss.h5", H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl);
    VRFY((fid >= 0), "H5Fcreate succeeded");

    /* setup dims: (mpi_size + 1) rows of EDGE_SIZE elements */
    dims[0] = (int)(mpi_size + 1);
    dims[1] = EDGE_SIZE;

    /* Create large ds dataspaces */
    full_mem_large_ds_sid = H5Screate_simple(RANK, dims, NULL);
    VRFY((full_mem_large_ds_sid >= 0),
         "H5Screate_simple() full_mem_large_ds_sid succeeded");

    full_file_large_ds_sid = H5Screate_simple(RANK, dims, NULL);
    VRFY((full_file_large_ds_sid >= 0),
         "H5Screate_simple() full_file_large_ds_sid succeeded");

    /* rank 0 writes two rows, so its memory dataspace is 2 x EDGE_SIZE;
     * all other ranks use a 1-D dataspace of EDGE_SIZE elements.
     */
    if(mpi_rank == 0) {
        dims[0] = 2;
        mem_large_ds_sid = H5Screate_simple(2, dims, NULL);
        VRFY((mem_large_ds_sid >= 0),
             "H5Screate_simple() mem_large_ds_sid succeeded");
        dims[0] = (int)(mpi_size + 1);
    } else {
        mem_large_ds_sid = H5Screate_simple(1, &(dims[1]), NULL);
        VRFY((mem_large_ds_sid >= 0),
             "H5Screate_simple() mem_large_ds_sid succeeded");
    }

    file_large_ds_sid = H5Screate_simple(RANK, dims, NULL);
    VRFY((file_large_ds_sid >= 0),
         "H5Screate_simple() file_large_ds_sid succeeded");

    /* create the large dataset */
    large_dataset = H5Dcreate2(fid, "large_dataset", dset_type,
                               file_large_ds_sid, H5P_DEFAULT,
                               large_ds_dcpl_id, H5P_DEFAULT);
    VRFY((large_dataset >= 0), "H5Dcreate2() large_dataset succeeded");

    /* setup xfer property list for collective I/O */
    xfer_plist = H5Pcreate(H5P_DATASET_XFER);
    VRFY((xfer_plist >= 0), "H5Pcreate(H5P_DATASET_XFER) succeeded");

    ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
    VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
    /* setup the selection for writing the initial data to the large data set */
    start[0]  = mpi_rank;
    stride[0] = 2 * (mpi_size + 1);
    count[0]  = 1;
    block[0]  = 1;
    for ( i = 1; i < RANK; i++ ) {
        start[i]  = 0;
        stride[i] = 2 * EDGE_SIZE;
        count[i]  = 1;
        block[i]  = EDGE_SIZE;
    }

    /* select this rank's row of the file dataspace */
    start[0] = mpi_rank;
    ret = H5Sselect_hyperslab(file_large_ds_sid,
                              H5S_SELECT_SET,
                              start,
                              stride,
                              count,
                              block);
    VRFY((ret >= 0), "H5Sselect_hyperslab(file_large_ds_sid, set) succeeded");

    if ( mpi_rank == 0 ) { /* add an additional slice to the selection */
        start[0] = mpi_size;
        ret = H5Sselect_hyperslab(file_large_ds_sid,
                                  H5S_SELECT_OR,
                                  start,
                                  stride,
                                  count,
                                  block);
        VRFY((ret >= 0), "H5Sselect_hyperslab(file_large_ds_sid, or) succeeded");
    }
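    /*
     * At this point each rank has selected row mpi_rank of the
     * (mpi_size + 1) x EDGE_SIZE file dataspace, and rank 0 has also OR-ed
     * in the last row (row mpi_size), so the collective write below covers
     * every row of the dataset exactly once.
     */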
    /* write the initial value of the large data set to file */
    ret = H5Dwrite(large_dataset, dset_type, mem_large_ds_sid,
                   file_large_ds_sid, xfer_plist, large_ds_buf_0);
    if ( ret < 0 ) H5Eprint2(H5E_DEFAULT, stderr);
    VRFY((ret >= 0), "H5Dwrite() large_dataset initial write succeeded");

    /* Close and reopen the file, to satisfy sequential consistency semantics */
    ret = H5Dclose(large_dataset);
    VRFY((ret >= 0), "H5Dclose(large_dataset) succeeded");

    ret = H5Fclose(fid);
    VRFY((ret >= 0), "file close succeeded");

    mrc = MPI_Barrier(MPI_COMM_WORLD);
    VRFY((mrc == MPI_SUCCESS), "Sync after dataset writes");

    fid = H5Fopen("hsss.h5", H5F_ACC_RDONLY, acc_tpl);
    if ( fid < 0 ) H5Eprint2(H5E_DEFAULT, stderr);
    VRFY((fid >= 0), "H5Fopen succeeded");

    large_dataset = H5Dopen2(fid, "large_dataset", H5P_DEFAULT);
    VRFY((large_dataset >= 0), "H5Dopen2() large_dataset succeeded");

    /* read the large data set back to verify that it contains the
     * expected data. Note that each process reads in the entire
     * data set.
     */
    ret = H5Dread(large_dataset,
                  H5T_NATIVE_UINT32,
                  full_mem_large_ds_sid,
                  full_file_large_ds_sid,
                  xfer_plist,
                  large_ds_buf_1);
    VRFY((ret >= 0), "H5Dread() large_dataset initial read succeeded");

    /* verify that the correct data was written to the large data set */
    expected_value = 0;
    mis_match = 0;
    ptr_1 = large_ds_buf_1;

    /* Check that the write buffer didn't get corrupted */
    ptr_0 = large_ds_buf_0;
    for(i = 0; i < EDGE_SIZE; i++)
        if(*ptr_0++ != (uint32_t)i + (EDGE_SIZE * mpi_rank))
            printf("Invalid wbuf rank %d!\n", mpi_rank);
    if(mpi_rank == 0)
        for(i = 0; i < EDGE_SIZE; i++)
            if(*ptr_0++ != (uint32_t)i + (EDGE_SIZE * mpi_size))
                printf("Invalid wbuf rank 0 (p2)!\n");
    for ( i = 0; i < (int)large_ds_size; i++ ) {
        if ( *ptr_1 != expected_value ) {
            mis_match = 1;
        }
        ptr_1++;
        expected_value++;
    }
    /* VRFY( (mis_match == 0), "large ds init data good."); */

    /* Close dataspaces */
    ret = H5Sclose(full_mem_large_ds_sid);
    VRFY((ret >= 0), "H5Sclose(full_mem_large_ds_sid) succeeded");

    ret = H5Sclose(full_file_large_ds_sid);
    VRFY((ret >= 0), "H5Sclose(full_file_large_ds_sid) succeeded");

    ret = H5Sclose(mem_large_ds_sid);
    VRFY((ret >= 0), "H5Sclose(mem_large_ds_sid) succeeded");

    ret = H5Sclose(file_large_ds_sid);
    VRFY((ret >= 0), "H5Sclose(file_large_ds_sid) succeeded");

    /* Close Datasets */
    ret = H5Dclose(large_dataset);
    VRFY((ret >= 0), "H5Dclose(large_dataset) succeeded");

    /* Release property lists */
    ret = H5Pclose(xfer_plist);
    VRFY((ret >= 0), "H5Pclose(xfer_plist) succeeded");

    ret = H5Pclose(acc_tpl);
    VRFY((ret >= 0), "H5Pclose(acc_tpl) succeeded");

    /* close the file collectively */
    ret = H5Fclose(fid);
    VRFY((ret >= 0), "file close succeeded");

    /* Free memory buffers */
    if ( large_ds_buf_0 != NULL ) free(large_ds_buf_0);
    if ( large_ds_buf_1 != NULL ) free(large_ds_buf_1);

    MPI_Barrier(MPI_COMM_WORLD);
    VRFY( (mis_match == 0), "large ds init data good." );

    return;
} /* hyperslab_dr_pio_setup__run_test() */
/*-------------------------------------------------------------------------
 * Function:    main()
 *
 * Purpose:     Repeatedly runs hyperslab_dr_pio_setup__run_test() to
 *              exercise the setup code for
 *              contig_hyperslab_dr_pio_test__run_test and
 *              checker_board_hyperslab_dr_pio_test__run_test.
 *
 * Return:      int
 *
 * Programmer:  NAF -- 12/2/10
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
int
main(int argc, char **argv)
{
    int   i;
    hid_t dset_type = H5T_NATIVE_UINT;

    MPI_Init(&argc, &argv);

    for ( i = 0; i < 100; i++ ) {
        /* contiguous data set, collective I/O */
        hyperslab_dr_pio_setup__run_test(dset_type);
    }

    puts("Test passed");

    H5close();
    MPI_Finalize();

    return 0;
} /* main() */
On Oct 13, 2010, at 9:19 AM, Elena Pourmal wrote:
> Hi All,
>
> Thanks to everyone who tested 1.8.6-pre1. We greatly appreciate your effort.
>
> The release date was pushed to November due to the issues discovered during
> the testing. We have been working on the fixes and will make the next
> pre-release candidate available for testing as soon as possible. Stay tuned.
>
> Thank you!
>
> The HDF Team
>
>
> On Sep 24, 2010, at 9:08 PM, [email protected] wrote:
>
>> Hi all,
>>
>> A pre-release candidate version of HDF5 1.8.6 is available for testing,
>> and can be downloaded at the following link:
>>
>> http://www.hdfgroup.uiuc.edu/ftp/pub/outgoing/hdf5/hdf5-1.8.6-pre1/
>>
>> If you have some time to test this within the next two weeks, we would
>> greatly appreciate it, as while we test on the platforms we have access
>> to, there are many more that we are unable to test on ourselves. Please
>> report any errors found by October 10th. If no critical errors are
>> reported beforehand, we're targeting mid-October for our release date.
>>
>> Thank you!
>>
>> The HDF Team
>>
>>
_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://mail.hdfgroup.org/mailman/listinfo/hdf-forum_hdfgroup.org