Calls that affect the metadata need to be collective so that each process has a 
consistent view of what the file metadata should be.

https://support.hdfgroup.org/HDF5/doc/RM/CollectiveCalls.html



Something like this (or the attached):



plist_id = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio(plist_id, comm, info);
H5Pset_all_coll_metadata_ops(plist_id, true);
file_id = H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
H5Pclose(plist_id);

for (int procid = 0; procid < mpi_size; ++procid) {
  hid_t gr_id = H5Gcreate(file_id, std::to_string(procid).c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  H5Gclose(gr_id);
}

H5Fclose(file_id);
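
If you then want each rank to put its own data into its group, keep in mind that H5Dcreate is a metadata call too, so every rank has to create every dataset; only the raw data write can be limited to the owning rank (the attached file does essentially this). A rough sketch along those lines, reusing file_id, mpi_size and mpi_rank from the snippet above and placed before the H5Fclose; the dataset name "data" and the int payload are just placeholders:

hsize_t dims[1] = {1};
hid_t space_id = H5Screate_simple(1, dims, dims);
std::vector<hid_t> dset_ids(mpi_size);
for (int procid = 0; procid < mpi_size; ++procid) {
  // collective: every rank creates the dataset in every group
  hid_t gr_id = H5Gopen(file_id, std::to_string(procid).c_str(), H5P_DEFAULT);
  dset_ids[procid] = H5Dcreate(gr_id, "data", H5T_NATIVE_INT, space_id,
                               H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  H5Gclose(gr_id);
}
// independent raw data write: each rank only touches its own dataset
int value = mpi_rank;
H5Dwrite(dset_ids[mpi_rank], H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &value);
// every rank closes all the handles it opened
for (int procid = 0; procid < mpi_size; ++procid) {
  H5Dclose(dset_ids[procid]);
}
H5Sclose(space_id);

Since the file access list enables H5Pset_all_coll_metadata_ops(), metadata reads such as the H5Gopen above are also treated as collective, which is why the open stays inside the all-ranks loop.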



-----Original Message-----
From: Hdf-forum [mailto:hdf-forum-boun...@lists.hdfgroup.org] On Behalf Of 
Rafal Lichwala
Sent: Wednesday, September 27, 2017 12:32 AM
To: hdf-forum@lists.hdfgroup.org
Subject: Re: [Hdf-forum] high level API for parallel version of HDF5 library



Hi Barbara, Hi All,



Thank you for your answer. That's clear now about the H5TBmake_table() call, but...

H5Gcreate() is not a high-level API call, is it?

So why can't I use it in parallel processes?

Maybe I'm just doing something wrong, so could you please provide a short example of how to create a set of groups (each one named with the process number) when running 4 parallel MPI processes? You can limit the example code to the sequence of HDF5 calls only...

My current code works fine for just one process, but when I try it with 2 (or more) parallel processes the resulting file is corrupted:



plist_id = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio(plist_id, comm, info);
H5Pset_all_coll_metadata_ops(plist_id, true);
file_id = H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
H5Pclose(plist_id);
hid_t gr_id = H5Gcreate(file_id, std::to_string(procid).c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5Gclose(gr_id);
H5Fclose(file_id);





Best regards,

Rafal





On 2017-09-25 at 22:20, Barbara Jones wrote:

> Hi Rafal,

>

> No, the HDF5 High Level APIs are not supported in the parallel version of 
> HDF5.

>

> -Barbara

> h...@hdfgroup.org

>

> -----Original Message-----

> From: Hdf-forum [mailto:hdf-forum-boun...@lists.hdfgroup.org] On Behalf Of 
> Rafal Lichwala

> Sent: Monday, September 18, 2017 8:53 AM

> To: hdf-forum@lists.hdfgroup.org

> Subject: [Hdf-forum] high level API for parallel version of HDF5 library

>

> Hi,

>

> Can I use high-level API function calls (H5TBmake_table(...)) in the parallel 
> version of the HDF5 library?

> There are no property list parameters for those function calls...

>

> Regards,

> Rafal

>

>







#include "hdf5.h"
#include <iostream>
#include <string>
#include <mpi.h>
#include <cstdint>
#include <cstring>
#include <sstream>

// this size of array will be split among the ranks in both independent datasets,
// and in hyperslabs in the same dataset to test the strong scaling of the hdf5 library
static const int LARGE_DATA_SIZE = 134217728; // 1GB of 64-bit longs = 2^30 / 8 bytes

/**
 * @brief Simple tests to work through library integration issues with Parallel HDF5
 */
int main(int argc, char **argv) {
  try {
    /*
     * MPI variables
     */
    int mpi_size, mpi_rank;
    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Info info = MPI_INFO_NULL;
    /*
     * Initialize MPI
     */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(comm, &mpi_size);
    MPI_Comm_rank(comm, &mpi_rank);

    std::stringstream outfilename;
    if (mpi_size > 1) {
      outfilename << "h5g_output_parallel." << mpi_size << ".h5";
    } else {
      outfilename << "h5g_output_serial.h5";
    }

    hsize_t rank_array_size = LARGE_DATA_SIZE / mpi_size;
    const int64_t ras_int = static_cast<int64_t>(rank_array_size);
    hsize_t large_dims[1] = {rank_array_size};
    // allocate on the heap; a stack array of this size would overflow the stack
    std::vector<int64_t> large_data(rank_array_size);
    for (int64_t j = 0; j < ras_int; ++j) {
      large_data[j] = 100000000LL * mpi_rank + j;
    }

    hid_t fapl_id = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl_id, comm, info);
    hid_t fcpl_id = H5P_DEFAULT;
    // requires HDF5 version 1.10.1 or greater (or avoid_truncate subversion trunk,
    // but that is not merged with 1.10 for coll_metadata below)
    //    fcpl_id = H5Pcreate(H5P_FILE_CREATE);
    //    H5Pset_avoid_truncate(fcpl_id, H5F_AVOID_TRUNCATE_ALL);
    // requires HDF5 version 1.10
    //    H5Pset_coll_metadata_write(fapl_id, true);
    hid_t file_id = H5Fcreate(outfilename.str().c_str(), H5F_ACC_TRUNC, fcpl_id, fapl_id);
    H5Pclose(fapl_id);
    if (fcpl_id != H5P_DEFAULT) {
      H5Pclose(fcpl_id);  // H5P_DEFAULT must not be passed to H5Pclose
    }

    hsize_t dims[1] = {1};
    hid_t datatype = H5T_STD_I8LE;
    std::int8_t data1[1] = {( int8_t ) (mpi_rank + 100)};
    std::int8_t data2[1] = {( int8_t ) (mpi_rank - 100)};

    // dataspace is the same for all the datasets below
    hid_t dataspace = H5Screate_simple(1, dims, dims);

    hid_t large_datatype = H5T_NATIVE_INT64;
    // dataspace for the large array
    hid_t large_dataspace = H5Screate_simple(1, large_dims, large_dims);

    // create a common group to contain distinct datasets for each rank
    hid_t common_group = H5Gcreate(file_id, "common group", H5P_DEFAULT, 
H5P_DEFAULT, H5P_DEFAULT);

    // do collective calls to create all the distinct datasets for each rank
    // (each rank must create each dataset)
    hid_t dataset_by_rank[mpi_size];
    hid_t large_dataset_by_rank[mpi_size];
    for (int i = 0; i < mpi_size; ++i) {
      std::stringstream rank_name;
      rank_name << "rank" << i;
      std::cout << rank_name.str() << std::endl;

      dataset_by_rank[i] = H5Dcreate(common_group, rank_name.str().c_str(), datatype,
                                     dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

      rank_name << "_large";
      large_dataset_by_rank[i] = H5Dcreate(common_group, rank_name.str().c_str(), large_datatype,
                                           large_dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
    }

    // set up dataset transfer property list for collective MPI I/O
    hid_t xferplist = H5Pcreate(H5P_DATASET_XFER);
    // H5Pset_dxpl_mpio(xferplist, H5FD_MPIO_INDEPENDENT);
    H5Pset_dxpl_mpio(xferplist, H5FD_MPIO_COLLECTIVE);
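    // note: the transfer mode set here only applies to raw data I/O; metadata
    // calls (H5Fcreate, H5Gcreate, H5Dcreate, H5Dclose, ...) must still be made
    // by every rank, per HDF5's collective calling requirements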

    // each rank writes its own rank to the corresponding dataset for that rank
    H5Dwrite(dataset_by_rank[mpi_rank], datatype, H5S_ALL, H5S_ALL, xferplist, data1);

    // each rank creates its own large data array and writes it to the file
    H5Dwrite(large_dataset_by_rank[mpi_rank], large_datatype, H5S_ALL, H5S_ALL, xferplist, large_data.data());

    // collective calls to close each dataset
    for (int i = 0; i < mpi_size; ++i) {
      H5Dclose(dataset_by_rank[i]);
      H5Dclose(large_dataset_by_rank[i]);
    }
    H5Gclose(common_group);

    // collective calls to create a huge dataset containing all the data from all the ranks as hyperslabs
    hsize_t huge_dims[2] = { ( hsize_t ) mpi_size, rank_array_size};
    hid_t huge_dataspace = H5Screate_simple(2, huge_dims, huge_dims);
    hid_t huge_dataset = H5Dcreate(file_id, "common huge dataset", large_datatype,
                                   huge_dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

    // select the hyperslab from within the full dataset dataspace, matching the size of the array in memory
    hsize_t offset[2] = { ( hsize_t ) mpi_rank, 0};
    hsize_t count[2] = {1, rank_array_size};
    // stride and block are NULL; we want one big contiguous chunk of memory
    H5Sselect_hyperslab(huge_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);

    // each rank writes out its hyperslab
    H5Dwrite(huge_dataset, large_datatype, large_dataspace, huge_dataspace, xferplist, large_data.data());
    H5Dclose(huge_dataset);
    H5Sclose(huge_dataspace);

    // do collective calls to create all the groups for every rank
    // (each rank must create each group, and each dataset within each group)
    hid_t group_by_rank[mpi_size];
    for (int i = 0; i < mpi_size; ++i) {
      std::stringstream rank_name;
      rank_name << "rank" << i;
      std::cout << rank_name.str() << std::endl;

      group_by_rank[i] = H5Gcreate(file_id, rank_name.str().c_str(),
                                   H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
      dataset_by_rank[i] = H5Dcreate(group_by_rank[i], "common dataset", datatype,
                                     dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
    }

    // then each rank does an independent call to write data to the corresponding dataset
    H5Dwrite(dataset_by_rank[mpi_rank], datatype, H5S_ALL, H5S_ALL, xferplist, data2);
    H5Pclose(xferplist);
    H5Sclose(dataspace);
    H5Sclose(large_dataspace);
    for (int i = 0; i < mpi_size; ++i) {
      H5Dclose(dataset_by_rank[i]);
      H5Gclose(group_by_rank[i]);
    }
    H5Fclose(file_id);

    MPI_Finalize();
  } catch (std::exception &e) {
    std::cerr << "std::exception thrown:" << e.what() << std::endl;
    return -1;
  } catch (int e) {
    std::cerr << "Unrecognized error thrown" << e << std::endl;
    return e ? e : -1;
  }
  return 0;
}
_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@lists.hdfgroup.org
http://lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
Twitter: https://twitter.com/hdf5
