Hi Håkon,

Below is a program that you can start with. I am using variable-length strings. Fixed-length strings take some extra work: you may have to pad
the strings so that they all have the same length.

You may want to try different chunk sizes and block sizes to find the best performance.

=======================
import ncsa.hdf.hdf5lib.H5;
import ncsa.hdf.hdf5lib.HDF5Constants;
import ncsa.hdf.hdf5lib.exceptions.HDF5Exception;

/**
 * Example program that creates an HDF5 file containing one chunked,
 * extendible, one-dimensional dataset of variable-length strings and fills
 * it block by block with the generated values {@code "str0"}, {@code "str1"}, ...
 *
 * <p>The data is generated and written {@link #BLOCK_SIZE} elements at a time
 * so that only one block of strings has to be held in memory.
 */
public class CreateStrings {

    /** Default output file; can be overridden by the first command-line argument. */
    private final static String H5_FILE = "G:\\temp\\strings.h5";
    /** Path of the dataset inside the file. */
    private final static String DNAME = "/strs";
    /** The dataset is one-dimensional. */
    private final static int RANK = 1;
    /** Initial (and, in this example, final) number of strings in the dataset. */
    private final static long[] DIMS = { 25000000 };
    /** Maximum dimensions passed to the dataspace (unlimited). */
    private final static long[] MAX_DIMS = { HDF5Constants.H5S_UNLIMITED };
    /** Number of elements per HDF5 chunk. */
    private final static long[] CHUNK_SIZE = { 25000 };
    /** Number of strings handed to a single H5Dwrite call. */
    private final static int BLOCK_SIZE = 250000;
    /** Number of full blocks to write before printing the run-time estimate. */
    private final static int ESTIMATE_AFTER_BLOCKS = 10;

    /**
     * Creates the chunked variable-length string dataset {@link #DNAME}.
     *
     * @param fid identifier of the open HDF5 file
     * @throws Exception if any HDF5 call fails
     */
    private void createDataset(int fid) throws Exception {
        int did = -1, tid = -1, sid = -1, plist = -1;

        try {
            tid = H5.H5Tcopy(HDF5Constants.H5T_C_S1);
            // use variable length to save space
            H5.H5Tset_size(tid, HDF5Constants.H5T_VARIABLE);
            sid = H5.H5Screate_simple(RANK, DIMS, MAX_DIMS);

            // figure out creation properties
            plist = H5.H5Pcreate(HDF5Constants.H5P_DATASET_CREATE);
            H5.H5Pset_layout(plist, HDF5Constants.H5D_CHUNKED);
            H5.H5Pset_chunk(plist, RANK, CHUNK_SIZE);

            did = H5.H5Dcreate(fid, DNAME, tid, sid, plist);
        } finally {
            closePlist(plist);
            closeSpace(sid);
            closeType(tid); // the copied datatype must be released as well
            closeDataset(did);
        }
    }

    /**
     * Generates {@code DIMS[0]} strings and writes them to the dataset in
     * blocks of {@link #BLOCK_SIZE} elements. A trailing partial block (when
     * {@code DIMS[0]} is not a multiple of {@code BLOCK_SIZE}) is written too.
     *
     * @param fid identifier of the open HDF5 file
     * @throws Exception if any HDF5 call fails
     */
    private void writeData(int fid) throws Exception {
        int did = -1, tid = -1, msid = -1, fsid = -1;
        final long total = DIMS[0];
        // round up so that a partial last block is counted
        final long totalBlocks = (total + BLOCK_SIZE - 1) / BLOCK_SIZE;

        try {
            did = H5.H5Dopen(fid, DNAME);
            tid = H5.H5Dget_type(did);
            fsid = H5.H5Dget_space(did);
            msid = H5.H5Screate_simple(RANK, new long[] { BLOCK_SIZE }, null);
            String[] strs = new String[BLOCK_SIZE];

            int idx = 0;            // fill level of strs
            long blocksWritten = 0; // number of blocks already on disk
            long startIdx = 0;      // dataset offset of the block being filled
            final long t0 = System.currentTimeMillis();
            System.out.println("Total number of blocks = " + totalBlocks);

            for (long i = 0; i < total; i++) {
                strs[idx++] = "str" + i;
                if (idx == BLOCK_SIZE) { // operator % is very expensive
                    idx = 0;
                    writeBlock(did, tid, msid, fsid, startIdx, strs);
                    blocksWritten++;
                    // element i was the last one of this block, so the
                    // next block starts right after it
                    startIdx = i + 1;

                    if (blocksWritten == ESTIMATE_AFTER_BLOCKS) {
                        // extrapolate the time spent so far to all blocks
                        long elapsedMs = System.currentTimeMillis() - t0;
                        System.out.println("Total time (minutes) = "
                                + (elapsedMs * totalBlocks)
                                / (blocksWritten * 60000L));
                    }
                }
            }

            if (idx > 0) {
                // flush the partial last block through a matching,
                // smaller memory dataspace
                String[] tail = new String[idx];
                System.arraycopy(strs, 0, tail, 0, idx);
                int tailMsid = -1;
                try {
                    tailMsid = H5.H5Screate_simple(RANK, new long[] { idx },
                            null);
                    writeBlock(did, tid, tailMsid, fsid, startIdx, tail);
                } finally {
                    closeSpace(tailMsid);
                }
            }
        } finally {
            closeSpace(fsid);
            closeSpace(msid);
            closeType(tid); // H5Dget_type returns a handle we own
            closeDataset(did);
        }
    }

    /**
     * Writes {@code strs} to the dataset at offset {@code start}.
     *
     * @param did   dataset identifier
     * @param tid   datatype identifier of the strings
     * @param msid  memory dataspace holding exactly {@code strs.length} elements
     * @param fsid  file dataspace of the dataset; its selection is replaced
     * @param start index of the first dataset element to write
     * @param strs  the strings to write
     * @throws Exception if selecting the hyperslab or writing fails
     */
    private void writeBlock(int did, int tid, int msid, int fsid, long start,
            String[] strs) throws Exception {
        H5.H5Sselect_hyperslab(fsid, HDF5Constants.H5S_SELECT_SET,
                new long[] { start }, null, new long[] { strs.length }, null);
        H5.H5Dwrite(did, tid, msid, fsid, HDF5Constants.H5P_DEFAULT, strs);
    }

    /** Closes a property list if it was opened; failures are only reported. */
    private static void closePlist(int plist) {
        if (plist < 0) {
            return;
        }
        try {
            H5.H5Pclose(plist);
        } catch (HDF5Exception ex) {
            System.err.println("Failed to close property list: " + ex);
        }
    }

    /** Closes a dataspace if it was opened; failures are only reported. */
    private static void closeSpace(int sid) {
        if (sid < 0) {
            return;
        }
        try {
            H5.H5Sclose(sid);
        } catch (HDF5Exception ex) {
            System.err.println("Failed to close dataspace: " + ex);
        }
    }

    /** Closes a datatype if it was opened; failures are only reported. */
    private static void closeType(int tid) {
        if (tid < 0) {
            return;
        }
        try {
            H5.H5Tclose(tid);
        } catch (HDF5Exception ex) {
            System.err.println("Failed to close datatype: " + ex);
        }
    }

    /** Closes a dataset if it was opened; failures are only reported. */
    private static void closeDataset(int did) {
        if (did < 0) {
            return;
        }
        try {
            H5.H5Dclose(did);
        } catch (HDF5Exception ex) {
            System.err.println("Failed to close dataset: " + ex);
        }
    }

    /**
     * Creates (truncating any existing file) the HDF5 file at {@code path},
     * then creates the dataset and fills it.
     *
     * @param path location of the HDF5 file to create
     * @throws Exception if any HDF5 call fails
     */
    private void createFile(String path) throws Exception {
        int fid = H5.H5Fcreate(path, HDF5Constants.H5F_ACC_TRUNC,
                HDF5Constants.H5P_DEFAULT, HDF5Constants.H5P_DEFAULT);

        if (fid < 0) {
            return;
        }

        try {
            createDataset(fid);
            writeData(fid);
        } finally {
            H5.H5Fclose(fid);
        }
    }

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the output file path; when
     *             absent, {@link #H5_FILE} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : H5_FILE;
        try {
            (new CreateStrings()).createFile(path);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

}
=========================



_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://mail.hdfgroup.org/mailman/listinfo/hdf-forum_hdfgroup.org

Reply via email to