Hi Håkon,
Below is a program that you can start with. I am using variable-length
strings.
For fixed-length strings there is some extra work: you may have to pad
all the strings to the same length.
You may try different chunk sizes and block sizes to get the best
performance.
=======================
import ncsa.hdf.hdf5lib.H5;
import ncsa.hdf.hdf5lib.HDF5Constants;
import ncsa.hdf.hdf5lib.exceptions.HDF5Exception;
/**
 * Example: creates an HDF5 file containing one large 1-D dataset of
 * variable-length strings, then writes it in fixed-size blocks through
 * hyperslab selections so only BLOCK_SIZE strings are buffered in memory
 * at a time.
 */
public class CreateStrings {
private final static String H5_FILE = "G:\\temp\\strings.h5";
private final static String DNAME = "/strs";
private final static int RANK = 1;
// total number of strings to write (must be a multiple of BLOCK_SIZE,
// otherwise the final partial block is never flushed)
private final static long[] DIMS = { 25000000 };
private final static long[] MAX_DIMS = { HDF5Constants.H5S_UNLIMITED };
private final static long[] CHUNK_SIZE = { 25000 };
// number of strings written per H5Dwrite call
private final static int BLOCK_SIZE = 250000;

/**
 * Creates a chunked, extendible 1-D dataset of variable-length C strings.
 *
 * @param fid id of an open, writable HDF5 file
 * @throws Exception if any HDF5 call fails
 */
private void createDataset(int fid) throws Exception {
int did = -1, tid = -1, sid = -1, plist = -1;
try {
tid = H5.H5Tcopy(HDF5Constants.H5T_C_S1);
// use variable length to save space
H5.H5Tset_size(tid, HDF5Constants.H5T_VARIABLE);
sid = H5.H5Screate_simple(RANK, DIMS, MAX_DIMS);
// chunked layout is required for an extendible (H5S_UNLIMITED) dataset
plist = H5.H5Pcreate(HDF5Constants.H5P_DATASET_CREATE);
H5.H5Pset_layout(plist, HDF5Constants.H5D_CHUNKED);
H5.H5Pset_chunk(plist, RANK, CHUNK_SIZE);
did = H5.H5Dcreate(fid, DNAME, tid, sid, plist);
} finally {
// best-effort cleanup: close every id independently so one failure
// does not prevent the others from being closed
try {
H5.H5Pclose(plist);
} catch (HDF5Exception ignored) {
}
try {
H5.H5Sclose(sid);
} catch (HDF5Exception ignored) {
}
try {
// the copied datatype id was previously leaked; it must be closed too
H5.H5Tclose(tid);
} catch (HDF5Exception ignored) {
}
try {
H5.H5Dclose(did);
} catch (HDF5Exception ignored) {
}
}
}

/**
 * Fills the dataset with strings "str0" .. "str{DIMS[0]-1}", writing one
 * BLOCK_SIZE-element hyperslab at a time, and prints a running-time
 * estimate after the first ten blocks.
 *
 * @param fid id of the open HDF5 file containing the dataset
 * @throws Exception if any HDF5 call fails
 */
private void writeData(int fid) throws Exception {
int did = -1, tid = -1, msid = -1, fsid = -1;
long[] count = { BLOCK_SIZE };
try {
did = H5.H5Dopen(fid, DNAME);
tid = H5.H5Dget_type(did);
fsid = H5.H5Dget_space(did);
msid = H5.H5Screate_simple(RANK, count, null);
String[] strs = new String[BLOCK_SIZE];
int idx = 0, block_indx = 0;
long start_idx = 0;
long totalBlocks = DIMS[0] / BLOCK_SIZE;
long t0 = System.currentTimeMillis();
System.out.println("Total number of blocks = " + totalBlocks);
for (int i = 0; i < DIMS[0]; i++) {
strs[idx++] = "str" + i;
if (idx == BLOCK_SIZE) { // operator % is very expensive
idx = 0;
H5.H5Sselect_hyperslab(fsid,
HDF5Constants.H5S_SELECT_SET,
new long[] { start_idx }, null, count, null);
H5.H5Dwrite(did, tid, msid, fsid,
HDF5Constants.H5P_DEFAULT, strs);
block_indx++;
if (block_indx == 10) {
long t1 = System.currentTimeMillis();
// extrapolate: per-block time * total blocks, ms -> minutes.
// (The original multiplied the 10-block elapsed time by the
// total block count and divided by 600000, which was off by
// roughly two orders of magnitude.)
System.out.println("Estimated total time (minutes) = "
+ ((t1 - t0) / (double) block_indx * totalBlocks)
/ 60000.0);
}
// next block starts just AFTER the element at index i;
// using "start_idx = i" (the old code) shifted every
// subsequent block back by one, overwriting data
start_idx = i + 1;
}
}
} finally {
// best-effort cleanup, one try per id
try {
H5.H5Sclose(fsid);
} catch (HDF5Exception ignored) {
}
try {
H5.H5Sclose(msid);
} catch (HDF5Exception ignored) {
}
try {
// datatype obtained from H5Dget_type was previously leaked
H5.H5Tclose(tid);
} catch (HDF5Exception ignored) {
}
try {
H5.H5Dclose(did);
} catch (HDF5Exception ignored) {
}
}
}

/**
 * Creates the output file (truncating any existing one), then creates
 * and populates the string dataset. The file id is always closed.
 *
 * @throws Exception if file creation or any dataset operation fails
 */
private void createFile() throws Exception {
int fid = H5.H5Fcreate(H5_FILE, HDF5Constants.H5F_ACC_TRUNC,
HDF5Constants.H5P_DEFAULT, HDF5Constants.H5P_DEFAULT);
if (fid < 0)
return;
try {
createDataset(fid);
writeData(fid);
} finally {
H5.H5Fclose(fid);
}
}

/**
 * Entry point: runs the example and prints any failure's stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
try {
(new CreateStrings()).createFile();
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
=========================
_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://mail.hdfgroup.org/mailman/listinfo/hdf-forum_hdfgroup.org