I'm learning HDF5. I created an HDF5 file that contains one group. The group
holds two 2-D datasets of the same size. I repeatedly read and write the same
set of rows, but the time taken differs from pass to pass. That is what confuses me.
#include <iostream>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <fstream>
#include <assert.h>
#include <vector>
#include "hdf5.h"
#include "mpi.h"
#define H5FILE_NAME "test1.h5"
#define DATASET_NAME "/Data/IntArray"
#define DATASET_NAME1 "/Data/IntArray1"
using namespace std;
/* Key array consulted by icomp(); it must point at valid keys while a sort
 * that uses icomp() is running. */
static int *icomp_vec;

/*
 * qsort() comparator that orders *indices* by the key each one refers to.
 *
 * p1, p2: pointers to int indices into icomp_vec.
 * Returns a negative value, zero, or a positive value when the key at *p1 is
 * less than, equal to, or greater than the key at *p2.
 *
 * Equal keys must compare as 0: qsort() requires a consistent ordering, and
 * answering "less than" for both (a,b) and (b,a) violates that contract.
 */
int icomp(const void *p1, const void *p2)
{
    const int lhs = icomp_vec[*(const int *)p1];
    const int rhs = icomp_vec[*(const int *)p2];

    /* Branch-free three-way compare; avoids the overflow of lhs - rhs. */
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Indirect sort: fill indx[0..n-1] with a permutation of 0..n-1 such that
 * direction * x[indx[k]] is non-decreasing in k. x itself is not modified.
 *
 * n:         number of elements in x and indx; n <= 0 is a no-op.
 * x:         keys to order.
 * direction: +1 for ascending order of x, -1 for descending (asserted).
 * indx:      output array of n indices.
 *
 * The keys are handed to the comparator through the file-scope icomp_vec
 * pointer, so this function is neither reentrant nor thread-safe.
 * Aborts the process if the temporary key array cannot be allocated.
 */
void isort(int n, int *x, int direction, int *indx)
{
    int i;

    assert(direction*direction==1);

    /* Nothing to sort; also keeps a negative n from becoming a huge size_t. */
    if (n <= 0)
        return;

    icomp_vec = (int*)calloc((size_t)n, sizeof(int));
    if (icomp_vec == NULL) {
        fprintf(stderr, "isort: out of memory for %d keys\n", n);
        abort();
    }

    for (i = 0; i < n; i++) {
        icomp_vec[i] = direction*x[i];
        indx[i] = i;
    }

    qsort(indx, (size_t)n, sizeof(int), icomp);

    free(icomp_vec);
    icomp_vec = NULL; /* do not leave a dangling pointer behind */
}
int main(int argc, char** argv) {
hid_t file_id, dataspace_id, dataset_id, datatype_id, dsid, mid, dataspace_id1, dataset_id1, grp;
herr_t status;
hsize_t dims[2],dims_out[2];
hsize_t start[2],start_out[2]; /* Start of hyperslab */
hsize_t stride[2]; /* Stride of hyperslab */
hsize_t count[2]; /* Block count */
hsize_t block[2]; /* Block sizes */
clock_t t1,t2;
int out[200],out1[200];
int i, j, k, temp, D, W, NNZ, di, wi, value, p, q;
int *order,order1[2000],index1[2000];
time_t tt1,tt2;
ifstream fin("/home/gaoyang/E/deal_with_data/9494voc/enron_train.txt");
int myid, pnum;
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Info info = MPI_INFO_NULL;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &pnum);
fin>>D>>W>>NNZ;if(myid==0)cout<<D<<endl<<W<<endl<<NNZ<<endl;
int *index_wkx;
index_wkx=new int[D*W];
for(i=0;i<NNZ;i++)
{
fin>>di>>wi>>value;
index_wkx[(di-1)*W+wi-1]=value;
}
file_id=H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
grp=H5Gcreate2(file_id,"/Data",H5P_DEFAULT,H5P_DEFAULT,H5P_DEFAULT);
dims[0]=D;
dims[1]=W;
dataspace_id=H5Screate_simple(2, dims, NULL);
dataspace_id1=H5Screate_simple(2, dims, NULL);
dataset_id=H5Dcreate(file_id, DATASET_NAME, H5T_STD_I32BE, dataspace_id,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
dataset_id1=H5Dcreate(file_id, DATASET_NAME1, H5T_STD_I32BE, dataspace_id1,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
status=H5Dwrite(dataset_id, H5T_NATIVE_INT, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT, index_wkx);
status=H5Dwrite(dataset_id1, H5T_NATIVE_INT, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT, index_wkx);
stride[0]=1;stride[1]=1;
count[0]=1;count[1]=W;
block[0]=1;block[1]=1;
dims_out[0]=1;
dims_out[1]=W;
start_out[0]=0;start_out[1]=0;
mid=H5Screate_simple(2, dims_out, NULL);
status=H5Sselect_hyperslab(mid,H5S_SELECT_SET,start_out,stride,count,block);
if(myid==0)cout<<"ok"<<endl;
order=(int *)calloc(W,sizeof(int));
srand(time(NULL));
for(k=0;k<20;k++)
{
//file_id=H5Fopen(H5FILE_NAME, H5F_ACC_RDWR, H5P_DEFAULT);
if(k==0)
{
for(i=0;i<W;i++)
order[i]=i;
for(i=0;i<W-1;i++)
{
j=i+(int)((double)(W-i)*(double)(rand()%3000/3000.0));
temp=order[j];
order[j]=order[i];
order[i]=temp;
}
for(i=0;i<2000;i++)
order1[i]=order[i];
isort(2000, order1, 1, index1);
}
//if(myid==0) cout<<k<<","<<order1[index1[0]]<<","<<order1[index1[1]]<<endl;
/*if(myid==0)
{
for(p=0;p<100;p++)
cout<<order1[index1[p]]<<" ";
cout<<endl;
}*/
time(&tt1);
if(myid==0)
for(i=0;i<2000;i++)
{
start[0]=order1[index1[i]];
//start[0]=0;
start[1]=0;
status=H5Sselect_hyperslab(dataspace_id,H5S_SELECT_SET,start,stride,count,block);
status=H5Sselect_hyperslab(dataspace_id1,H5S_SELECT_SET,start,stride,count,block);
status=H5Dread(dataset_id, H5T_NATIVE_INT, mid, dataspace_id, H5P_DEFAULT, index_wkx+i*W);
status=H5Dread(dataset_id1, H5T_NATIVE_INT, mid, dataspace_id1, H5P_DEFAULT, index_wkx+i*W);
//for(j=0;j<W;j++)
//index_wkx[i*W+j]++;
/*if(i==0)
t1=clock();
for(q=0;q<10000;q++)
for(p=0;p<50;p++)
;
if(i==0 && k==0)
{
t2=clock();
cout<<(double)(t2-t1)/CLOCKS_PER_SEC;
}*/
status=H5Dwrite(dataset_id, H5T_NATIVE_INT, mid, dataspace_id, H5P_DEFAULT, index_wkx+i*W);
status=H5Dwrite(dataset_id1, H5T_NATIVE_INT, mid, dataspace_id1, H5P_DEFAULT, index_wkx+i*W);
}
time(&tt2);
if(myid==0)
printf("%d k=%d %f\n",myid,k,difftime(tt2,tt1));
MPI_Barrier(MPI_COMM_WORLD);
}
status=H5Dclose(dataset_id);
status=H5Sclose(dataspace_id);
status=H5Dclose(dataset_id1);
status=H5Sclose(dataspace_id1);
status=H5Fclose(file_id);
fin.close();
MPI_Finalize();
return 0;
}
_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://mail.hdfgroup.org/mailman/listinfo/hdf-forum_hdfgroup.org