I'm learning HDF5. I created an HDF5 file that contains one group, and the group holds two 2-D datasets of the same size. I repeatedly read and write the same set of rows, yet the time I measure is not the same each time. That is what confuses me.
#include <iostream> #include <time.h> #include <stdio.h> #include <stdlib.h> #include <math.h> #include <fstream> #include <assert.h> #include <vector> #include "hdf5.h" #include "mpi.h" #define H5FILE_NAME "test1.h5" #define DATASET_NAME "/Data/IntArray" #define DATASET_NAME1 "/Data/IntArray1" using namespace std;
static int *icomp_vec; int icomp(const void *p1, const void *p2) { int i = * (int *) p1; int j = * (int *) p2; return icomp_vec[i] > icomp_vec[j] ? 1:-1; } void isort(int n, int *x, int direction, int *indx) { int i; assert(direction*direction==1); icomp_vec = (int*)calloc(n,sizeof(int)); for (i=0; i<n; i++) { icomp_vec[i] = direction*x[i]; indx[i] = i; } qsort(indx,n,sizeof(int),icomp); free(icomp_vec); } int main(int argc, char** argv) { hid_t file_id, dataspace_id, dataset_id, datatype_id, dsid, mid, dataspace_id1, dataset_id1, grp; herr_t status; hsize_t dims[2],dims_out[2]; hsize_t start[2],start_out[2]; /* Start of hyperslab */ hsize_t stride[2]; /* Stride of hyperslab */ hsize_t count[2]; /* Block count */ hsize_t block[2]; /* Block sizes */ clock_t t1,t2; int out[200],out1[200]; int i, j, k, temp, D, W, NNZ, di, wi, value, p, q; int *order,order1[2000],index1[2000]; time_t tt1,tt2; ifstream fin("/home/gaoyang/E/deal_with_data/9494voc/enron_train.txt"); int myid, pnum; MPI_Comm comm = MPI_COMM_WORLD; MPI_Info info = MPI_INFO_NULL; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &pnum); fin>>D>>W>>NNZ;if(myid==0)cout<<D<<endl<<W<<endl<<NNZ<<endl; int *index_wkx; index_wkx=new int[D*W]; for(i=0;i<NNZ;i++) { fin>>di>>wi>>value; index_wkx[(di-1)*W+wi-1]=value; } file_id=H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); grp=H5Gcreate2(file_id,"/Data",H5P_DEFAULT,H5P_DEFAULT,H5P_DEFAULT); dims[0]=D; dims[1]=W; dataspace_id=H5Screate_simple(2, dims, NULL); dataspace_id1=H5Screate_simple(2, dims, NULL); dataset_id=H5Dcreate(file_id, DATASET_NAME, H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); dataset_id1=H5Dcreate(file_id, DATASET_NAME1, H5T_STD_I32BE, dataspace_id1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); status=H5Dwrite(dataset_id, H5T_NATIVE_INT, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT, index_wkx); status=H5Dwrite(dataset_id1, H5T_NATIVE_INT, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT, 
index_wkx); stride[0]=1;stride[1]=1; count[0]=1;count[1]=W; block[0]=1;block[1]=1; dims_out[0]=1; dims_out[1]=W; start_out[0]=0;start_out[1]=0; mid=H5Screate_simple(2, dims_out, NULL); status=H5Sselect_hyperslab(mid,H5S_SELECT_SET,start_out,stride,count,block); if(myid==0)cout<<"ok"<<endl; order=(int *)calloc(W,sizeof(int)); srand(time(NULL)); for(k=0;k<20;k++) { //file_id=H5Fopen(H5FILE_NAME, H5F_ACC_RDWR, H5P_DEFAULT); if(k==0) { for(i=0;i<W;i++) order[i]=i; for(i=0;i<W-1;i++) { j=i+(int)((double)(W-i)*(double)(rand()%3000/3000.0)); temp=order[j]; order[j]=order[i]; order[i]=temp; } for(i=0;i<2000;i++) order1[i]=order[i]; isort(2000, order1, 1, index1); } //if(myid==0) cout<<k<<","<<order1[index1[0]]<<","<<order1[index1[1]]<<endl; /*if(myid==0) { for(p=0;p<100;p++) cout<<order1[index1[p]]<<" "; cout<<endl; }*/ time(&tt1); if(myid==0) for(i=0;i<2000;i++) { start[0]=order1[index1[i]]; //start[0]=0; start[1]=0; status=H5Sselect_hyperslab(dataspace_id,H5S_SELECT_SET,start,stride,count,block); status=H5Sselect_hyperslab(dataspace_id1,H5S_SELECT_SET,start,stride,count,block); status=H5Dread(dataset_id, H5T_NATIVE_INT, mid, dataspace_id, H5P_DEFAULT, index_wkx+i*W); status=H5Dread(dataset_id1, H5T_NATIVE_INT, mid, dataspace_id1, H5P_DEFAULT, index_wkx+i*W); //for(j=0;j<W;j++) //index_wkx[i*W+j]++; /*if(i==0) t1=clock(); for(q=0;q<10000;q++) for(p=0;p<50;p++) ; if(i==0 && k==0) { t2=clock(); cout<<(double)(t2-t1)/CLOCKS_PER_SEC; }*/ status=H5Dwrite(dataset_id, H5T_NATIVE_INT, mid, dataspace_id, H5P_DEFAULT, index_wkx+i*W); status=H5Dwrite(dataset_id1, H5T_NATIVE_INT, mid, dataspace_id1, H5P_DEFAULT, index_wkx+i*W); } time(&tt2); if(myid==0) printf("%d k=%d %f\n",myid,k,difftime(tt2,tt1)); MPI_Barrier(MPI_COMM_WORLD); } status=H5Dclose(dataset_id); status=H5Sclose(dataspace_id); status=H5Dclose(dataset_id1); status=H5Sclose(dataspace_id1); status=H5Fclose(file_id); fin.close(); MPI_Finalize(); return 0; }
_______________________________________________ Hdf-forum is for HDF software users discussion. Hdf-forum@hdfgroup.org http://mail.hdfgroup.org/mailman/listinfo/hdf-forum_hdfgroup.org