I'm learning HDF5. I created an HDF5 file that contains one group. This group holds
two 2-D datasets of the same size. I repeatedly read and write the same set of
rows. However, I get different speeds, and that is what confuses me.
#include <iostream>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <fstream>
#include <assert.h>
#include <vector>
#include "hdf5.h"
#include "mpi.h"
#define H5FILE_NAME "test1.h5"
#define DATASET_NAME "/Data/IntArray"
#define DATASET_NAME1 "/Data/IntArray1"
using namespace std;

/* Key array consulted by icomp(); it must point at valid keys while qsort() runs. */
static int *icomp_vec;

/*
 * qsort() comparator for arrays of indices into icomp_vec.
 *
 * p1 and p2 each point at an int index; the elements are ordered by the
 * keys icomp_vec[index], ascending.
 *
 * Returns a negative value, zero, or a positive value when the first key is
 * less than, equal to, or greater than the second.  Equal keys must compare
 * as 0: reporting them as "less" in both directions gives qsort() an
 * inconsistent ordering.
 */
int icomp(const void *p1, const void *p2)
{
	const int lhs = icomp_vec[*(const int *)p1];
	const int rhs = icomp_vec[*(const int *)p2];

	/* Three-way compare without the overflow risk of lhs - rhs. */
	return (lhs > rhs) - (lhs < rhs);
}

/*
 * Index sort: fill indx[0..n-1] with a permutation of 0..n-1 such that
 * x[indx[0]], x[indx[1]], ... is ordered ascending (direction == 1) or
 * descending (direction == -1).  x itself is not modified.
 *
 *   n         number of elements in x and indx; nothing is done when n <= 0
 *   x         keys to order
 *   direction +1 or -1 (asserted); the keys are multiplied by it, so with
 *             -1 a key equal to INT_MIN would overflow
 *   indx      output permutation, must have room for n ints
 *
 * Uses the file-level icomp_vec as scratch storage for the comparator, so
 * the function is not reentrant.  Aborts the program if the scratch array
 * cannot be allocated.
 */
void isort(int n, int *x, int direction, int *indx) 
{
	int i;

	assert(direction*direction==1);

	/* A negative n would turn into a huge size_t inside calloc()/qsort(). */
	if (n <= 0)
		return;

	icomp_vec = (int*)calloc((size_t)n,sizeof(int));
	if (icomp_vec == NULL) {
		fprintf(stderr, "isort: cannot allocate %d keys\n", n);
		abort();
	}

	for (i=0; i<n; i++) {
		icomp_vec[i] = direction*x[i];
		indx[i] = i;
	}
	qsort(indx,(size_t)n,sizeof(int),icomp);

	free(icomp_vec);
	icomp_vec = NULL;	/* do not leave the comparator state dangling */
}


int main(int argc, char** argv) {

	hid_t file_id, dataspace_id, dataset_id, datatype_id, dsid, mid, dataspace_id1, dataset_id1, grp;
	herr_t status;
	hsize_t dims[2],dims_out[2];

	hsize_t start[2],start_out[2];  /* Start of hyperslab */
   	hsize_t stride[2]; /* Stride of hyperslab */
   	hsize_t count[2];  /* Block count */
   	hsize_t block[2];  /* Block sizes */

	clock_t t1,t2;
	int out[200],out1[200];
	int i, j, k, temp, D, W, NNZ, di, wi, value, p, q;
	int *order,order1[2000],index1[2000];
	time_t tt1,tt2;

	ifstream fin("/home/gaoyang/E/deal_with_data/9494voc/enron_train.txt");
	
	int myid, pnum;
	MPI_Comm comm  = MPI_COMM_WORLD;
        MPI_Info info  = MPI_INFO_NULL;
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);
	MPI_Comm_size(MPI_COMM_WORLD, &pnum);


	fin>>D>>W>>NNZ;if(myid==0)cout<<D<<endl<<W<<endl<<NNZ<<endl;
	int *index_wkx;
	index_wkx=new int[D*W];
	for(i=0;i<NNZ;i++)
	{
	  fin>>di>>wi>>value;
	  index_wkx[(di-1)*W+wi-1]=value;
	}
	
	file_id=H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
	grp=H5Gcreate2(file_id,"/Data",H5P_DEFAULT,H5P_DEFAULT,H5P_DEFAULT);
	
	dims[0]=D;
	dims[1]=W;
	dataspace_id=H5Screate_simple(2, dims, NULL);
	dataspace_id1=H5Screate_simple(2, dims, NULL);
	dataset_id=H5Dcreate(file_id, DATASET_NAME, H5T_STD_I32BE, dataspace_id, 
                          H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
	dataset_id1=H5Dcreate(file_id, DATASET_NAME1, H5T_STD_I32BE, dataspace_id1, 
                          H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
	status=H5Dwrite(dataset_id, H5T_NATIVE_INT, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT, index_wkx);
	status=H5Dwrite(dataset_id1, H5T_NATIVE_INT, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT, index_wkx);
	stride[0]=1;stride[1]=1;
	count[0]=1;count[1]=W;
	block[0]=1;block[1]=1;
	dims_out[0]=1;
	dims_out[1]=W;
	start_out[0]=0;start_out[1]=0;
	mid=H5Screate_simple(2, dims_out, NULL);
        status=H5Sselect_hyperslab(mid,H5S_SELECT_SET,start_out,stride,count,block);
	if(myid==0)cout<<"ok"<<endl;
	

	order=(int *)calloc(W,sizeof(int));
	srand(time(NULL));
	for(k=0;k<20;k++)
	{
	//file_id=H5Fopen(H5FILE_NAME, H5F_ACC_RDWR, H5P_DEFAULT);
	if(k==0)
	{
	for(i=0;i<W;i++)
		order[i]=i;
	for(i=0;i<W-1;i++)
	{
		j=i+(int)((double)(W-i)*(double)(rand()%3000/3000.0));
		temp=order[j];
		order[j]=order[i];
		order[i]=temp;
	}
	for(i=0;i<2000;i++)
		order1[i]=order[i];
	isort(2000, order1, 1, index1);
	}			
	//if(myid==0) cout<<k<<","<<order1[index1[0]]<<","<<order1[index1[1]]<<endl; 

	/*if(myid==0)
	{
		for(p=0;p<100;p++)
			cout<<order1[index1[p]]<<" "; 
		cout<<endl;
	}*/
	time(&tt1);
	if(myid==0)
	for(i=0;i<2000;i++)
	{
	    	start[0]=order1[index1[i]];
		//start[0]=0;
		start[1]=0;
	    	status=H5Sselect_hyperslab(dataspace_id,H5S_SELECT_SET,start,stride,count,block);
	    	status=H5Sselect_hyperslab(dataspace_id1,H5S_SELECT_SET,start,stride,count,block);		
		status=H5Dread(dataset_id, H5T_NATIVE_INT, mid, dataspace_id, H5P_DEFAULT, index_wkx+i*W);
		status=H5Dread(dataset_id1, H5T_NATIVE_INT, mid, dataspace_id1, H5P_DEFAULT, index_wkx+i*W);		
		//for(j=0;j<W;j++)
			//index_wkx[i*W+j]++;
		/*if(i==0)
			t1=clock();	
		for(q=0;q<10000;q++)
			for(p=0;p<50;p++)
				;
		if(i==0 && k==0)
		{
			t2=clock();
			cout<<(double)(t2-t1)/CLOCKS_PER_SEC;
		}*/
		status=H5Dwrite(dataset_id, H5T_NATIVE_INT, mid, dataspace_id, H5P_DEFAULT, index_wkx+i*W);
		status=H5Dwrite(dataset_id1, H5T_NATIVE_INT, mid, dataspace_id1, H5P_DEFAULT, index_wkx+i*W);	
	}
	time(&tt2);
	if(myid==0)
		printf("%d k=%d %f\n",myid,k,difftime(tt2,tt1));
	MPI_Barrier(MPI_COMM_WORLD);
	}
		
	status=H5Dclose(dataset_id);
	status=H5Sclose(dataspace_id);
	status=H5Dclose(dataset_id1);
	status=H5Sclose(dataspace_id1);
	status=H5Fclose(file_id); 
	fin.close();

	MPI_Finalize();
	return 0;
}

_______________________________________________
Hdf-forum is for HDF software users discussion.
Hdf-forum@hdfgroup.org
http://mail.hdfgroup.org/mailman/listinfo/hdf-forum_hdfgroup.org

Reply via email to