Greetings

I have encountered a "memory leak" problem using h5py and also netcdf4-python 
so I attempted to replicate the problem in C with hdf5 API. The problem is:
When using chunked storage memory usage grows linearly with read and write 
operations and is never released until the process is killed by the kernel. 
When I run the same test with no chunking memory use (as measured) is static.

I see the same behaviour with the C example (attached). However, in the
chunked-read case, if I select the same dataspace hyperslab for each read (as
opposed to a new hyperslab for each iteration), memory use stays static, as in
the sketch below.
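
For reference, a minimal sketch of the loop variant whose memory use stays
static (same variable names as in the attached example; the hyperslab is
selected once, before the loop, instead of once per iteration):

   /* select the hyperslab once, up front */
   H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, data_offset, NULL,
                       data_count, NULL);
   for (j = 0; j < 250; j++)
      for (i = 0; i < 400; i++)
         /* every read uses the identical selection: memory stays flat */
         H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace,
                 H5P_DEFAULT, data_out);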

My test dataset is (250, 400, 300), and the test loops through it, extracting
(1, 1, 300) hyperslabs.

I have tested this with:
HDF5 Version: 1.8.13
Linux 2.6.32-431.11.2.el6.x86_64

The test files I used are ~120 MB, but I can upload similar ones with smaller
dimensions if required; a sketch of how a comparable file can be created is
below.
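
For anyone who wants to reproduce without downloading the files, a minimal
sketch of creating a comparable chunked test file (the chunk shape shown here,
(1, 1, 300), is illustrative, not necessarily what I used):

   #include "hdf5.h"

   int main(void) {
      hsize_t dims[3]  = {250, 400, 300};
      hsize_t chunk[3] = {1, 1, 300};   /* illustrative chunk shape */
      hid_t   file, space, dcpl, dset;

      file  = H5Fcreate("test_read_chunked_000.h5", H5F_ACC_TRUNC,
                        H5P_DEFAULT, H5P_DEFAULT);
      space = H5Screate_simple(3, dims, NULL);
      dcpl  = H5Pcreate(H5P_DATASET_CREATE);
      H5Pset_chunk(dcpl, 3, chunk);     /* omit this call for the
                                           contiguous (not-chunked) file */
      dset  = H5Dcreate2(file, "data", H5T_NATIVE_FLOAT, space,
                         H5P_DEFAULT, dcpl, H5P_DEFAULT);
      /* ... write data with H5Dwrite ... */
      H5Dclose(dset);
      H5Pclose(dcpl);
      H5Sclose(space);
      H5Fclose(file);
      return 0;
   }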
Has anyone encountered this problem, or can anyone suggest where I am going
wrong?
Any suggestions would be greatly appreciated.
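
(For reference, HDF5 also exposes a per-dataset raw-data chunk cache that can
be bounded via a dataset access property list; a minimal sketch below, though
I have not confirmed whether this cache is what is growing in my test:)

   /* sketch: 521 hash slots, 1 MiB cache, default preemption policy */
   hid_t dapl = H5Pcreate(H5P_DATASET_ACCESS);
   H5Pset_chunk_cache(dapl, 521, 1024 * 1024, H5D_CHUNK_CACHE_W0_DEFAULT);
   dataset = H5Dopen2(file, "data", dapl);
   H5Pclose(dapl);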


Cheers
Stuart

#include <stdio.h>
#include <stdlib.h>
#include "hdf5.h"
#define H5FILE "test_read_chunked_000.h5"
//#define H5FILE "test_read_notchunked_000.h5"

typedef struct {
    unsigned long size,resident,share,text,lib,data,dt;
} statm_t;

int read_off_memory_status(statm_t *result)
{
  const char *statm_path = "/proc/self/statm";
  FILE *f = fopen(statm_path, "r");

  if (!f) {
    perror(statm_path);
    abort();
  }

  /* statm fields are unsigned, so use %lu rather than %ld */
  if (7 != fscanf(f, "%lu %lu %lu %lu %lu %lu %lu",
                  &result->size, &result->resident, &result->share,
                  &result->text, &result->lib, &result->data, &result->dt)) {
    perror(statm_path);
    abort();
  }

  fclose(f);
  return (int)result->resident;
}

int main(void) {

   hid_t       file, dataset;  /* identifiers */
   hid_t       dataspace, memspace;
   hsize_t     dims[3];
   hsize_t     mem_dim[]    = {300};
   hsize_t     mem_offset[] = {0};
   hsize_t     mem_count[]  = {300};
   hsize_t     data_offset[] = {0, 0, 0};
   hsize_t     data_count[]  = {1, 1, 300};
   statm_t    *pmem_use = malloc(sizeof(statm_t));
   int         rank;
   int         i, j;
   float       data_out[300];

   /* Open an existing file. */
   file = H5Fopen(H5FILE, H5F_ACC_RDONLY, H5P_DEFAULT);
   dataset = H5Dopen2(file, "data", H5P_DEFAULT);

   dataspace = H5Dget_space(dataset);
   rank      = H5Sget_simple_extent_ndims(dataspace);
   H5Sget_simple_extent_dims(dataspace, dims, NULL);
   printf("dataset rank %d, dimensions %lu x %lu x %lu\n",
      rank, (unsigned long)(dims[0]), (unsigned long)(dims[1]), (unsigned 
long)(dims[2]));


   memspace  = H5Screate_simple(1,mem_dim,NULL);

   H5Sselect_hyperslab(memspace, H5S_SELECT_SET, mem_offset, NULL,
                                mem_count, NULL);

   printf ("Reading data (1,1,300):\n");
   for (j = 0; j < 250; j++) {
      if (j % 5 == 0)
      printf("row : %d    mem : %d %d\n", j, pmem_use->resident, 
pmem_use->size);

      for (i = 0; i < 400; i++) {

         data_offset[0] = j;
         data_offset[1] = i;

         /* a new hyperslab selection for every iteration */
         H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, data_offset, NULL,
                             data_count, NULL);

         H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace, H5P_DEFAULT,
                 data_out);

         read_off_memory_status(pmem_use);
      }
   }
   /* End access to the dataset and release resources used by it. */
   H5Dclose(dataset);
   H5Sclose(dataspace);
   H5Sclose(memspace);
   H5Fclose(file);
   free(pmem_use);
   return 0;
}
