Greetings
I have encountered a "memory leak" problem using h5py and also netcdf4-python,
so I attempted to replicate it in C with the HDF5 API. The problem is: when
using chunked storage, memory usage grows linearly with read and write
operations and is never released until the process is killed by the kernel.
When I run the same test with no chunking, measured memory use is static.
I see the same behaviour with the C example (attached). However, in the
chunked read case, if I select the same dataspace hyperslab for each read (as
opposed to a new hyperslab for each iteration), memory use stays static; see
the sketch below. My test dataset is (250, 400, 300) and the test loops
through extracting (1, 1, 300) hyperslabs.
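For clarity, the "same hyperslab" variant differs from the attached example
only in that data_offset is never updated inside the loop; a minimal sketch,
using the attached example's identifiers:

/* Same-hyperslab variant: data_offset never changes, so every iteration
   selects (and reads) the identical (1,1,300) slab. With this change,
   memory use stays static. */
for (j = 0; j < 250; j++)
    for (i = 0; i < 400; i++) {
        H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, data_offset, NULL,
                            data_count, NULL);
        H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace,
                H5P_DEFAULT, data_out);
    }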
I have tested this with:
HDF5 Version: 1.8.13
Linux 2.6.32-431.11.2.el6.x86_64
The files I used for testing are ~120 MB, but I can upload similar ones with
smaller dimensions if required.
Has anyone encountered this problem, or can anyone suggest where I am going wrong?
Any suggestions would be greatly appreciated.
Cheers
Stuart
#include <stdio.h>
#include <stdlib.h>
#include "hdf5.h"
#define H5FILE "test_read_chunked_000.h5"
//#define H5FILE "test_read_notchunked_000.h5"  /* swap defines for the unchunked test */
typedef struct {
    unsigned long size, resident, share, text, lib, data, dt;
} statm_t;
/* Read this process's memory counters (in pages) from /proc/self/statm. */
int read_off_memory_status(statm_t *result)
{
    const char *statm_path = "/proc/self/statm";
    FILE *f = fopen(statm_path, "r");
    if (!f) {
        perror(statm_path);
        abort();
    }
    if (7 != fscanf(f, "%lu %lu %lu %lu %lu %lu %lu",
                    &result->size, &result->resident, &result->share,
                    &result->text, &result->lib, &result->data, &result->dt)) {
        perror(statm_path);
        abort();
    }
    fclose(f);
    return result->resident;
}
int main(void)
{
    hid_t file, dataset;                /* identifiers */
    hid_t dataspace, memspace;
    hsize_t dims[3];
    hsize_t mem_dim[] = {300};
    hsize_t mem_offset[] = {0};
    hsize_t mem_count[] = {300};
    hsize_t data_offset[] = {0, 0, 0};
    hsize_t data_count[] = {1, 1, 300};
    statm_t *pmem_use = malloc(sizeof(statm_t));
    int rank;
    int i, j;
    float data_out[300];

    /* Open the existing file and dataset. */
    file = H5Fopen(H5FILE, H5F_ACC_RDONLY, H5P_DEFAULT);
    dataset = H5Dopen2(file, "data", H5P_DEFAULT);
    dataspace = H5Dget_space(dataset);
    rank = H5Sget_simple_extent_ndims(dataspace);
    H5Sget_simple_extent_dims(dataspace, dims, NULL);
    printf("dataset rank %d, dimensions %lu x %lu x %lu\n",
           rank, (unsigned long)dims[0], (unsigned long)dims[1],
           (unsigned long)dims[2]);

    /* Fixed 1-D memory space; its 300-element selection never changes. */
    memspace = H5Screate_simple(1, mem_dim, NULL);
    H5Sselect_hyperslab(memspace, H5S_SELECT_SET, mem_offset, NULL,
                        mem_count, NULL);

    printf("Reading data (1,1,300):\n");
    read_off_memory_status(pmem_use);   /* initialise before the first print */
    for (j = 0; j < 250; j++) {
        if (j % 5 == 0)
            printf("row : %d mem : %lu %lu\n", j, pmem_use->resident,
                   pmem_use->size);
        for (i = 0; i < 400; i++) {
            /* Select a new (1,1,300) file hyperslab for every read;
               memory grows under this pattern. */
            data_offset[0] = j;
            data_offset[1] = i;
            H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, data_offset, NULL,
                                data_count, NULL);
            H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace,
                    H5P_DEFAULT, data_out);
            read_off_memory_status(pmem_use);
        }
    }

    /* End access to the dataset and release resources used by it. */
    H5Dclose(dataset);
    H5Sclose(dataspace);
    H5Sclose(memspace);
    H5Fclose(file);
    free(pmem_use);
    return 0;
}
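In case it helps narrow things down: two library knobs that could be worth
ruling in or out (I have not confirmed either against this test) are HDF5's
internal free lists and the per-dataset chunk cache. A sketch, again using the
attached example's identifiers; the sizes below are arbitrary example values:

/* 1) Release, or cap, HDF5's internal free lists
      (untested against the files above). */
H5garbage_collect();                            /* free unused free-list memory now */
H5set_free_list_limits(1024 * 1024, 64 * 1024,  /* regular lists: global / per-list bytes */
                       1024 * 1024, 64 * 1024,  /* array lists */
                       1024 * 1024, 64 * 1024); /* block lists */

/* 2) Cap the chunk cache through a dataset access property list,
      applied when the dataset is opened. */
hid_t dapl = H5Pcreate(H5P_DATASET_ACCESS);
H5Pset_chunk_cache(dapl, 521, 1024 * 1024, 0.75);  /* slots, bytes, w0 */
dataset = H5Dopen2(file, "data", dapl);
H5Pclose(dapl);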