Hi Elena
On Mon, 2013-12-16 at 22:56 -0600, Elena Pourmal wrote:
> Hi Eugen,
> 
> It may be a feature (or a bug ;-)
> 
> In preparation for I/O, HDF5 allocates internal structures for handling 
> chunks. The overhead for each chunk is a couple of KB (compare that with the size of the 
> chunk!). After I/O is done, the memory used by these structures is 
> put on a "free" list for reuse. 3 GB seems a bit too much, and we will 
> need to investigate. 

OK. That's what I expected. 

> 
> Meanwhile, could you please try to call H5garbage_collect 
> http://www.hdfgroup.org/HDF5/doc/RM/RM_H5.html#Library-GarbageCollect after 
> H5Dwrite to see if memory is released?

Did not have any effect. 

> 
> You may also try to write just one chunk after the "big" write. It should 
> also release memory.

No. It does not ;) - at least not in my case.

> 
> But as I said, we will need to look more closely at memory consumption and 
> see if any improvements/tuning/fixes could be done. I'll add the issue to our 
> database.

Attached is a new version of the program I have used for testing. 

regards
  Eugen

> 
> Thank you!
> 
> Elena
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> Elena Pourmal  The HDF Group  http://hdfgroup.org   
> 1800 So. Oak St., Suite 203, Champaign IL 61820
> 217.531.6112
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> 
> 
> 
> On Dec 11, 2013, at 2:33 AM, "Wintersberger, Eugen" 
> <[email protected]> wrote:
> 
> > Hi there
> >  we (a colleague and I) stumbled upon an interesting behavior of the
> > HDF5 library (we are using 1.8.11 on Debian Wheezy). 
> > We try to write a single 2D array of data to an HDF5 dataset with an
> > absolutely stupid chunking scheme (see the test() function in the
> > attached source file). As a result the library allocates quite a lot of
> > memory  (around 3 GByte). What surprised us is that this memory is not
> > freed even after closing the file. Moreover, it does not grow when
> > calling test() several times, as can be seen in the output of the
> > attached program:
> > 
> > ./test
> > Startup ...
> > RSS            - 6.880000e+02 kB
> > Shared Memory  - 5.200000e+02 kB
> > Private Memory - 1.680000e+02 kB
> > 
> > After first write ...
> > RSS            - 2.916884e+06 kB
> > Shared Memory  - 2.160000e+03 kB
> > Private Memory - 2.914724e+06 kB
> > 
> > After second write ...
> > RSS            - 2.921896e+06 kB
> > Shared Memory  - 2.160000e+03 kB
> > Private Memory - 2.919736e+06 kB
> > 
> > Obviously this is not a resource leak in the classical sense. My
> > suspicion is that the memory is occupied by some persistent cache. 
> > Which leads me to my question: is there a possibility to free this
> > memory?
> > 
> > regards
> >  Eugen
> > 
> > -- 
> > ---------------------------------------
> > DI. Dr. Eugen Wintersberger  
> > 
> > FS-EC                        
> > DESY                     
> > Notkestr. 85             
> > D-22607 Hamburg          
> > Germany                  
> > 
> > E-Mail: [email protected] 
> > Telefon: +49-40-8998-1917           
> > ---------------------------------------
> > <test.c>_______________________________________________
> > Hdf-forum is for HDF software users discussion.
> > [email protected]
> > http://mail.lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org
> 
> 
> _______________________________________________
> Hdf-forum is for HDF software users discussion.
> [email protected]
> http://mail.lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

#include <stdlib.h>
#include <stdio.h>
#include <hdf5.h>
#include <time.h>
#include <unistd.h>

// Extent of the dataset along its first (NX) and second (NY) dimension.
#define NX 1024
#define NY 2048
// Total number of elements. The product is parenthesized so that it stays
// intact when NTOT appears inside a larger expression (e.g. x / NTOT).
#define NTOT (NX*NY)
// Extent of a single chunk along the first (NCX) and second (NCY) dimension.
#define NCX 1
#define NCY 4

// Element type of the in-memory data buffer.
typedef unsigned short data_type;

//-----------------------------------------------------------------------------
// Create a dataset called `name` below `parent`.
//
// parent   - identifier of the location the dataset is created in
// name     - name of the new dataset
// type_id  - datatype of the dataset elements
// space_id - dataspace describing the dataset extent
// chunk    - chunk shape with 2 entries (the rank is fixed to 2 here), or
//            NULL to create the dataset with the default creation properties
//
// Returns the dataset identifier returned by H5Dcreate, or a negative value
// if the creation property list could not be set up. The caller closes the
// returned dataset.
hid_t create_dataset(hid_t parent,const char *name,hid_t type_id,hid_t
        space_id,hsize_t *chunk)
{
    //without a chunk shape no creation property list is required at all -
    //in particular H5Pset_chunk must not be called with a NULL pointer
    if(chunk==NULL)
    {
        fprintf(stdout,"Create dataset without chunks ...\n");
        return H5Dcreate(parent,name,type_id,space_id,H5P_DEFAULT,H5P_DEFAULT,
                         H5P_DEFAULT);
    }

    //setup the creation property list for the chunked dataset
    hid_t cplist = H5Pcreate(H5P_DATASET_CREATE);
    if(cplist<0)
    {
        fprintf(stderr,"Cannot create dataset creation property list!\n");
        return -1;
    }

    hid_t dset_id = -1;
    if(H5Pset_layout(cplist,H5D_CHUNKED)<0 || H5Pset_chunk(cplist,2,chunk)<0)
    {
        fprintf(stderr,"Cannot configure chunked layout!\n");
    }
    else
    {
        fprintf(stdout,"Create dataset with chunks ...\n");
        dset_id = H5Dcreate(parent,name,type_id,space_id,H5P_DEFAULT,cplist,
                            H5P_DEFAULT);
    }

    //the property list is no longer needed once the dataset exists
    H5Pclose(cplist);
    return dset_id;
}

//-----------------------------------------------------------------------------
void test(int gbcollect,int rewrite_first)
{

    hsize_t shape[2] = {NX,NY};    //shape of the dataset 
    hsize_t chunk[2] = {NCX,NCY};  //stupid chunk shape for the dataset

    //create a new file
    hid_t fid = H5Fcreate("test2.h5",H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT);

    //create type and data space
    hid_t type_id = H5Tcopy(H5T_NATIVE_USHORT);
    hid_t space_id = H5Screate_simple(2,shape,shape);
    hid_t dset_id = create_dataset(fid,"test",type_id,space_id,chunk);

    //generate data
    unsigned short *data = malloc(sizeof(data_type)*NTOT);
    for(size_t i=0;i<NTOT;++i) data[i] = 123;

    //write data to disk
    H5Dwrite(dset_id,type_id,H5S_ALL,H5S_ALL,H5P_DEFAULT,data);

    if(gbcollect)
    {
        fprintf(stdout,"Running garbage collector ...\n");
        H5garbage_collect();
    }

    if(rewrite_first)
    {
        hsize_t start[2]  = {0,0};
        hsize_t count[2]  = {1,1};
        H5Sselect_hyperslab(space_id,H5S_SELECT_SET,start,NULL,count,NULL);
        data_type buffer = 1;
        H5Dwrite(dset_id,type_id,H5S_ALL,space_id,H5P_DEFAULT,&buffer);
    }

    //close everything and release memory
    H5Dclose(dset_id);
    H5Tclose(type_id);
    H5Sclose(space_id);
    H5Fflush(fid,H5F_SCOPE_GLOBAL);
    H5Fclose(fid);
    free(data);
}

//-----------------------------------------------------------------------------
// Print the resident, shared and private (resident - shared) memory of the
// current process to stdout, in kB.
//
// The second and third field of /proc/self/statm are read as page counts and
// scaled with the page size reported by sysconf(). If the file cannot be
// opened or parsed, a message is written to stderr and nothing is printed to
// stdout.
void check(void)
{
    //read the programs statm file from the proc file system
    FILE *f = fopen("/proc/self/statm","r");
    if(f==NULL)
    {
        perror("Cannot open /proc/self/statm");
        return;
    }

    //the first field is skipped, only the next two values are used below
    unsigned long resident = 0;
    unsigned long share = 0;
    int nread = fscanf(f,"%*u %lu %lu",&resident,&share);
    fclose(f);
    if(nread!=2)
    {
        fprintf(stderr,"Cannot parse /proc/self/statm!\n");
        return;
    }

    long page_size = sysconf(_SC_PAGE_SIZE);
    if(page_size<=0)
    {
        fprintf(stderr,"Cannot determine the page size!\n");
        return;
    }

    double page_size_kb = (double)page_size / 1024.0;
    double rss = (double)resident * page_size_kb;
    fprintf(stdout,"RSS            - %e kB\n",rss);

    double shared_mem = (double)share * page_size_kb;
    fprintf(stdout,"Shared Memory  - %e kB\n",shared_mem);
    fprintf(stdout,"Private Memory - %e kB\n",rss - shared_mem);
}

//-----------------------------------------------------------------------------
// Program entry: report the memory usage at startup and after each of two
// identical test() runs. Building with GARBAGE_COLLECT defined makes every
// run call the garbage collector; otherwise every run rewrites the first
// element instead. The command line arguments are not used.
int main(int argc,char **argv)
{
    //select the test variant once at compile time
#ifdef GARBAGE_COLLECT
    const int run_gc = 1;
    const int rewrite_one = 0;
#else
    const int run_gc = 0;
    const int rewrite_one = 1;
#endif

    (void)argc;
    (void)argv;

    fputs("Startup ...\n",stdout);
    check();

    test(run_gc,rewrite_one);
    fputs("\nAfter first write ...\n",stdout);
    check();

    test(run_gc,rewrite_one);
    fputs("\nAfter second write ...\n",stdout);
    check();

    return 0;
}

Attachment: signature.asc
Description: This is a digitally signed message part

_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://mail.lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

Reply via email to