Hello,

I am using an HDF5 packet table to save streaming data from a
photon-counting detector that outputs data as UDP packets. The basic
operation involves a one-time open and initialization of a .h5 file with a
packet table storing fixed length compound types. Then the data processing
loop runs and continually stores packets to the file by appending a packet
to the open packet table and performing an H5Fflush().

During a steady, high-data-rate situation I consistently get a segfault in
libhdf5 at around the same file size each time (around 70 to 85 megabytes).
I'm estimating that I'm writing approximately 1636352 bytes/second to the
.h5 file during this process.

The process doing the work is running on Debian "squeeze" Linux 32-bit
(kernel 2.6.32-5). The process has the best nice priority and realtime IO
priority with a value of 1. The process is being run by the root user. The
disk being written to is an Apacer APS25P6B032G-DT industrial SSD with
great write specs. This problem occurs with two versions of hdf5 that I have
tried. First, I was using the libhdf5-serial-1.8.4 that came as a standard
package on Debian squeeze. I then compiled hdf5 1.8.11 (with
CFLAGS=-O0 -g). I experience the same problem with 1.8.11 and finally ran
it with GDB to create the backtrace that is attached to this email.

I've now also compiled hdf5-1.8.4-patch1 (with CFLAGS=-O0 -g) and gotten a
backtrace which is attached as 1.8.4-backtrace.txt.

The source code for this program is located here:
https://github.com/cosmonaut/chess_tmif/tree/master/src

The file tmif_hdf5.c contains all of my hdf5-related code. The function
called repeatedly to store data is save_packet().

Does anyone have advice on further debugging, or any idea why this always
crashes the same way?

Thanks,
~Nick


-- 
Nicholas Nell
Professional Research Assistant
University of Colorado
[email protected]
303-492-5661
#0  0xb7d71eae in H5I_find_id (id=167772168) at H5I.c:2208
        type_ptr = 0x8057a38
        last_id = 0x0
        id_ptr = 0x8075ca0
        type = H5I_GENPROP_LST
        hash_loc = 0
        ret_value = 0x8075ca0
#1  0xb7d6ec6e in H5I_object (id=167772168) at H5I.c:1022
        err_occurred = 0
        id_ptr = 0x8075ca0
        ret_value = 0x0
        __func__ = "H5I_object"
#2  0xb7ca0df1 in H5D__pre_write (dset_id=83886080, mem_type_id=50331958, 
    mem_space_id=67108866, file_space_id=67108867, dxpl_id=167772168, 
    buf=0xbfffef28) at H5Dio.c:278
        err_occurred = 0
        dset = 0x8075cc8
        plist = 0x1
        direct_write = 0
        ret_value = 0
        __func__ = "H5D__pre_write"
#3  0xb7ca0b55 in H5Dwrite (dset_id=83886080, mem_type_id=50331958, 
    mem_space_id=67108866, file_space_id=67108867, dxpl_id=0, buf=0xbfffef28)
    at H5Dio.c:233
        err_occurred = 0
        ret_value = 0
        __func__ = "H5Dwrite"
#4  0xb7be1f5c in H5TB_common_append_records (dataset_id=83886080, 
    mem_type_id=50331958, nrecords=1, orig_table_size=49546, buf=0xbfffef28)
    at H5TB.c:3638
        count = {1}
        offset = {49546}
        sid = 67108867
        m_sid = 67108866
        dims = {49547}
        mem_dims = {1}
#5  0xb7bd99ee in H5PTappend (table_id=234881024, nrecords=1, data=0xbfffef28)
    at H5PT.c:438
        table = 0x80753a8
#6  0x0804a4f4 in save_packet ()
No symbol table info available.
#7  0x08049a40 in main ()
No symbol table info available.
#0  0xb7d25b4c in H5FL_fac_malloc (head=0x8057c08) at H5FL.c:2115
        FUNC = "H5FL_fac_malloc"
        err_occurred = 0
        ret_value = 0x604b45d0
#1  0xb7e3b9b4 in H5SL_new_node (item=0x8074980, key=0x8074980, hashval=0)
    at H5SL.c:586
        FUNC = "H5SL_new_node"
        err_occurred = 0
        ret_value = 0x80735f8
#2  0xb7e3e891 in H5SL_insert_common (slist=0x8073390, item=0x8074980, 
    key=0x8074980) at H5SL.c:679
        FUNC = "H5SL_insert_common"
        err_occurred = 0
        x = 0x80735f8
        prev = 0x805c3f8
        hashval = 0
        ret_value = 0x80735f8
#3  0xb7e3eef2 in H5SL_insert (slist=0x8073390, item=0x8074980, key=0x8074980)
    at H5SL.c:966
        FUNC = "H5SL_insert"
        err_occurred = 0
        ret_value = 0
#4  0xb7ca5efc in H5C_mark_pinned_or_protected_entry_dirty (
    cache_ptr=0xb724e008, thing=0x8074980) at H5C.c:5546
        FUNC = "H5C_mark_pinned_or_protected_entry_dirty"
        err_occurred = 0
        ret_value = 0
        was_pinned_unprotected_and_clean = 1
        entry_ptr = 0x8074980
#5  0xb7c81fa5 in H5AC_mark_pinned_or_protected_entry_dirty (f=0x8072cd8, 
    thing=0x8074980) at H5AC.c:1444
        FUNC = "H5AC_mark_pinned_or_protected_entry_dirty"
        err_occurred = 0
        cache_ptr = 0xb724e008
        result = 1381533014
        ret_value = 0
#6  0xb7db1dfe in H5O_touch_oh (f=0x8072cd8, dxpl_id=167772168, oh=0x8074980, 
    force=0) at H5O.c:1733
        FUNC = "H5O_touch_oh"
        err_occurred = 0
        now = 1381533014
        ret_value = 0
#7  0xb7de4c6b in H5O_copy_mesg (f=0x8072cd8, dxpl_id=167772168, oh=0x8074980, 
    idx=0, type=0xb7fafcc0, mesg=0x8075b78, mesg_flags=0, update_flags=1)
    at H5Omessage.c:1997
        FUNC = "H5O_copy_mesg"
        err_occurred = 0
        idx_msg = 0x8074a30
        ret_value = 0
#8  0xb7de1e42 in H5O_msg_write_real (f=0x8072cd8, dxpl_id=167772168, 
    oh=0x8074980, type=0xb7fafcc0, mesg_flags=0, update_flags=1, 
    mesg=0x8075b78) at H5Omessage.c:437
        FUNC = "H5O_msg_write_real"
        err_occurred = 0
        idx_msg = 0x8074a30
        idx = 0
        ret_value = 0
#9  0xb7de1a2b in H5O_msg_write_oh (f=0x8072cd8, dxpl_id=167772168, 
    oh=0x8074980, type_id=1, mesg_flags=0, update_flags=1, mesg=0x8075b78)
    at H5Omessage.c:342
        FUNC = "H5O_msg_write_oh"
        err_occurred = 0
        type = 0xb7fafcc0
        ret_value = 0
#10 0xb7e1fca0 in H5S_write (f=0x8072cd8, dxpl_id=167772168, oh=0x8074980, 
    update_flags=1, ds=0x8075b78) at H5S.c:960
        FUNC = "H5S_write"
        err_occurred = 0
        ret_value = 0
#11 0xb7cd7e38 in H5D_flush_real (dataset=0x8074c68, dxpl_id=167772168)
    at H5Dint.c:2266
        update_flags = 1
        FUNC = "H5D_flush_real"
        err_occurred = 0
        oh = 0x8074980
        ret_value = 0
#12 0xb7cd804a in H5D_flush_cb (_dataset=0x8074c68, id=83886080, 
    _udata=0xbfffee50) at H5Dint.c:2321
        FUNC = "H5D_flush_cb"
        err_occurred = 0
        dataset = 0x8074c68
        udata = 0xbfffee50
        ret_value = 0
#13 0xb7d992e2 in H5I_search (type=H5I_DATASET, 
    func=0xb7cd7fef <H5D_flush_cb>, key=0xbfffee50, app_ref=0) at H5I.c:1991
        id_ptr = 0x80777d8
        next_id = 0x0
        i = 0
        FUNC = "H5I_search"
        err_occurred = 0
        type_ptr = 0x8072568
        ret_value = 0x0
#14 0xb7cd81a7 in H5D_flush (f=0x8072cd8, dxpl_id=167772168) at H5Dint.c:2359
        FUNC = "H5D_flush"
        err_occurred = 0
        udata = {f = 0x8072cd8, dxpl_id = 167772168}
        ret_value = 0
#15 0xb7cf9d04 in H5F_flush (f=0x8072cd8, dxpl_id=167772168) at H5F.c:1664
        FUNC = "H5F_flush"
        err_occurred = 0
        ret_value = 0
#16 0xb7cf9bb3 in H5Fflush (object_id=16777216, scope=H5F_SCOPE_LOCAL)
    at H5F.c:1630
        FUNC = "H5Fflush"
        err_occurred = 0
        f = 0x8072cd8
        oloc = 0x0
        ret_value = 0
#17 0x0804a521 in save_packet (chess_pkt=0xbffff5d2) at tmif_hdf5.c:233
        status = 0
        data = {packet = {244, 22, 0, 8930, 11731, 38, 1729, 6250, 28, 6255, 
            12020, 56, 3502, 2514, 94, 4502, 3310, 59, 11611, 15097, 34, 7428, 
            1038, 62, 747, 10107, 32, 4050, 9191, 49, 11604, 12075, 17, 8794, 
            15565, 10, 10455, 9134, 54, 15175, 10120, 11, 14131, 3801, 60, 
            15169, 5249, 42, 1942, 6671, 94, 7956, 15098, 37, 11706, 3186, 13, 
            1558, 5409, 80, 2142, 4668, 47, 1616, 6201, 30, 2678, 7395, 43, 
            4407, 5623, 76, 2306, 8704, 70, 8012, 11918, 17, 9798, 7227, 46, 
            6755, 5566, 66, 5159, 15200, 36, 12257, 13441, 38, 8280, 3661, 38, 
            2589, 4201, 45, 4891, 2087, 54, 4149, 14210, 60, 10983, 15054, 14, 
            13630, 11633, 16, 2849, 7193, 51, 6438, 2586, 81, 7933, 9241, 32, 
            3014, 5759, 38, 1516, 10330, 20, 11739, 7766, 34, 3280, 3929, 50, 
            9889, 12967, 73, 15211, 10639, 42, 4993, 11161, 10, 3076, 7140, 
            53, 12050, 3669, 70, 2529, 10833, 13, 5780, 3178, 24, 5592, 8241, 
            47, 12407, 9070, 55, 4848, 6481, 38, 4405, 12083, 9, 10726, 2815, 
            71, 7567, 2562, 57, 7233, 3010, 74, 4865, 10372, 84, 14798, 6976, 
            16, 7442, 11071, 26, 4154, 3333, 75, 5398, 14832, 23, 10046, 6862, 
            47, 14182, 8589, 13, 10144, 2919, 89, 8540, 5625, 50, 10759, 
            10479...}, timestamp_s = 1381533014, timestamp_us = 487806}
        ts = {tv_sec = 1381533014, tv_usec = 487806}
        error = 0
#18 0x08049a40 in main () at tmif.c:378
        dm7820_status = 0
        output_board = 0x804f008
        fifo_status = 0 '\000'
        dma_buf = 0x804f0a8
        dma_i = 18300
        dma_chk = 4
        sock_fd = 5
        sock_status = 0
        sin = {sin_family = 2, sin_port = 24810, sin_addr = {s_addr = 0}, 
          sin_zero = "\000\000\000\000\000\000\000"}
        sock_opts = 0
        from_addr = {ss_family = 2, __ss_align = 83994816, 
          __ss_padding = 
"\000\000\000\000\000\000\000\000\b\000\000\000\016\000\000\000\370&\253\267\310{÷.N=\366\060\030\262\267\b",
 '\000' <repeats 11 times>, 
"\001\000\000\000\223\b\000\000\b|÷\220u÷`\211\004\b(/\253\267\360\203\004\b\001\000\000\000\364\357\377\267\340\374\377\277\260\372\377\267\264\374\377\277\362\304\376\267\244\374\377\277\360\203\004\b\230\374\377\277T\372\377\267\000\000\000\000\b|÷"}
        addr_len = 16
        opt_status = 0
        sock_nbytes = 1470
        sock_so_rcvbuf = 16777216
        optlen = 4
        packet_buf = {244, 22, 0, 8930, 11731, 38, 1729, 6250, 28, 6255, 
          12020, 56, 3502, 2514, 94, 4502, 3310, 59, 11611, 15097, 34, 7428, 
          1038, 62, 747, 10107, 32, 4050, 9191, 49, 11604, 12075, 17, 8794, 
          15565, 10, 10455, 9134, 54, 15175, 10120, 11, 14131, 3801, 60, 
          15169, 5249, 42, 1942, 6671, 94, 7956, 15098, 37, 11706, 3186, 13, 
          1558, 5409, 80, 2142, 4668, 47, 1616, 6201, 30, 2678, 7395, 43, 
          4407, 5623, 76, 2306, 8704, 70, 8012, 11918, 17, 9798, 7227, 46, 
          6755, 5566, 66, 5159, 15200, 36, 12257, 13441, 38, 8280, 3661, 38, 
          2589, 4201, 45, 4891, 2087, 54, 4149, 14210, 60, 10983, 15054, 14, 
          13630, 11633, 16, 2849, 7193, 51, 6438, 2586, 81, 7933, 9241, 32, 
          3014, 5759, 38, 1516, 10330, 20, 11739, 7766, 34, 3280, 3929, 50, 
          9889, 12967, 73, 15211, 10639, 42, 4993, 11161, 10, 3076, 7140, 53, 
          12050, 3669, 70, 2529, 10833, 13, 5780, 3178, 24, 5592, 8241, 47, 
          12407, 9070, 55, 4848, 6481, 38, 4405, 12083, 9, 10726, 2815, 71, 
          7567, 2562, 57, 7233, 3010, 74, 4865, 10372, 84, 14798, 6976, 16, 
          7442, 11071, 26, 4154, 3333, 75, 5398, 14832, 23, 10046, 6862, 47, 
          14182, 8589, 13, 10144, 2919, 89, 8540, 5625, 50, 10759, 10479...}
        pbufptr = 0xbffff5d2
        sa_quit = {__sigaction_handler = {
            sa_handler = 0x8049074 <signal_handler>, 
            sa_sigaction = 0x8049074 <signal_handler>}, sa_mask = {__val = {
              0 <repeats 32 times>}}, sa_flags = 0, sa_restorer = 0}
        tot_pkt_count = 39799
        packet_counter = 22
        packet_counter_s = 65532
        i = 735
        num_photons = 244
        pkt_mismatch_cnt = 2
        missed_pkts = 4294927519
        status = 0
        r_stack = {rlim_cur = 16777216, rlim_max = 4294967295}
        tmif_stack = 16777216
        pid = 27785
_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://mail.lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org

Reply via email to