Hello,
I am using an HDF5 packet table to save streaming data from a
photon-counting detector that outputs data as UDP packets. The basic
operation involves a one-time open and initialization of a .h5 file with a
packet table storing fixed length compound types. Then the data processing
loop runs and continually stores packets to the file by appending a packet
to the open packet table and performing an H5Fflush().
Under a sustained high data rate I consistently get a segfault in
libhdf5 at about the same file size each time (roughly 70 to 85 megabytes).
I'm estimating that I'm writing approximately 1636352 bytes/second to the
.h5 file during this process.
The process doing the work is running on Debian "squeeze" Linux 32-bit
(kernel 2.6.32-5). The process has the best nice priority and realtime IO
priority with a value of 1. The process is being run by the root user. The
disk being written to is an Apacer APS25P6B032G-DT industrial SSD with
great write specs. This problem occurs with two versions of HDF5 that I have
tried. First, I was using the libhdf5-serial-1.8.4 that came as a standard
package on debian squeeze. I then compiled hdf5 1.8.11 (compiled with
CFLAGS=-O0 -g). I experienced the same problem with 1.8.11 and finally ran
it under GDB to create the backtrace that is attached to this email.
I've now also compiled hdf5-1.8.4-patch1 (with CFLAGS=-O0 -g) and gotten a
backtrace which is attached as 1.8.4-backtrace.txt.
The source code for this program is located here:
https://github.com/cosmonaut/chess_tmif/tree/master/src
The file tmif_hdf5.c contains all of my hdf5-related code. The function
called repeatedly to store data is save_packet().
Does anyone have advice on further debugging or why this always crashes the
same way?
Thanks,
~Nick
--
Nicholas Nell
Professional Research Assistant
University of Colorado
[email protected]
303-492-5661
#0 0xb7d71eae in H5I_find_id (id=167772168) at H5I.c:2208
type_ptr = 0x8057a38
last_id = 0x0
id_ptr = 0x8075ca0
type = H5I_GENPROP_LST
hash_loc = 0
ret_value = 0x8075ca0
#1 0xb7d6ec6e in H5I_object (id=167772168) at H5I.c:1022
err_occurred = 0
id_ptr = 0x8075ca0
ret_value = 0x0
__func__ = "H5I_object"
#2 0xb7ca0df1 in H5D__pre_write (dset_id=83886080, mem_type_id=50331958,
mem_space_id=67108866, file_space_id=67108867, dxpl_id=167772168,
buf=0xbfffef28) at H5Dio.c:278
err_occurred = 0
dset = 0x8075cc8
plist = 0x1
direct_write = 0
ret_value = 0
__func__ = "H5D__pre_write"
#3 0xb7ca0b55 in H5Dwrite (dset_id=83886080, mem_type_id=50331958,
mem_space_id=67108866, file_space_id=67108867, dxpl_id=0, buf=0xbfffef28)
at H5Dio.c:233
err_occurred = 0
ret_value = 0
__func__ = "H5Dwrite"
#4 0xb7be1f5c in H5TB_common_append_records (dataset_id=83886080,
mem_type_id=50331958, nrecords=1, orig_table_size=49546, buf=0xbfffef28)
at H5TB.c:3638
count = {1}
offset = {49546}
sid = 67108867
m_sid = 67108866
dims = {49547}
mem_dims = {1}
#5 0xb7bd99ee in H5PTappend (table_id=234881024, nrecords=1, data=0xbfffef28)
at H5PT.c:438
table = 0x80753a8
#6 0x0804a4f4 in save_packet ()
No symbol table info available.
#7 0x08049a40 in main ()
No symbol table info available.
#0 0xb7d25b4c in H5FL_fac_malloc (head=0x8057c08) at H5FL.c:2115
FUNC = "H5FL_fac_malloc"
err_occurred = 0
ret_value = 0x604b45d0
#1 0xb7e3b9b4 in H5SL_new_node (item=0x8074980, key=0x8074980, hashval=0)
at H5SL.c:586
FUNC = "H5SL_new_node"
err_occurred = 0
ret_value = 0x80735f8
#2 0xb7e3e891 in H5SL_insert_common (slist=0x8073390, item=0x8074980,
key=0x8074980) at H5SL.c:679
FUNC = "H5SL_insert_common"
err_occurred = 0
x = 0x80735f8
prev = 0x805c3f8
hashval = 0
ret_value = 0x80735f8
#3 0xb7e3eef2 in H5SL_insert (slist=0x8073390, item=0x8074980, key=0x8074980)
at H5SL.c:966
FUNC = "H5SL_insert"
err_occurred = 0
ret_value = 0
#4 0xb7ca5efc in H5C_mark_pinned_or_protected_entry_dirty (
cache_ptr=0xb724e008, thing=0x8074980) at H5C.c:5546
FUNC = "H5C_mark_pinned_or_protected_entry_dirty"
err_occurred = 0
ret_value = 0
was_pinned_unprotected_and_clean = 1
entry_ptr = 0x8074980
#5 0xb7c81fa5 in H5AC_mark_pinned_or_protected_entry_dirty (f=0x8072cd8,
thing=0x8074980) at H5AC.c:1444
FUNC = "H5AC_mark_pinned_or_protected_entry_dirty"
err_occurred = 0
cache_ptr = 0xb724e008
result = 1381533014
ret_value = 0
#6 0xb7db1dfe in H5O_touch_oh (f=0x8072cd8, dxpl_id=167772168, oh=0x8074980,
force=0) at H5O.c:1733
FUNC = "H5O_touch_oh"
err_occurred = 0
now = 1381533014
ret_value = 0
#7 0xb7de4c6b in H5O_copy_mesg (f=0x8072cd8, dxpl_id=167772168, oh=0x8074980,
idx=0, type=0xb7fafcc0, mesg=0x8075b78, mesg_flags=0, update_flags=1)
at H5Omessage.c:1997
FUNC = "H5O_copy_mesg"
err_occurred = 0
idx_msg = 0x8074a30
ret_value = 0
#8 0xb7de1e42 in H5O_msg_write_real (f=0x8072cd8, dxpl_id=167772168,
oh=0x8074980, type=0xb7fafcc0, mesg_flags=0, update_flags=1,
mesg=0x8075b78) at H5Omessage.c:437
FUNC = "H5O_msg_write_real"
err_occurred = 0
idx_msg = 0x8074a30
idx = 0
ret_value = 0
#9 0xb7de1a2b in H5O_msg_write_oh (f=0x8072cd8, dxpl_id=167772168,
oh=0x8074980, type_id=1, mesg_flags=0, update_flags=1, mesg=0x8075b78)
at H5Omessage.c:342
FUNC = "H5O_msg_write_oh"
err_occurred = 0
type = 0xb7fafcc0
ret_value = 0
#10 0xb7e1fca0 in H5S_write (f=0x8072cd8, dxpl_id=167772168, oh=0x8074980,
update_flags=1, ds=0x8075b78) at H5S.c:960
FUNC = "H5S_write"
err_occurred = 0
ret_value = 0
#11 0xb7cd7e38 in H5D_flush_real (dataset=0x8074c68, dxpl_id=167772168)
at H5Dint.c:2266
update_flags = 1
FUNC = "H5D_flush_real"
err_occurred = 0
oh = 0x8074980
ret_value = 0
#12 0xb7cd804a in H5D_flush_cb (_dataset=0x8074c68, id=83886080,
_udata=0xbfffee50) at H5Dint.c:2321
FUNC = "H5D_flush_cb"
err_occurred = 0
dataset = 0x8074c68
udata = 0xbfffee50
ret_value = 0
#13 0xb7d992e2 in H5I_search (type=H5I_DATASET,
func=0xb7cd7fef <H5D_flush_cb>, key=0xbfffee50, app_ref=0) at H5I.c:1991
id_ptr = 0x80777d8
next_id = 0x0
i = 0
FUNC = "H5I_search"
err_occurred = 0
type_ptr = 0x8072568
ret_value = 0x0
#14 0xb7cd81a7 in H5D_flush (f=0x8072cd8, dxpl_id=167772168) at H5Dint.c:2359
FUNC = "H5D_flush"
err_occurred = 0
udata = {f = 0x8072cd8, dxpl_id = 167772168}
ret_value = 0
#15 0xb7cf9d04 in H5F_flush (f=0x8072cd8, dxpl_id=167772168) at H5F.c:1664
FUNC = "H5F_flush"
err_occurred = 0
ret_value = 0
#16 0xb7cf9bb3 in H5Fflush (object_id=16777216, scope=H5F_SCOPE_LOCAL)
at H5F.c:1630
FUNC = "H5Fflush"
err_occurred = 0
f = 0x8072cd8
oloc = 0x0
ret_value = 0
#17 0x0804a521 in save_packet (chess_pkt=0xbffff5d2) at tmif_hdf5.c:233
status = 0
data = {packet = {244, 22, 0, 8930, 11731, 38, 1729, 6250, 28, 6255,
12020, 56, 3502, 2514, 94, 4502, 3310, 59, 11611, 15097, 34, 7428,
1038, 62, 747, 10107, 32, 4050, 9191, 49, 11604, 12075, 17, 8794,
15565, 10, 10455, 9134, 54, 15175, 10120, 11, 14131, 3801, 60,
15169, 5249, 42, 1942, 6671, 94, 7956, 15098, 37, 11706, 3186, 13,
1558, 5409, 80, 2142, 4668, 47, 1616, 6201, 30, 2678, 7395, 43,
4407, 5623, 76, 2306, 8704, 70, 8012, 11918, 17, 9798, 7227, 46,
6755, 5566, 66, 5159, 15200, 36, 12257, 13441, 38, 8280, 3661, 38,
2589, 4201, 45, 4891, 2087, 54, 4149, 14210, 60, 10983, 15054, 14,
13630, 11633, 16, 2849, 7193, 51, 6438, 2586, 81, 7933, 9241, 32,
3014, 5759, 38, 1516, 10330, 20, 11739, 7766, 34, 3280, 3929, 50,
9889, 12967, 73, 15211, 10639, 42, 4993, 11161, 10, 3076, 7140,
53, 12050, 3669, 70, 2529, 10833, 13, 5780, 3178, 24, 5592, 8241,
47, 12407, 9070, 55, 4848, 6481, 38, 4405, 12083, 9, 10726, 2815,
71, 7567, 2562, 57, 7233, 3010, 74, 4865, 10372, 84, 14798, 6976,
16, 7442, 11071, 26, 4154, 3333, 75, 5398, 14832, 23, 10046, 6862,
47, 14182, 8589, 13, 10144, 2919, 89, 8540, 5625, 50, 10759,
10479...}, timestamp_s = 1381533014, timestamp_us = 487806}
ts = {tv_sec = 1381533014, tv_usec = 487806}
error = 0
#18 0x08049a40 in main () at tmif.c:378
dm7820_status = 0
output_board = 0x804f008
fifo_status = 0 '\000'
dma_buf = 0x804f0a8
dma_i = 18300
dma_chk = 4
sock_fd = 5
sock_status = 0
sin = {sin_family = 2, sin_port = 24810, sin_addr = {s_addr = 0},
sin_zero = "\000\000\000\000\000\000\000"}
sock_opts = 0
from_addr = {ss_family = 2, __ss_align = 83994816,
__ss_padding =
"\000\000\000\000\000\000\000\000\b\000\000\000\016\000\000\000\370&\253\267\310{÷.N=\366\060\030\262\267\b",
'\000' <repeats 11 times>,
"\001\000\000\000\223\b\000\000\b|÷\220u÷`\211\004\b(/\253\267\360\203\004\b\001\000\000\000\364\357\377\267\340\374\377\277\260\372\377\267\264\374\377\277\362\304\376\267\244\374\377\277\360\203\004\b\230\374\377\277T\372\377\267\000\000\000\000\b|÷"}
addr_len = 16
opt_status = 0
sock_nbytes = 1470
sock_so_rcvbuf = 16777216
optlen = 4
packet_buf = {244, 22, 0, 8930, 11731, 38, 1729, 6250, 28, 6255,
12020, 56, 3502, 2514, 94, 4502, 3310, 59, 11611, 15097, 34, 7428,
1038, 62, 747, 10107, 32, 4050, 9191, 49, 11604, 12075, 17, 8794,
15565, 10, 10455, 9134, 54, 15175, 10120, 11, 14131, 3801, 60,
15169, 5249, 42, 1942, 6671, 94, 7956, 15098, 37, 11706, 3186, 13,
1558, 5409, 80, 2142, 4668, 47, 1616, 6201, 30, 2678, 7395, 43,
4407, 5623, 76, 2306, 8704, 70, 8012, 11918, 17, 9798, 7227, 46,
6755, 5566, 66, 5159, 15200, 36, 12257, 13441, 38, 8280, 3661, 38,
2589, 4201, 45, 4891, 2087, 54, 4149, 14210, 60, 10983, 15054, 14,
13630, 11633, 16, 2849, 7193, 51, 6438, 2586, 81, 7933, 9241, 32,
3014, 5759, 38, 1516, 10330, 20, 11739, 7766, 34, 3280, 3929, 50,
9889, 12967, 73, 15211, 10639, 42, 4993, 11161, 10, 3076, 7140, 53,
12050, 3669, 70, 2529, 10833, 13, 5780, 3178, 24, 5592, 8241, 47,
12407, 9070, 55, 4848, 6481, 38, 4405, 12083, 9, 10726, 2815, 71,
7567, 2562, 57, 7233, 3010, 74, 4865, 10372, 84, 14798, 6976, 16,
7442, 11071, 26, 4154, 3333, 75, 5398, 14832, 23, 10046, 6862, 47,
14182, 8589, 13, 10144, 2919, 89, 8540, 5625, 50, 10759, 10479...}
pbufptr = 0xbffff5d2
sa_quit = {__sigaction_handler = {
sa_handler = 0x8049074 <signal_handler>,
sa_sigaction = 0x8049074 <signal_handler>}, sa_mask = {__val = {
0 <repeats 32 times>}}, sa_flags = 0, sa_restorer = 0}
tot_pkt_count = 39799
packet_counter = 22
packet_counter_s = 65532
i = 735
num_photons = 244
pkt_mismatch_cnt = 2
missed_pkts = 4294927519
status = 0
r_stack = {rlim_cur = 16777216, rlim_max = 4294967295}
tmif_stack = 16777216
pid = 27785
_______________________________________________
Hdf-forum is for HDF software users discussion.
[email protected]
http://mail.lists.hdfgroup.org/mailman/listinfo/hdf-forum_lists.hdfgroup.org