Hello, the string_val field look ok. But p variable which is assigned that value a line before the reported crash is not, as reported by bt full:
p = 0xcfcdffaf <Address 0xcfcdffaf out of bounds> Do you have enabled core files per process/pid? I still wonder if it is a overwritten core. Otherwise, there might be physical problems with memory... By pointer addresses, I guess it is a 32b CPU, right? Cheers, Daniel On 14/01/15 11:30, Kristian F. Høgh wrote: > Hi Daniel, > > (gdb) p *row->values > $1 = {type = DB1_STRING, nul = 0, free = 0, val = {int_val = 58055112, ll_val > = 58055112, double_val = 2.8683036404665902e-316, time_val = 58055112, > string_val = 0x375d9c8 > "sip:44221xxx@192.168.1.157:37612;rinstance=8c613319a30caff6;transport=TCP", > str_val = { > s = 0x375d9c8 > "sip:44221xxx@192.168.1.157:37612;rinstance=8c613319a30caff6;transport=TCP", > len = 0}, blob_val = {s = 0x375d9c8 > "sip:44221xxx@192.168.1.157:37612;rinstance=8c613319a30caff6;transport=TCP", > len = 0}, bitmap_val = 58055112}} > > (gdb) p i > $2 = <value optimized out> > > Regards, > Kristian. > > > On Wednesday 14 January 2015 11:13:43 Daniel-Constantin Mierla wrote: >> Hello, >> >> get also: >> >> p *row->values >> p i >> >> Cheers, >> Daniel >> >> On 14/01/15 10:50, Kristian F. Høgh wrote: >>> Hi Daniel, >>> >>> (gdb) p *row >>> $1 = {values = 0x9bd2b8, n = 13} >>> >>> Regards, >>> Kristian. >>> >>> >>> On Wednesday 14 January 2015 10:01:41 Daniel-Constantin Mierla wrote: >>>> Hello, >>>> >>>> the operation done by that code is practically a sql query, fetching the >>>> result and packing it for nathelper usage. It is no race between it and >>>> handling any sip packet, no shared memory accessed. >>>> >>>> Do you get only one corefile? >>>> >>>> From the backtrace it looks like row pointer is not ok, can you get the >>>> output for: >>>> >>>> p *row >>>> >>>> Cheers, >>>> Daniel >>>> >>>> On 13/01/15 15:08, Kristian F. Høgh wrote: >>>>> Hi, >>>>> >>>>> On an ancient kamailio 3.1, we occasionally experience crash in the >>>>> function get_all_db_ucontacts in usrloc. >>>>> >>>>> A backtrace reveals the following: >>>>> #0 0x00002b505f2fb4ff in get_all_db_ucontacts (buf=0xadd270, len=<value >>>>> optimized out>, flags=64, part_idx=1, part_max=30) at dlist.c:176 >>>>> 176 if (VAL_NULL(ROW_VALUES(row)+4) || p1==0 >>>>> || p1[0]==0){ >>>>> >>>>> >>>>> The lines around line 176 (which look similar to trunk, except p and p1 >>>>> are str today) >>>>> 159 for(i = 0; i < RES_ROW_N(res); i++) { >>>>> 160 row = RES_ROWS(res) + i; >>>>> 161 >>>>> 162 /* received */ >>>>> 163 p = (char*)VAL_STRING(ROW_VALUES(row)); >>>>> 164 if ( VAL_NULL(ROW_VALUES(row)) || p==0 || >>>>> p[0]==0 ) { >>>>> 165 /* contact */ >>>>> 166 p = >>>>> (char*)VAL_STRING(ROW_VALUES(row)+1); >>>>> 167 if (VAL_NULL(ROW_VALUES(row)+1) >>>>> || p==0 || p[0]==0) { >>>>> 168 LM_ERR("empty contact -> >>>>> skipping\n"); >>>>> 169 continue; >>>>> 170 } >>>>> 171 } >>>>> 172 p_len = strlen(p); >>>>> 173 >>>>> 174 /* path */ >>>>> 175 p1 = (char*)VAL_STRING(ROW_VALUES(row)+4); >>>>> 176 if (VAL_NULL(ROW_VALUES(row)+4) || p1==0 >>>>> || p1[0]==0){ >>>>> 177 p1 = NULL; >>>>> 178 p1_len = 0; >>>>> 179 } else { >>>>> 180 p1_len = strlen(p1); >>>>> 181 } >>>>> >>>>> The client from frame #2 (bt full below), send a REGISTER, which was >>>>> replied "200 OK" at the same second as the crash. >>>>> Is it a timing issue between REGISTER and nathelper ping? >>>>> >>>>> I have a trace and the core. >>>>> >>>>> Regards, >>>>> Kristian Høgh >>>>> Uni-tel >>>>> >>>>> >>>>> >>>>> bt full >>>>> #0 0x00002b505f2fb4ff in get_all_db_ucontacts (buf=0xadd270, len=<value >>>>> optimized out>, flags=64, part_idx=1, part_max=30) at dlist.c:176 >>>>> sock = 0x0 >>>>> dbflags = 11088 >>>>> host = {s = 0x375ef8e "PROXY_IP:5060", len = 13} >>>>> i = <value optimized out> >>>>> row = 0xa13488 >>>>> dom = 0x2b5062723278 >>>>> p1 = 0x2 <Address 0x2 out of bounds> >>>>> now_s = "'2015-01-13 12:58:51'\177\000\000)" >>>>> port = 11088 >>>>> p_len = 73 >>>>> cp = <value optimized out> >>>>> needed = 16 >>>>> res = 0xa2ed38 >>>>> p = 0xcfcdffaf <Address 0xcfcdffaf out of bounds> >>>>> now_len = 21 >>>>> proto = 1642847400 >>>>> p1_len = 0 >>>>> shortage = 0 >>>>> query_buf = "select received, contact, socket, cflags, path from >>>>> location where expires > '2015-01-13 12:58:51' and cflags & 64 = 64 and >>>>> id % 30 = 1", '\000' <repeats 376 times> >>>>> query_str = {s = 0x2b505f515520 "select received, contact, >>>>> socket, cflags, path from location where expires > '2015-01-13 12:58:51' >>>>> and cflags & 64 = 64 and id % 30 = 1", len = 135} >>>>> #1 get_all_ucontacts (buf=0xadd270, len=<value optimized out>, flags=64, >>>>> part_idx=1, part_max=30) at dlist.c:398 >>>>> No locals. >>>>> #2 0x00002b5061eb6b1a in nh_timer (ticks=<value optimized out>, >>>>> timer_idx=<value optimized out>) at nathelper.c:1623 >>>>> rval = <value optimized out> >>>>> buf = 0xadd270 >>>>> cp = <value optimized out> >>>>> c = {s = 0x0, len = 0} >>>>> opt = {s = 0x0, len = 0} >>>>> path = {s = 0x0, len = 0} >>>>> curi = {user = {s = 0x0, len = 0}, passwd = {s = 0x0, len = 0}, >>>>> host = {s = 0xadd836 "CLIENT_IP:62252\250u\220", len = 14}, port = {s = >>>>> 0xadd845 "62252\250u\220", len = 5}, params = {s = 0x0, len = 0}, >>>>> sip_params = {s = 0x0, len = 0}, headers = { >>>>> s = 0x7fff110134b0 "", len = 566862356}, port_no = 50336, >>>>> proto = 70, type = ERROR_URI_T, flags = 65536, transport = {s = 0x8cbfa0 >>>>> "", len = 5441948}, ttl = {s = 0x7fff1101352f "", len = 4448297}, >>>>> user_param = {s = 0x375ef8e "PROXY_IP:5060", len = 13}, >>>>> maddr = {s = 0x9204b0 "sip3", len = 10677560}, method = {s = >>>>> 0x0, len = 0}, lr = {s = 0xadd84a "\250u\220", len = 0}, r2 = {s = >>>>> 0xa2ebc0 "\250\272\252", len = 285291296}, transport_val = {s = 0x40 >>>>> <Address 0x40 out of bounds>, len = 335544320}, ttl_val = { >>>>> s = 0x3221c302d0 "H\307\300\017", len = 0}, user_param_val = >>>>> {s = 0x0, len = 0}, maddr_val = {s = 0x0, len = 0}, method_val = {s = >>>>> 0x0, len = 0}, lr_val = {s = 0x0, len = 0}, r2_val = {s = 0x0, len = 0}} >>>>> he = <value optimized out> >>>>> send_sock = <value optimized out> >>>>> flags = <value optimized out> >>>>> proto = <value optimized out> >>>>> dst = {send_sock = 0x0, to = {s = {sa_family = 0, sa_data = >>>>> '\000' <repeats 13 times>}, sin = {sin_family = 0, sin_port = 0, sin_addr >>>>> = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, sin6 = >>>>> {sin6_family = 0, sin6_port = 0, sin6_flowinfo = 0, >>>>> sin6_addr = {in6_u = {u6_addr8 = '\000' <repeats 15 times>, >>>>> u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, u6_addr32 = {0, 0, 0, 0}}}, >>>>> sin6_scope_id = 0}}, id = 0, proto = 0 '\000', send_flags = {f = 0 >>>>> '\000', blst_imask = 0 '\000'}} >>>>> iteration = 1 >>>>> __FUNCTION__ = "nh_timer" >>>>> #3 0x0000000000521c54 in fork_dummy_timer (child_id=<value optimized >>>>> out>, desc=<value optimized out>, make_sock=<value optimized out>, >>>>> f=0x2b5061eb6a70 <nh_timer>, param=0x0, interval=1) at timer_proc.c:71 >>>>> pid = <value optimized out> >>>>> #4 0x00002b5061eb42ee in child_init (rank=<value optimized out>) at >>>>> nathelper.c:673 >>>>> i = 58055190 >>>>> #5 0x00000000004db1d0 in init_mod_child (m=0x917dc0, rank=0) at >>>>> sr_module.c:829 >>>>> No locals. >>>>> #6 0x00000000004db1a9 in init_mod_child (m=0x918420, rank=0) at >>>>> sr_module.c:826 >>>>> No locals. >>>>> #7 0x00000000004db1a9 in init_mod_child (m=0x918aa0, rank=0) at >>>>> sr_module.c:826 >>>>> No locals. >>>>> >>>>> >>>>> >>>>> _______________________________________________ >>>>> SIP Express Router (SER) and Kamailio (OpenSER) - sr-users mailing list >>>>> sr-users@lists.sip-router.org >>>>> http://lists.sip-router.org/cgi-bin/mailman/listinfo/sr-users >> -- Daniel-Constantin Mierla http://twitter.com/#!/miconda - http://www.linkedin.com/in/miconda _______________________________________________ SIP Express Router (SER) and Kamailio (OpenSER) - sr-users mailing list sr-users@lists.sip-router.org http://lists.sip-router.org/cgi-bin/mailman/listinfo/sr-users