[jira] [Updated] (TS-1545) Possible crash in records stat snap
[ https://issues.apache.org/jira/browse/TS-1545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Bryan Call updated TS-1545: --- Fix Version/s: (was: sometime) 7.1.0 > Possible crash in records stat snap > --- > > Key: TS-1545 > URL: https://issues.apache.org/jira/browse/TS-1545 > Project: Traffic Server > Issue Type: Bug > Components: Core, Metrics >Affects Versions: 3.3.0 > Environment: debug build, long time regression testing >Reporter: Zhao Yongming >Assignee: Phil Sorber > Labels: Crash > Fix For: 7.1.0 > > Attachments: records.snap, stats.snap > > > when I running regression testing in the loop script, we come to fail to > start the server. well, the codes may handle the issue when we build without > --enable-debug, but that looks like we have something may break records snap. > open this issue in case someone need it. > {code} > [TrafficServer] using root directory '/opt/ats' > FATAL: RecMessage.cc:426: failed assert `eh->magic == REC_MESSAGE_ELE_MAGIC` > /opt/ats/bin/traffic_server - STACK TRACE: > /opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x77baeca1] > /opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x77badbb8] > /opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860] > /opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36] > /opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b] > /opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769] > /opt/ats/bin/traffic_server[0x51d4a5] > /opt/ats/bin/traffic_server(main+0x1df)[0x51ee39] > /lib64/libc.so.6(__libc_start_main+0xed)[0x7515b60d] > /opt/ats/bin/traffic_server[0x4d8f99] > Program received signal SIGABRT, Aborted. > 0x7516ec15 in raise () from /lib64/libc.so.6 > (gdb) bt > #0 0x7516ec15 in raise () from /lib64/libc.so.6 > #1 0x7517008b in abort () from /lib64/libc.so.6 > #2 0x77baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43 > #3 0x77baebfe in ink_fatal_va(int, const char *, typedef > __va_list_tag __va_list_tag *) (return_code=1, > message_format=0x77bca3e0 "%s:%d: failed assert `%s`", > ap=0x7fffc8b8) at ink_error.cc:65 > #4 0x77baeca1 in ink_fatal (return_code=1, > message_format=0x77bca3e0 "%s:%d: failed assert `%s`") at ink_error.cc:73 > #5 0x77badbb8 in _ink_assert (expression=0x76ffa0 "eh->magic == > REC_MESSAGE_ELE_MAGIC", file=0x76fe40 "RecMessage.cc", line=426) > at ink_assert.cc:38 > #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, > itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 > #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 > #8 0x006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, > _diags=0xfe5f70) at RecCore.cc:209 > #9 0x006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, > _diags=0xfe5f70) at RecProcess.cc:313 > #10 0x0051d4a5 in initialize_process_manager () at Main.cc:413 > #11 0x0051ee39 in main (argc=1, argv=0x7fffdd08) at Main.cc:1409 > (gdb) f 6 > #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, > itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 > 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC); > (gdb) l > 421 itr->ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + > itr->ele_hdr->o_next); > 422 itr->next += 1; > 423 eh = itr->ele_hdr; > 424 } > 425 > 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC); > 427 > 428 // If the file is corrupt, ignore the the rest of the file. > 429 if (eh->magic != REC_MESSAGE_ELE_MAGIC) { > 430 Warning("Persistent statistics file records.stat is corrupted. > Ignoring the rest of the file\n"); > (gdb) p eh->magic > $1 = 0 > (gdb) p REC_MESSAGE_ELE_MAGIC > $2 = 4027445261 > (gdb) p eh > $3 = (RecMessageEleHdr *) 0xff4138 > (gdb) p *eh > $4 = {magic = 0, o_next = 0} > (gdb) f 7 > #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 > 569 } while (RecMessageUnmarshalNext(m, , ) != REC_ERR_FAIL); > (gdb) l > 564 if (RecMessageUnmarshalFirst(m, , ) != REC_ERR_FAIL) { > 565 do { > 566 if ((r->name == NULL) || (!strlen(r->name))) > 567 continue; > 568 RecSetRecord(r->rec_type, r->name, r->data_type, &(r->data), > &(r->stat_meta.data_raw), false); > 569 } while (RecMessageUnmarshalNext(m, , ) != REC_ERR_FAIL); > 570 } > 571 } > 572 > 573 ink_rwlock_unlock(_records_rwlock); > (gdb) p r > $5 = (RecRecord *) 0xff4070 > (gdb) p *r > $6 = {rec_type = RECT_PROCESS, name = 0xff4118 "", data_type = RECD_INT, data > = {rec_int =
[jira] [Updated] (TS-1545) Possible crash in records stat snap
[ https://issues.apache.org/jira/browse/TS-1545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Bryan Call updated TS-1545: --- Assignee: Phil Sorber > Possible crash in records stat snap > --- > > Key: TS-1545 > URL: https://issues.apache.org/jira/browse/TS-1545 > Project: Traffic Server > Issue Type: Bug > Components: Core, Metrics >Affects Versions: 3.3.0 > Environment: debug build, long time regression testing >Reporter: Zhao Yongming >Assignee: Phil Sorber > Labels: Crash > Fix For: 7.1.0 > > Attachments: records.snap, stats.snap > > > when I running regression testing in the loop script, we come to fail to > start the server. well, the codes may handle the issue when we build without > --enable-debug, but that looks like we have something may break records snap. > open this issue in case someone need it. > {code} > [TrafficServer] using root directory '/opt/ats' > FATAL: RecMessage.cc:426: failed assert `eh->magic == REC_MESSAGE_ELE_MAGIC` > /opt/ats/bin/traffic_server - STACK TRACE: > /opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x77baeca1] > /opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x77badbb8] > /opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860] > /opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36] > /opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b] > /opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769] > /opt/ats/bin/traffic_server[0x51d4a5] > /opt/ats/bin/traffic_server(main+0x1df)[0x51ee39] > /lib64/libc.so.6(__libc_start_main+0xed)[0x7515b60d] > /opt/ats/bin/traffic_server[0x4d8f99] > Program received signal SIGABRT, Aborted. > 0x7516ec15 in raise () from /lib64/libc.so.6 > (gdb) bt > #0 0x7516ec15 in raise () from /lib64/libc.so.6 > #1 0x7517008b in abort () from /lib64/libc.so.6 > #2 0x77baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43 > #3 0x77baebfe in ink_fatal_va(int, const char *, typedef > __va_list_tag __va_list_tag *) (return_code=1, > message_format=0x77bca3e0 "%s:%d: failed assert `%s`", > ap=0x7fffc8b8) at ink_error.cc:65 > #4 0x77baeca1 in ink_fatal (return_code=1, > message_format=0x77bca3e0 "%s:%d: failed assert `%s`") at ink_error.cc:73 > #5 0x77badbb8 in _ink_assert (expression=0x76ffa0 "eh->magic == > REC_MESSAGE_ELE_MAGIC", file=0x76fe40 "RecMessage.cc", line=426) > at ink_assert.cc:38 > #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, > itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 > #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 > #8 0x006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, > _diags=0xfe5f70) at RecCore.cc:209 > #9 0x006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, > _diags=0xfe5f70) at RecProcess.cc:313 > #10 0x0051d4a5 in initialize_process_manager () at Main.cc:413 > #11 0x0051ee39 in main (argc=1, argv=0x7fffdd08) at Main.cc:1409 > (gdb) f 6 > #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, > itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 > 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC); > (gdb) l > 421 itr->ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + > itr->ele_hdr->o_next); > 422 itr->next += 1; > 423 eh = itr->ele_hdr; > 424 } > 425 > 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC); > 427 > 428 // If the file is corrupt, ignore the the rest of the file. > 429 if (eh->magic != REC_MESSAGE_ELE_MAGIC) { > 430 Warning("Persistent statistics file records.stat is corrupted. > Ignoring the rest of the file\n"); > (gdb) p eh->magic > $1 = 0 > (gdb) p REC_MESSAGE_ELE_MAGIC > $2 = 4027445261 > (gdb) p eh > $3 = (RecMessageEleHdr *) 0xff4138 > (gdb) p *eh > $4 = {magic = 0, o_next = 0} > (gdb) f 7 > #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 > 569 } while (RecMessageUnmarshalNext(m, , ) != REC_ERR_FAIL); > (gdb) l > 564 if (RecMessageUnmarshalFirst(m, , ) != REC_ERR_FAIL) { > 565 do { > 566 if ((r->name == NULL) || (!strlen(r->name))) > 567 continue; > 568 RecSetRecord(r->rec_type, r->name, r->data_type, &(r->data), > &(r->stat_meta.data_raw), false); > 569 } while (RecMessageUnmarshalNext(m, , ) != REC_ERR_FAIL); > 570 } > 571 } > 572 > 573 ink_rwlock_unlock(_records_rwlock); > (gdb) p r > $5 = (RecRecord *) 0xff4070 > (gdb) p *r > $6 = {rec_type = RECT_PROCESS, name = 0xff4118 "", data_type = RECD_INT, data > = {rec_int = 0, rec_float = 0, rec_string = 0x0,
[jira] [Updated] (TS-1545) Possible crash in records stat snap
[ https://issues.apache.org/jira/browse/TS-1545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Bryan Call updated TS-1545: --- Summary: Possible crash in records stat snap (was: possible crash in records stat snap) > Possible crash in records stat snap > --- > > Key: TS-1545 > URL: https://issues.apache.org/jira/browse/TS-1545 > Project: Traffic Server > Issue Type: Bug > Components: Core, Metrics >Affects Versions: 3.3.0 > Environment: debug build, long time regression testing >Reporter: Zhao Yongming > Labels: Crash > Fix For: sometime > > Attachments: records.snap, stats.snap > > > when I running regression testing in the loop script, we come to fail to > start the server. well, the codes may handle the issue when we build without > --enable-debug, but that looks like we have something may break records snap. > open this issue in case someone need it. > {code} > [TrafficServer] using root directory '/opt/ats' > FATAL: RecMessage.cc:426: failed assert `eh->magic == REC_MESSAGE_ELE_MAGIC` > /opt/ats/bin/traffic_server - STACK TRACE: > /opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x77baeca1] > /opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x77badbb8] > /opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860] > /opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36] > /opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b] > /opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769] > /opt/ats/bin/traffic_server[0x51d4a5] > /opt/ats/bin/traffic_server(main+0x1df)[0x51ee39] > /lib64/libc.so.6(__libc_start_main+0xed)[0x7515b60d] > /opt/ats/bin/traffic_server[0x4d8f99] > Program received signal SIGABRT, Aborted. > 0x7516ec15 in raise () from /lib64/libc.so.6 > (gdb) bt > #0 0x7516ec15 in raise () from /lib64/libc.so.6 > #1 0x7517008b in abort () from /lib64/libc.so.6 > #2 0x77baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43 > #3 0x77baebfe in ink_fatal_va(int, const char *, typedef > __va_list_tag __va_list_tag *) (return_code=1, > message_format=0x77bca3e0 "%s:%d: failed assert `%s`", > ap=0x7fffc8b8) at ink_error.cc:65 > #4 0x77baeca1 in ink_fatal (return_code=1, > message_format=0x77bca3e0 "%s:%d: failed assert `%s`") at ink_error.cc:73 > #5 0x77badbb8 in _ink_assert (expression=0x76ffa0 "eh->magic == > REC_MESSAGE_ELE_MAGIC", file=0x76fe40 "RecMessage.cc", line=426) > at ink_assert.cc:38 > #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, > itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 > #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 > #8 0x006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, > _diags=0xfe5f70) at RecCore.cc:209 > #9 0x006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, > _diags=0xfe5f70) at RecProcess.cc:313 > #10 0x0051d4a5 in initialize_process_manager () at Main.cc:413 > #11 0x0051ee39 in main (argc=1, argv=0x7fffdd08) at Main.cc:1409 > (gdb) f 6 > #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, > itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 > 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC); > (gdb) l > 421 itr->ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + > itr->ele_hdr->o_next); > 422 itr->next += 1; > 423 eh = itr->ele_hdr; > 424 } > 425 > 426 ink_debug_assert(eh->magic == REC_MESSAGE_ELE_MAGIC); > 427 > 428 // If the file is corrupt, ignore the the rest of the file. > 429 if (eh->magic != REC_MESSAGE_ELE_MAGIC) { > 430 Warning("Persistent statistics file records.stat is corrupted. > Ignoring the rest of the file\n"); > (gdb) p eh->magic > $1 = 0 > (gdb) p REC_MESSAGE_ELE_MAGIC > $2 = 4027445261 > (gdb) p eh > $3 = (RecMessageEleHdr *) 0xff4138 > (gdb) p *eh > $4 = {magic = 0, o_next = 0} > (gdb) f 7 > #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 > 569 } while (RecMessageUnmarshalNext(m, , ) != REC_ERR_FAIL); > (gdb) l > 564 if (RecMessageUnmarshalFirst(m, , ) != REC_ERR_FAIL) { > 565 do { > 566 if ((r->name == NULL) || (!strlen(r->name))) > 567 continue; > 568 RecSetRecord(r->rec_type, r->name, r->data_type, &(r->data), > &(r->stat_meta.data_raw), false); > 569 } while (RecMessageUnmarshalNext(m, , ) != REC_ERR_FAIL); > 570 } > 571 } > 572 > 573 ink_rwlock_unlock(_records_rwlock); > (gdb) p r > $5 = (RecRecord *) 0xff4070 > (gdb) p *r > $6 = {rec_type = RECT_PROCESS, name = 0xff4118 "", data_type = RECD_INT, data > = {rec_int =
[jira] [Updated] (TS-1545) possible crash in records stat snap
[ https://issues.apache.org/jira/browse/TS-1545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Leif Hedstrom updated TS-1545: -- Fix Version/s: (was: 5.2.0) sometime possible crash in records stat snap --- Key: TS-1545 URL: https://issues.apache.org/jira/browse/TS-1545 Project: Traffic Server Issue Type: Bug Components: Core, Metrics Affects Versions: 3.3.0 Environment: debug build, long time regression testing Reporter: Zhao Yongming Labels: Crash Fix For: sometime Attachments: records.snap, stats.snap when I running regression testing in the loop script, we come to fail to start the server. well, the codes may handle the issue when we build without --enable-debug, but that looks like we have something may break records snap. open this issue in case someone need it. {code} [TrafficServer] using root directory '/opt/ats' FATAL: RecMessage.cc:426: failed assert `eh-magic == REC_MESSAGE_ELE_MAGIC` /opt/ats/bin/traffic_server - STACK TRACE: /opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x77baeca1] /opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x77badbb8] /opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860] /opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36] /opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b] /opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769] /opt/ats/bin/traffic_server[0x51d4a5] /opt/ats/bin/traffic_server(main+0x1df)[0x51ee39] /lib64/libc.so.6(__libc_start_main+0xed)[0x7515b60d] /opt/ats/bin/traffic_server[0x4d8f99] Program received signal SIGABRT, Aborted. 0x7516ec15 in raise () from /lib64/libc.so.6 (gdb) bt #0 0x7516ec15 in raise () from /lib64/libc.so.6 #1 0x7517008b in abort () from /lib64/libc.so.6 #2 0x77baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43 #3 0x77baebfe in ink_fatal_va(int, const char *, typedef __va_list_tag __va_list_tag *) (return_code=1, message_format=0x77bca3e0 %s:%d: failed assert `%s`, ap=0x7fffc8b8) at ink_error.cc:65 #4 0x77baeca1 in ink_fatal (return_code=1, message_format=0x77bca3e0 %s:%d: failed assert `%s`) at ink_error.cc:73 #5 0x77badbb8 in _ink_assert (expression=0x76ffa0 eh-magic == REC_MESSAGE_ELE_MAGIC, file=0x76fe40 RecMessage.cc, line=426) at ink_assert.cc:38 #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 #8 0x006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecCore.cc:209 #9 0x006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecProcess.cc:313 #10 0x0051d4a5 in initialize_process_manager () at Main.cc:413 #11 0x0051ee39 in main (argc=1, argv=0x7fffdd08) at Main.cc:1409 (gdb) f 6 #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 426 ink_debug_assert(eh-magic == REC_MESSAGE_ELE_MAGIC); (gdb) l 421 itr-ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + itr-ele_hdr-o_next); 422 itr-next += 1; 423 eh = itr-ele_hdr; 424 } 425 426 ink_debug_assert(eh-magic == REC_MESSAGE_ELE_MAGIC); 427 428 // If the file is corrupt, ignore the the rest of the file. 429 if (eh-magic != REC_MESSAGE_ELE_MAGIC) { 430 Warning(Persistent statistics file records.stat is corrupted. Ignoring the rest of the file\n); (gdb) p eh-magic $1 = 0 (gdb) p REC_MESSAGE_ELE_MAGIC $2 = 4027445261 (gdb) p eh $3 = (RecMessageEleHdr *) 0xff4138 (gdb) p *eh $4 = {magic = 0, o_next = 0} (gdb) f 7 #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 569 } while (RecMessageUnmarshalNext(m, itr, r) != REC_ERR_FAIL); (gdb) l 564 if (RecMessageUnmarshalFirst(m, itr, r) != REC_ERR_FAIL) { 565 do { 566 if ((r-name == NULL) || (!strlen(r-name))) 567 continue; 568 RecSetRecord(r-rec_type, r-name, r-data_type, (r-data), (r-stat_meta.data_raw), false); 569 } while (RecMessageUnmarshalNext(m, itr, r) != REC_ERR_FAIL); 570 } 571 } 572 573 ink_rwlock_unlock(g_records_rwlock); (gdb) p r $5 = (RecRecord *) 0xff4070 (gdb) p *r $6 = {rec_type = RECT_PROCESS, name = 0xff4118 , data_type = RECD_INT, data = {rec_int = 0, rec_float = 0, rec_string = 0x0, rec_counter = 0}, data_default = {rec_int = 0, rec_float = 0, rec_string = 0x0, rec_counter = 0}, lock =
[jira] [Updated] (TS-1545) possible crash in records stat snap
[ https://issues.apache.org/jira/browse/TS-1545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Leif Hedstrom updated TS-1545: -- Fix Version/s: (was: 6.0.0) 4.2.0 possible crash in records stat snap --- Key: TS-1545 URL: https://issues.apache.org/jira/browse/TS-1545 Project: Traffic Server Issue Type: Bug Components: Core, Stats Affects Versions: 3.3.0 Environment: debug build, long time regression testing Reporter: Zhao Yongming Labels: Crash Fix For: 4.2.0 Attachments: records.snap, stats.snap when I running regression testing in the loop script, we come to fail to start the server. well, the codes may handle the issue when we build without --enable-debug, but that looks like we have something may break records snap. open this issue in case someone need it. {code} [TrafficServer] using root directory '/opt/ats' FATAL: RecMessage.cc:426: failed assert `eh-magic == REC_MESSAGE_ELE_MAGIC` /opt/ats/bin/traffic_server - STACK TRACE: /opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x77baeca1] /opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x77badbb8] /opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860] /opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36] /opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b] /opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769] /opt/ats/bin/traffic_server[0x51d4a5] /opt/ats/bin/traffic_server(main+0x1df)[0x51ee39] /lib64/libc.so.6(__libc_start_main+0xed)[0x7515b60d] /opt/ats/bin/traffic_server[0x4d8f99] Program received signal SIGABRT, Aborted. 0x7516ec15 in raise () from /lib64/libc.so.6 (gdb) bt #0 0x7516ec15 in raise () from /lib64/libc.so.6 #1 0x7517008b in abort () from /lib64/libc.so.6 #2 0x77baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43 #3 0x77baebfe in ink_fatal_va(int, const char *, typedef __va_list_tag __va_list_tag *) (return_code=1, message_format=0x77bca3e0 %s:%d: failed assert `%s`, ap=0x7fffc8b8) at ink_error.cc:65 #4 0x77baeca1 in ink_fatal (return_code=1, message_format=0x77bca3e0 %s:%d: failed assert `%s`) at ink_error.cc:73 #5 0x77badbb8 in _ink_assert (expression=0x76ffa0 eh-magic == REC_MESSAGE_ELE_MAGIC, file=0x76fe40 RecMessage.cc, line=426) at ink_assert.cc:38 #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 #8 0x006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecCore.cc:209 #9 0x006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecProcess.cc:313 #10 0x0051d4a5 in initialize_process_manager () at Main.cc:413 #11 0x0051ee39 in main (argc=1, argv=0x7fffdd08) at Main.cc:1409 (gdb) f 6 #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 426 ink_debug_assert(eh-magic == REC_MESSAGE_ELE_MAGIC); (gdb) l 421 itr-ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + itr-ele_hdr-o_next); 422 itr-next += 1; 423 eh = itr-ele_hdr; 424 } 425 426 ink_debug_assert(eh-magic == REC_MESSAGE_ELE_MAGIC); 427 428 // If the file is corrupt, ignore the the rest of the file. 429 if (eh-magic != REC_MESSAGE_ELE_MAGIC) { 430 Warning(Persistent statistics file records.stat is corrupted. Ignoring the rest of the file\n); (gdb) p eh-magic $1 = 0 (gdb) p REC_MESSAGE_ELE_MAGIC $2 = 4027445261 (gdb) p eh $3 = (RecMessageEleHdr *) 0xff4138 (gdb) p *eh $4 = {magic = 0, o_next = 0} (gdb) f 7 #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 569 } while (RecMessageUnmarshalNext(m, itr, r) != REC_ERR_FAIL); (gdb) l 564 if (RecMessageUnmarshalFirst(m, itr, r) != REC_ERR_FAIL) { 565 do { 566 if ((r-name == NULL) || (!strlen(r-name))) 567 continue; 568 RecSetRecord(r-rec_type, r-name, r-data_type, (r-data), (r-stat_meta.data_raw), false); 569 } while (RecMessageUnmarshalNext(m, itr, r) != REC_ERR_FAIL); 570 } 571 } 572 573 ink_rwlock_unlock(g_records_rwlock); (gdb) p r $5 = (RecRecord *) 0xff4070 (gdb) p *r $6 = {rec_type = RECT_PROCESS, name = 0xff4118 , data_type = RECD_INT, data = {rec_int = 0, rec_float = 0, rec_string = 0x0, rec_counter = 0}, data_default = {rec_int = 0, rec_float = 0, rec_string = 0x0, rec_counter = 0}, lock =
[jira] [Updated] (TS-1545) possible crash in records stat snap
[ https://issues.apache.org/jira/browse/TS-1545?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Zhao Yongming updated TS-1545: -- Attachment: stats.snap records.snap the snap file which is crashing the server possible crash in records stat snap --- Key: TS-1545 URL: https://issues.apache.org/jira/browse/TS-1545 Project: Traffic Server Issue Type: Bug Components: Core, Stats Affects Versions: 3.3.0 Environment: debug build, long time regression testing Reporter: Zhao Yongming Fix For: 3.3.2 Attachments: records.snap, stats.snap when I running regression testing in the loop script, we come to fail to start the server. well, the codes may handle the issue when we build without --enable-debug, but that looks like we have something may break records snap. open this issue in case someone need it. {code} [TrafficServer] using root directory '/opt/ats' FATAL: RecMessage.cc:426: failed assert `eh-magic == REC_MESSAGE_ELE_MAGIC` /opt/ats/bin/traffic_server - STACK TRACE: /opt/ats/lib/libtsutil.so.3(ink_fatal_die+0x0)[0x77baeca1] /opt/ats/lib/libtsutil.so.3(_Z12ink_get_randv+0x0)[0x77badbb8] /opt/ats/bin/traffic_server(_Z23RecMessageUnmarshalNextP13RecMessageHdrP13RecMessageItrPP9RecRecord+0xbf)[0x6ed860] /opt/ats/bin/traffic_server(_Z16RecReadStatsFilev+0xc1)[0x6e5e36] /opt/ats/bin/traffic_server(_Z11RecCoreInit8RecModeTP5Diags+0xec)[0x6e254b] /opt/ats/bin/traffic_server(_Z14RecProcessInit8RecModeTP5Diags+0x3b)[0x6e7769] /opt/ats/bin/traffic_server[0x51d4a5] /opt/ats/bin/traffic_server(main+0x1df)[0x51ee39] /lib64/libc.so.6(__libc_start_main+0xed)[0x7515b60d] /opt/ats/bin/traffic_server[0x4d8f99] Program received signal SIGABRT, Aborted. 0x7516ec15 in raise () from /lib64/libc.so.6 (gdb) bt #0 0x7516ec15 in raise () from /lib64/libc.so.6 #1 0x7517008b in abort () from /lib64/libc.so.6 #2 0x77baeb2c in ink_die_die_die (retval=1) at ink_error.cc:43 #3 0x77baebfe in ink_fatal_va(int, const char *, typedef __va_list_tag __va_list_tag *) (return_code=1, message_format=0x77bca3e0 %s:%d: failed assert `%s`, ap=0x7fffc8b8) at ink_error.cc:65 #4 0x77baeca1 in ink_fatal (return_code=1, message_format=0x77bca3e0 %s:%d: failed assert `%s`) at ink_error.cc:73 #5 0x77badbb8 in _ink_assert (expression=0x76ffa0 eh-magic == REC_MESSAGE_ELE_MAGIC, file=0x76fe40 RecMessage.cc, line=426) at ink_assert.cc:38 #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 #8 0x006e254b in RecCoreInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecCore.cc:209 #9 0x006e7769 in RecProcessInit (mode_type=RECM_STAND_ALONE, _diags=0xfe5f70) at RecProcess.cc:313 #10 0x0051d4a5 in initialize_process_manager () at Main.cc:413 #11 0x0051ee39 in main (argc=1, argv=0x7fffdd08) at Main.cc:1409 (gdb) f 6 #6 0x006ed860 in RecMessageUnmarshalNext (msg=0xfe6110, itr=0x7fffca00, record=0x7fffca10) at RecMessage.cc:426 426 ink_debug_assert(eh-magic == REC_MESSAGE_ELE_MAGIC); (gdb) l 421 itr-ele_hdr = (RecMessageEleHdr *) ((char *) (msg) + itr-ele_hdr-o_next); 422 itr-next += 1; 423 eh = itr-ele_hdr; 424 } 425 426 ink_debug_assert(eh-magic == REC_MESSAGE_ELE_MAGIC); 427 428 // If the file is corrupt, ignore the the rest of the file. 429 if (eh-magic != REC_MESSAGE_ELE_MAGIC) { 430 Warning(Persistent statistics file records.stat is corrupted. Ignoring the rest of the file\n); (gdb) p eh-magic $1 = 0 (gdb) p REC_MESSAGE_ELE_MAGIC $2 = 4027445261 (gdb) p eh $3 = (RecMessageEleHdr *) 0xff4138 (gdb) p *eh $4 = {magic = 0, o_next = 0} (gdb) f 7 #7 0x006e5e36 in RecReadStatsFile () at P_RecCore.i:569 569 } while (RecMessageUnmarshalNext(m, itr, r) != REC_ERR_FAIL); (gdb) l 564 if (RecMessageUnmarshalFirst(m, itr, r) != REC_ERR_FAIL) { 565 do { 566 if ((r-name == NULL) || (!strlen(r-name))) 567 continue; 568 RecSetRecord(r-rec_type, r-name, r-data_type, (r-data), (r-stat_meta.data_raw), false); 569 } while (RecMessageUnmarshalNext(m, itr, r) != REC_ERR_FAIL); 570 } 571 } 572 573 ink_rwlock_unlock(g_records_rwlock); (gdb) p r $5 = (RecRecord *) 0xff4070 (gdb) p *r $6 = {rec_type = RECT_PROCESS, name = 0xff4118 , data_type = RECD_INT, data = {rec_int = 0, rec_float = 0, rec_string = 0x0, rec_counter = 0}, data_default = {rec_int = 0, rec_float = 0, rec_string = 0x0,