Hi,
Please find the complete stack trace, the language is C and thread here was
just performing the operation of fetching records from the postgresDB using
SQL_get_tpf_rw() function for which I've also attached implementation here
as well
Stack trace of thread 966070:
#0
0x00000000f7ee1129 __kernel_vsyscall (linux-gate.so.1)
#1
0x00000000f6ba23b7 __poll (libc.so.6)
#2
0x00000000f792e5b5 __interceptor_poll (libasan.so.8)
#3
0x00000000f72b30a8 pqSocketCheck (libpq.so.5)
#4
0x00000000f72b3864 pqWaitTimed (libpq.so.5)
#5
0x00000000f72b38d2 pqWait (libpq.so.5)
#6
0x00000000f72aff03 PQgetResult (libpq.so.5)
#7
0x00000000f72b036a PQexecFinish (libpq.so.5)
#8
0x0000000008106dd4 checkLOCK (server)
#9
0x000000000811d871 SQL_get_tpf_rw (server)
#10
0x0000000008180269 get_tpf_rw (server)
#11
0x0000000008189e47 FINDC (server)
#12
0x000000000818cd20 FINWC (server)
#13
0x00000000f2d03778 _segKIP8 (kip8.so)
#14
0x000000000816ad0e call_segment (server)
#15
0x000000000816ba0b ENTRC (server)
#16
0x00000000f2d096ee _segKIP5 (kip5.so)
#17
0x000000000816ad0e call_segment (server)
#18
0x000000000816ba0b ENTRC (server)
#19
0x00000000f2d1722d _segKIP4 (kip4.so)
#20
0x000000000816ad0e call_segment (server)
#21
0x000000000816ba0b ENTRC (server)
#22
0x00000000f2d1dee1 _segKIPB (kipb.so)
#23
0x000000000816ad0e call_segment (server)
#24
0x000000000816c454 ENTNC (server)
#25
0x00000000f2d2be54 _segKIP9 (kip9.so)
#26
0x000000000816ad0e call_segment (server)
#27
0x00000000081c264d call_appl_entry (server)
#28
0x000000000807f227 extest (server)
#29
0x000000000808923f handle_COMMS_TYPE_FUNCTIONAL_MESSAGE (server)
#30
0x000000000808d849 handle_comms_message (server)
#31
0x00000000f77d6e41 worker (libuv.so.1)
#32
0x00000000f78ff67a _ZL17asan_thread_startPv (libasan.so.8)
#33
0x00000000f6b170ad start_thread (libc.so.6)
#34
0x00000000f6bb1dea __clone (libc.so.6)
Regards,
Sasmit Utkarsh
+91-7674022625
On Thu, Oct 24, 2024 at 9:47 PM Igor Korot <[email protected]> wrote:
> Hi,
> Is this a complete backtrace?
> Can you post a complete backtrace? We need to see if there is a reference
> to your application code?
>
> What language is it written in?
> What operation this thread was performing?
>
> Thank you.
>
> On Thu, Oct 24, 2024 at 11:12 AM Sasmit Utkarsh <[email protected]>
> wrote:
> >
> > Dear PostgreSQL Community Team,
> >
> > I hope this message finds you well. I am reaching out for assistance
> with an issue encountered in our application, which communicates with
> PostgreSQL using the libpq client library.
> >
> > Issue Details:
> > We have observed a problem where one of the application's threads gets
> stuck during a database operation. Below is a stack trace of the affected
> thread:
> >
> > Application Logs:
> > Oct 23 10:08:44.806235 cucmtpccu1 [email protected][966034]:
> 0966070{ef5f81a7-d35b-4604-953d-a35665e505b7.010000}KIP8-SQL_get_tpf_rw()-SQL
> read data from File Address before lock fa(-1810606079) fa(94145801) fa2
> htonl(22549652)
> > Oct 23 10:08:44.806235 cucmtpccu1 [email protected][966034]:
> 0966070{ef5f81a7-d35b-4604-953d-a35665e505b7.010000}KIP8-SQL_get_tpf_rw()
> SelectDataCommand = CALL SQL_select_data_procedure($1, $2, NULL, NULL)
> hold(0) fa(-1810606079)
> > Oct 23 10:08:44.807814 cucmtpccu1 [email protected][966034]: ***
> buffer overflow detected ***: terminated
> >
> > Stack Trace of Thread 966070:
> > #0 0x00000000f7ee1129 __kernel_vsyscall (linux-gate.so.1)
> > #1 0x00000000f6ba23b7 __poll (libc.so.6)
> > #2 0x00000000f792e5b5 __interceptor_poll (libasan.so.8)
> > #3 0x00000000f72b30a8 pqSocketCheck (libpq.so.5)
> > #4 0x00000000f72b3864 pqWaitTimed (libpq.so.5)
> > #5 0x00000000f72b38d2 pqWait (libpq.so.5)
> > #6 0x00000000f72aff03 PQgetResult (libpq.so.5)
> > #7 0x00000000f72b036a PQexecFinish (libpq.so.5)
> > #8 0x0000000008106dd4 checkLOCK (server)
> > #9 0x000000000811d871 SQL_get_tpf_rw (server)
> > ...
> >
> > The stack trace shows that the thread is stuck in a poll operation while
> waiting for socket activity within the PostgreSQL client library (libpq).
> We suspect this could be related to a network timeout or issue. However,
> the application logs indicate a buffer overflow before the crash, which
> raises questions about whether these are related.
> >
> > Questions:
> > -Could the buffer overflow be causing the crash, and if so, how is it
> related to the socket activity?
> > -Are there specific configurations or checks we should perform to
> diagnose this issue further?
> > -Any suggestions for possible solutions to resolve this problem?
> >
> > For additional context, I've verified that the specified record does
> exist in the database, and I am also attaching the implementation details
> for the checkLOCK function corresponding to the stack trace.
> >
> > Please let me know if you need any more details
> >
> > Your assistance with troubleshooting this would be highly appreciated.
> >
> > Regards,
> > Sasmit Utkarsh
> > +91-7674022625
>
int get_tpf_rw( int32_t fa, unsigned char **core_blk, size_t *st_size, unsigned
char *bid, unsigned char *rcc, int *fd, bool hold, unsigned char **mmap_ptr)
{
if(sql_db == 1)// {
return SQL_get_tpf_rw(fa, core_blk, st_size, bid, rcc, fd, hold, mmap_ptr
);
// } else //if(sql_db == 0)
return FILE_get_tpf_rw(fa, core_blk, st_size, bid, rcc, fd, hold, mmap_ptr );
}
int SQL_get_tpf_rw( int32_t fa, unsigned char **core_blk, size_t *st_size,
unsigned char *bid, unsigned char *rcc, int *fd, bool hold, unsigned char
**mmap_ptr)
{
char *p;
char *c;
int rc;
struct stat statbuf;
PGresult *res=NULL;
uint32_t fa2 = htonl(fa);
char *fa_val = (char *)&fa2;
bool hold2 = hold;
char *hold_val = (char *)&hold2;
const char *paramValues[2] = {fa_val, hold_val};
int paramLengths[2] = {sizeof(fa2), sizeof(hold2)};
int paramFormats[2] = {1, 1}; // identify parameter as binary
if(SQL_vsn10) {
paramValues[0] = fa_val;
paramValues[1] = NULL;
paramLengths[0] = sizeof(fa2);
paramLengths[1] = 0;
paramFormats[0] = 1;
paramFormats[1] = 0;
}
/**
if(shadowSQL_vsn10) {
paramValues[0] = fa_val;
paramValues[1] = NULL;
paramLengths[0] = sizeof(fa2);
paramLengths[1] = 0;
paramFormats[0] = 1;
paramFormats[1] = 0;
}
**/
int resultFormat = 1; // binary
int nFields = 0;
int nTuples = 0;
int blk_size = 0;
char *blk_size_val;
char *data_val;
if(bid)
{
LOG_TRACE("%s() SQL read data from File Address fa(%i) fa(%08X)
fa2 htonl(%i) bid (%02X%02X)",__func__,fa,fa,fa2,*bid,*(bid+1));
}
else
{
LOG_TRACE("%s() SQL read data from File Address fa(%i) fa(%08X)
fa2 htonl(%i) bid (0)",__func__,fa,fa,fa2);
}
LOG_DEBUG("%s()-SQL read data from File Address before lock fa(%i)
fa(%08X) fa2 htonl(%i)",__func__,fa,fa,fa2);
//LOG_DEBUG("%s() CALL SQL_select_data_procedure1(%i, %i,
NULL);",__func__,fa2,hold2);
//LOG_DEBUG("%s() SELECT data FROM fs_data WHERE file_address =
%i","SQL_select_data_procedure1",fa2);
bool is_data_found = false;
if(is_shadow_db)
{
LOG_DEBUG("%s() shadowSelectDataCommand = %s hold(%i)
fa(%i)",__func__,shadowSelectDataCommand,hold,fa);
LOG_DEBUG("%s() shadowSelectDataNParams =
%i",__func__,shadowSelectDataNParams);
}
else
{
LOG_DEBUG("%s() SelectDataCommand = %s hold(%i)
fa(%i)",__func__,SelectDataCommand,hold,fa);
LOG_DEBUG("%s() SelectDataNParams = %i",__func__,SelectDataNParams);
}
checkLOCK(fa);
if(is_shadow_db)
{
res = PQexecPrepared(shadow_db_conn,
shadowSelectDataName,
shadowSelectDataNParams,
paramValues,
paramLengths,
paramFormats,
resultFormat);
LOG_DEBUG("%s()- check now in shadow db for fa(%i) fa(%08X)
PQresultStatus(res):%s",__func__,fa,fa,PQresStatus(PQresultStatus(res)));
if (PQresultStatus(res) == PGRES_TUPLES_OK)
{
nTuples = PQntuples(res); // number of rows
if(nTuples == 0)
{
LOG_DEBUG("fa(%i) fa(%08X) could not be retrieved from
Shadow DB so look into base db",fa,fa);
}
else
{
blk_size_val = PQgetvalue(res, 0, 0);
data_val = PQgetvalue(res, 0, 1);
if( data_val != NULL && *data_val != '\0')
{
is_data_found = true;
LOG_DEBUG("fa(%i) fa(%08X) retrieved successfully
from Shadow DB",fa,fa);
}
}
}
else
LOG_ERROR("SELECT failed on Shadow DB: %s",
PQerrorMessage(shadow_db_conn ));
if (!is_data_found) {
PQclear(res); // Cleared result only if we are moving to
main DB query
}
}
if(is_data_found !=true)
{
res = PQexecPrepared(conn,
SelectDataName,
SelectDataNParams,
paramValues,
paramLengths,
paramFormats,
resultFormat);
}
LOG_DEBUG("%s() res = %s",__func__,PQresStatus(PQresultStatus(res)));
if ( hold )
{
if ( SQL_vsn10 )
{
SQL_lock_row(fa);
}
LOG_DEBUG("%s()-SQL read data from File Address- locked fa(%i)
fa(%08X) fa2 htonl(%i)",__func__,fa,fa,fa2);
}
checkLOCK(fa);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
LOG_ERROR("SELECT failed: %s", PQerrorMessage(conn));
LOG_DEBUG("ROLLBACK TRANSACTION AND CHAIN");
PQclear(res);
res = PQexec(conn,"ROLLBACK TRANSACTION AND CHAIN");
LOG_ERROR("INVALID_FILE_ADDRESS %08X",fa);
rc = ERR_INVALID_FILE_ADDRESS;
if ( ((uint32_t)fa)>>24 != 0x94) {
if (COUNT < 255) {
if ( bid != NULL ) {
LOG_INFO("missing data: %08x %02x
%02x", fa, *bid,*(bid+1));
char str1[8];
int16_t a=*bid;
int16_t b=*(bid+1);
sprintf(str1,"%02x%02x",a,b);
FA[COUNT]=fa;
memcpy(REC_ID1[COUNT],str1,5);
COUNT++;
} else {
bid=0x0000;
LOG_INFO("missing data: %08x", fa);
FA[COUNT++]=fa;
REC_ID1[COUNT][0]=0x00;
}
}
}
PQclear(res);
}
else
{
nFields = PQnfields(res); // number of columns
nTuples = PQntuples(res); // number of rows
LOG_DEBUG("%s() nFields(cols)=%i
nTuples(rows)=%i",__func__,nFields,nTuples);
if( nFields == 0 || nTuples == 0)
{
LOG_ERROR("SELECT failed: NO ROWS RETURNED");
PQclear(res);
res = PQexec(conn,"ROLLBACK TRANSACTION AND CHAIN");
LOG_ERROR("INVALID_FILE_ADDRESS %08X",fa);
rc = ERR_INVALID_FILE_ADDRESS;
if ( ((uint32_t)fa)>>24 != 0x94) {
if (COUNT < 255) {
if ( bid != NULL ) {
LOG_INFO("missing data: %08x
%02x %02x", fa, *bid,*(bid+1));
char str1[8];
int16_t a=*bid;
int16_t b=*(bid+1);
sprintf(str1,"%02x%02x",a,b);
FA[COUNT]=fa;
memcpy(REC_ID1[COUNT],str1,5);
COUNT++;
} else {
bid=0x0000;
LOG_INFO("missing data: %08x",
fa);
FA[COUNT++]=fa;
REC_ID1[COUNT][0]=0x00;
}
}
}
PQclear(res);
}
else
{
blk_size = ntohl(*(uint32_t *)PQgetvalue (res, 0, 0));
LOG_DEBUG("%s() blk_size returned is
%i",__func__,blk_size);
if(blk_size < 127 || blk_size > 4096)
{
LOG_ERROR("SELECT failed: NO ROWS RETURNED");
PQclear(res);
res = PQexec(conn,"ROLLBACK TRANSACTION AND
CHAIN");
LOG_ERROR("INVALID_FILE_ADDRESS %08X",fa);
rc = ERR_INVALID_FILE_ADDRESS;
if ( ((uint32_t)fa)>>24 != 0x94) {
if (COUNT < 255) {
if ( bid != NULL ) {
LOG_INFO("missing data:
%08x %02x %02x", fa, *bid,*(bid+1));
char str1[8];
int16_t a=*bid;
int16_t b=*(bid+1);
sprintf(str1,"%02x%02x",a,b);
FA[COUNT]=fa;
memcpy(REC_ID1[COUNT],str1,5);
COUNT++;
} else {
bid=0x0000;
LOG_INFO("missing data:
%08x", fa);
FA[COUNT++]=fa;
REC_ID1[COUNT][0]=0x00;
}
}
}
PQclear(res);
}
else
{
*fd = fa; // use
File Address as File Descriptor
c = PQgetvalue(res, 0, 1); //
SELECT has data as second field
//dump_hex(c,16);
//printf(" PQgetlength of data =
%i\n",PQgetlength(res,0,1));
p = c; // use same pointer for both mmap and
core_blk
LOG_DEBUG("Core block address %p for FA %08X
(RW)", p, fa);
dump_hex(p,32);
/* probably need some validation of block size?
*/
if (bid != NULL && memcmp(bid, p, 2) != 0 &&
memcmp(bid, "\x00\x00", 2) != 0) {
rc = ERR_RID_MISMATCH;
int b1 = *(unsigned char *)p;
int b2 = *(unsigned char *)(p+1);
int a1 = *(unsigned char *)(bid);
int a2 = *(unsigned char *)(bid+1);
LOG_ERROR("Error record id mismatch
%02X%02X != %02X%02X FA %08X", b1, b2, a1, a2, fa);
} else if (rcc != NULL && *rcc != 0 &&
memcmp(rcc, p+2, 1) != 0) {
rc = ERR_RCC_MISMATCH;
int b1 = *(unsigned char *)(p+2);
int a1 = *(unsigned char *)(rcc);
LOG_ERROR("Error rcc mismatch %02X !=
%02X FA %08X", b1, a1, fa);
} else {
rc = 0;
}
*core_blk = (unsigned char *)malloc(blk_size);
if (*core_blk == NULL) {
LOG_ERROR("Memory allocation failed");
PQclear(res);
return ERR_IO_HARDWARE_ERROR;
}
memcpy(*core_blk,c , blk_size);
LOG_TRACE("Core block address %p and mmap_ptr
%p for FA %08X (RW)", core_blk, mmap_ptr,fa);
//*mmap_ptr = c;
//*core_blk = p;
*st_size = blk_size;
PQclear(res);
LOG_DEBUG("%s: cleared the result from query in
the end",__func__);
//dump_hex(*mmap_ptr,1055);
}
}
}
//LOG_DEBUG("%s() rc = %i",__func__,rc);
return rc;
}