Hi Iwata-san I reviewed v10-0001-libpq-trace.patch. (But I don't check recent discussion...) I found some bugs. I'm suggesting some refactoring.
**** @@ -6809,7 +6809,17 @@ PQtrace(PGconn *conn, FILE *debug_port) + if (pqTraceInit(conn)) + { + conn->Pfdebug = debug_port; + setlinebuf(conn->Pfdebug); If debug_port is NULL, setlinebuf() causes segmentation fault. We should have policy that libpq-trace framework works only when Pfdebug is not NULL. For example, we should also think that PQtrace(conn, NULL) has same effect as PQuntrace(conn). **** + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \x65 ... \0x6d */ There should be 9 zero, but 15 zero. **** @@ -85,7 +228,7 @@ pqGetc(char *result, PGconn *conn) if (conn->Pfdebug) - fprintf(conn->Pfdebug, "From backend> %c\n", *result); + pqLogMsgByte1(conn, *result, FROM_BACKEND); I suggest to move confirming Pfdebug to pqLogMsgByte1() and other logging functions. If you want to avoid overhead of calling function, use macro function like the following: #define pqLogMsgByte1(CONN, CH, SOURCE) \ ((CONN)->Pfdebug ? pqLogMsgByte1(CONN, CH, SOURCE) : 0) **** @@ -168,7 +310,7 @@ pqPuts(const char *s, PGconn *conn) if (pqPutMsgBytes(s, strlen(s) + 1, conn)) return EOF; if (conn->Pfdebug) - fprintf(conn->Pfdebug, "To backend> \"%s\"\n", s); + pqStoreFrontendMsg(conn, LOG_STRING, strlen(s) + 1); It's better that you store strlen(s) to local variable and use it. strlen(s) is not cost-free. **** @@ -301,11 +431,15 @@ pqPutInt(int value, size_t bytes, PGconn *conn) tmp2 = pg_hton16((uint16) value); if (pqPutMsgBytes((const char *) &tmp2, 2, conn)) return EOF; + if (conn->Pfdebug) + pqStoreFrontendMsg(conn, LOG_INT16, 2); break; case 4: tmp4 = pg_hton32((uint32) value); if (pqPutMsgBytes((const char *) &tmp4, 4, conn)) return EOF; + if (conn->Pfdebug) + pqStoreFrontendMsg(conn, LOG_INT32, 4); break It's better to make the style same as pqGetInt(). (It is not important.) switch(bytes) { case 2: type = LOG_INT16; break; case 4: type = LOG_INT32; break; } pqStoreFrontendMsg(conn, type, bytes); **** +pqTraceInit(PGconn *conn) +{ + conn->fe_msg = malloc(MAX_FRONTEND_MSGS * sizeof(pqFrontendMessage)); + if (conn->fe_msg == NULL) + { + free(conn->be_msg); + return false; Maybe, we need to clear conn->be_msg by NULL for next calling pqTraceInit(). **** +pqTraceInit(PGconn *conn) +{ + conn->fe_msg = malloc(MAX_FRONTEND_MSGS * sizeof(pqFrontendMessage)); + conn->n_fe_msgs = 0; Data structure design is odd. Frontend message is basically constructed as the following: - message type - message length - field - field - field So the deisign may be as the follwong and remove n_fe_msg from pg_conn. typedef struct pqFrontendMessageField { int offset_in_buffer; message_addr is not real address. int length; message_length is wrong name. it is length of FIELD. } pqFrontendMessageField; typedef struct pqFrontendMessage { PGLogMsgDataType type; int field_num; pqFrontendMessageField fields[FLEXIBLE_ARRAY_MEMBER]; } pqFrontendMessage; And then pqTraceInit() is as the following. conn->fe_msg = malloc(sizeof(pqFrontendMessage) + MAX_FRONTEND_MSG_FIELDS * sizeof(pqFrontendMessage)); conn->fe_msg->field_num = 0; **** +pqGetProtocolMsgType(unsigned char c, PGCommSource commsource) + return "UnknownCommand"; It is "UnknownMessageType", isn't it? **** + * XXX -- ?? + * Message length is added at the last if message is sent by the frontend. + * To arrange the log output format, frontend message contents are stored in the list. I understand as the following: * The message length is fixed after putting the last field, * but message length should be print before printing any field. * So we must store the field data in memory. **** +pqStoreFrontendMsg(PGconn *conn, PGLogMsgDataType type, int length) + case LOG_STRING: + memcpy(&message, conn->outBuffer + conn->outMsgEnd - length, length); + fprintf(conn->Pfdebug, "To backend> \"%c\"\n", message); %s is correct. At least, %c is not correct. **** +pqStoreFrontendMsg(PGconn *conn, PGLogMsgDataType type, int length) + case LOG_BYTE1: + memcpy(&message, conn->outBuffer + conn->outMsgEnd - length, length); It is unnecessary to call memcpy(). LOG_BYTE1, LOG_STRING, and LOG_NCHAR can be passed its pointer directly to fprintf(). You can also pass LOG_INT* data with casting to fprintf() without memcpy(), but I think either is fine. **** + case LOG_INT16: + fprintf(conn->Pfdebug, "To backend (#%d)> %c\n", length, result); + case LOG_INT32: + fprintf(conn->Pfdebug, "To backend (#%d)> %c\n", length, result); %d is correct. At least, %c is not correct. **** +pqStoreFrontendMsg(PGconn *conn, PGLogMsgDataType type, int length) + char message; + uint16 result16 = 0; + uint32 result32 = 0; + int result = 0; It's better that these variables declared in each block. Initializing result* is unnecessary. case LOG_INT16: { uint16 wk; memcpy(&wk, conn->outBuffer + conn->outMsgEnd - 2, 2); wk = pg_ntoh16(wk); fprintf(conn->Pfdebug, "To backend (#%d)> %h\n", 2, wk); break; } **** +pqLogFrontendMsg(PGconn *conn) + int message_addr; + int length; + char message; + uint16 result16 = 0; + uint32 result32 = 0; + int result = 0; Same as pqStoreFrontendMsg(). + memcpy(&message, conn->outBuffer + message_addr, length); Same as pqStoreFrontendMsg(). **** +pqLogMsgByte1(PGconn *conn, char v, PGCommSource commsource) + char *message_source = commsource == FROM_BACKEND ? "<" : ">"; I understand that 'commsource' means source, but message_source means direction of sending. It is better that the variable is named message_direction. **** +pqLogMsgByte1(PGconn *conn, char v, PGCommSource commsource) + case LOG_FIRST_BYTE: + if (v == '\0') + return; Maybe is it needed for packets whose msg_type is '\0'? I get the following output because pqLogMsgnchar() is called at conn->be_msg->state == LENGTH. I attach a program for reproduction. 2020-12-25 00:58:48 UTC > StartupMessage :::Invalid Protocol I think confusing transition of state causes it. I suggest refactoring instead of fixing the above bug. msg_type of Startup packet, SSLRequest packet, and GSSNegotiate packet is '\0'. (I guess GSSNegotiate packet may be forgotten). At these packet, Byte1 is not sent actually, but if libpq-trace framework consider that it is sent, the transition may become more clear like the following: pqLogMsgByte1(PGconn *conn, char v, PGCommSource commsource) : fprintf(conn->Pfdebug, "%s %s ", timestr, message_source); /* * True type of message tagged '\0' is known when next 4 bytes is checked. * So we delay printing message type to pqLogMsgInt(). */ if (v != '\0') fprintf(conn->Pfdebug, "%s ", pqGetProtocolMsgType((unsigned char) v, commsource)); conn->be_msg->state = LOG_LENGTH; conn->be_msg->command = v; pqLogMsgInt(PGconn *conn, int v, int length, PGCommSource commsource) : /* remove case LOG_FIRST_BYTE and... */ case LOG_LENGTH: if (conn->be_msg->command == '\0') { /* We delayed to print message type for special message. */ message_addr = conn->fe_msg[0].message_addr; memcpy(&result32, conn->outBuffer + message_addr, 4); result = (int) pg_ntoh32(result32); if (result == NEGOTIATE_SSL_CODE) message_type = "SSLRequest"; else if (result == NEGOTIATE_GSS_CODE) message_type = "GSSRequest"; else message_type = "StartupMessage"; fprintf(conn->Pfdebug, "%s ", message_type); } fprintf(conn->Pfdebug, "%d", v); conn->be_msg->length = v - length; conn->be_msg->state = LOG_CONTENTS; pqTraceMaybeBreakLine(0, conn); break; **** +pqLogMsgByte1(PGconn *conn, char v, PGCommSource commsource) + case LOG_CONTENTS: + /* Suppresses printing terminating zero byte */ + if (v == '\0') + fprintf(conn->Pfdebug, "0"); Is the above code for end of 'E' or 'N' message? We should comment because it is special case. Additionally, we may confuse whether 0 is numerical character or '\0'. /* * 'E' or 'N' message format is as the following: * 'E' len [byte1-tag string(null-terminated)]* \0(eof-tag) * * So we detect eof-tag at pqLogMsgByte1() with LOG_CONTENTS state. * The eof-tag is data so we should print it. * On the way, we care other non-ascii character. */ if (!isascii(v)) fprintf(conn->Pfdebug, "\\x%02x", v); **** +pqLogMsgByte1(PGconn *conn, char v, PGCommSource commsource) + if (PG_PROTOCOL_MAJOR(conn->pversion) >= 3) + else + fprintf(conn->Pfdebug, "FROM backend> %c\n", v); Umm. Also see the following. +pqLogMsgInt(PGconn *conn, int v, int length, PGCommSource commsource) + if (PG_PROTOCOL_MAJOR(conn->pversion) >= 3) + else + fprintf(conn->Pfdebug, "To backend (#%d)> %d\n", length, v); In the later case, "FROM backend" is correct. The bug is caused by confusing design. I suggest to refactor that: - Move callings fprintf() in pqStoreFrontendMsg() to each pqLogMsgXXX() - pqStoreFrontendMsg() calls each pqLogMsgXXX(). **** +pqLogMsgString(PGconn *conn, const char *v, int length, PGCommSource commsource) +{ + if (length < 0) + length = strlen(v) + 1; I cannot find length==-1 route. **** +pqLogMsgnchar(PGconn *conn, const char *v, int length, PGCommSource commsource) + fprintf(conn->Pfdebug, "\'"); + fwrite(v, 1, length, conn->Pfdebug); + fprintf(conn->Pfdebug, "\'"); We should consider that 'v' is binary data. So it is better to convert non-ascii character to ascii (e.g. x%02x format). e.g. case of StartupPacket 2020-12-25 08:07:42 UTC > StartupMessage 40 'userk5userdatabasetemplate1' The length of StartupPacket includes length itselft. The length of 'user...tepmlate1' is only 27. We could not find 8 bytes(40 - 4 - 27 = 8). If non-ascii character is printed, we can find them. 2020-12-25 08:29:19 UTC > StartupMessage 40 '\x00\x03\x00\x00user\x00k5user\x00database\x00template1\x00\x00' static void pqLogMsgBinary(PGconn *conn, const char *v, int length, PGCommSource commsource) { int i, pin; for (pin = i = 0; i < length; ++i) { if (isprint(v[i])) continue; else { fwrite(v + pin, 1, i - pin, conn->Pfdebug); fprintf(conn->Pfdebug, "\\x%02x", v[i]); pin = i + 1; } } if (pin < length) fwrite(v + pin, 1, length - pin, conn->Pfdebug); } **** @@ -123,6 +123,9 @@ pqParseInput3(PGconn *conn) + /* Terminate a half-finished logging message */ + if (conn->Pfdebug) + pqTraceMaybeBreakLine(msgLength, conn); return; I understand that the above code only want to do [conn->be_msg->length = 0]. If it is true, you should create new wrapper function like pqTraceForcelyTerminateMessage() and handleSyncLoss() itself should call pqTraceForcelyTerminateMessage(). pqTraceForcelyTerminateMessage() { if (conn->be_msg->length > 0) fprintf(conn->Pfdebug, "\n"); pqTraceResetBeMsg(conn); } **** pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) if (conn->Pfdebug) fprintf(conn->Pfdebug, "pqSaveParameterStatus: '%s' = '%s'\n", name, value); I think the above code has become to be unnecessary because new pqTrace framework become to print it. Regards Ryo Matsumura
#include <stdio.h> #include "libpq-fe.h" int main(int ac, char **av) { PGconn *conn = PQconnectStart(av[1]); PQtrace(conn, stdout); while (1) { PQconnectPoll(conn); if (PQstatus(conn) == CONNECTION_OK) break; else if (PQstatus(conn) == CONNECTION_BAD) { fprintf(stderr, "could not connect: %s\n", PQerrorMessage(conn)); break; } } PQfinish(conn); return 0; }