On Tue, Jun 2, 2015 at 4:19 PM, Michael Paquier <michael.paqu...@gmail.com> wrote:
> On Sun, May 24, 2015 at 2:43 AM, Noah Misch <n...@leadboat.com> wrote: > > It would be good to purge the code of precisions on "s" conversion > specifiers, > > then Assert(!pointflag) in fmtstr() to catch new introductions. I won't > plan > > to do it myself, but it would be a nice little defensive change. > > This sounds like a good protection idea, but as it impacts existing > backend code relying in sprintf's port version we should only do the > assertion in HEAD in my opinion, and mention it in the release notes of the > next major version at the time a patch in this area is applied. I guess > that we had better backpatch the places using .*s though on back-branches. > Attached is a patch purging a bunch of places from using %.*s, this will make the code more solid when facing non-ASCII strings. I let pg_upgrade and pg_basebackup code paths alone as it reduces the code lisibility by moving out of this separator. We may want to fix them though if file path names have non-ASCII characters, but it seems less critical. Thoughts? -- Michael
diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c index 0dabfa7..910c124 100644 --- a/src/backend/nodes/read.c +++ b/src/backend/nodes/read.c @@ -326,8 +326,18 @@ nodeRead(char *token, int tok_len) break; val = (int) strtol(token, &endptr, 10); if (endptr != token + tok_len) - elog(ERROR, "unrecognized integer: \"%.*s\"", - tok_len, token); + { + /* + * Cannot use %.*s here because some machines + * interpret precision of %s sometimes in + * characters or in bytes. + */ + char *buf = (char *) palloc(tok_len + 1); + memcpy(buf, token, tok_len); + buf[tok_len] = '\0'; + elog(ERROR, "unrecognized integer: \"%s\"", + buf); + } l = lappend_int(l, val); } } @@ -346,8 +356,17 @@ nodeRead(char *token, int tok_len) break; val = (Oid) strtoul(token, &endptr, 10); if (endptr != token + tok_len) - elog(ERROR, "unrecognized OID: \"%.*s\"", - tok_len, token); + { + /* + * Cannot use %.*s here because some machines + * interpret precision of %s sometimes in + * characters or in bytes. + */ + char *buf = (char *) palloc(tok_len + 1); + memcpy(buf, token, tok_len); + buf[tok_len] = '\0'; + elog(ERROR, "unrecognized OID: \"%s\"", buf); + } l = lappend_oid(l, val); } } @@ -380,7 +399,14 @@ nodeRead(char *token, int tok_len) } else { - elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token); + /* + * Cannot use %.*s here because some machines interpret + * precision of %s sometimes in characters or in bytes. + */ + char *buf = (char *) palloc(tok_len + 1); + memcpy(buf, token, tok_len); + buf[tok_len] = '\0'; + elog(ERROR, "unrecognized token: \"%s\"", buf); result = NULL; /* keep compiler happy */ } break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index f5a40fb..444b54d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -142,6 +142,13 @@ #define nullable_string(token,length) \ ((length) == 0 ? NULL : debackslash(token, length)) +#define error_token(message, token, len) \ + do { \ + char *buf = palloc(len + 1); \ + memcpy(buf, token, len); \ + buf[len] = '\0'; \ + elog(ERROR, message, buf); \ + } while (0); static Datum readDatum(bool typbyval); @@ -159,13 +166,13 @@ _readBitmapset(void) if (token == NULL) elog(ERROR, "incomplete Bitmapset structure"); if (length != 1 || token[0] != '(') - elog(ERROR, "unrecognized token: \"%.*s\"", length, token); + error_token("unrecognized token: \"%s\"", token, length); token = pg_strtok(&length); if (token == NULL) elog(ERROR, "incomplete Bitmapset structure"); if (length != 1 || token[0] != 'b') - elog(ERROR, "unrecognized token: \"%.*s\"", length, token); + error_token("unrecognized token: \"%s\"", token, length); for (;;) { @@ -179,7 +186,7 @@ _readBitmapset(void) break; val = (int) strtol(token, &endptr, 10); if (endptr != token + length) - elog(ERROR, "unrecognized integer: \"%.*s\"", length, token); + error_token("unrecognized token: \"%s\"", token, length); result = bms_add_member(result, val); } @@ -803,7 +810,7 @@ _readBoolExpr(void) else if (strncmp(token, "not", 3) == 0) local_node->boolop = NOT_EXPR; else - elog(ERROR, "unrecognized boolop \"%.*s\"", length, token); + error_token("unrecognized boolop: \"%s\"", token, length); READ_NODE_FIELD(args); READ_LOCATION_FIELD(location); @@ -1534,7 +1541,10 @@ parseNodeString(void) return_value = _readDeclareCursorStmt(); else { - elog(ERROR, "badly formatted node string \"%.32s\"...", token); + char buf[33]; + memcpy(buf, token, 32); + buf[33] = '\0'; + elog(ERROR, "badly formatted node string \"%s\"...", buf); return_value = NULL; /* keep compiler quiet */ } diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 18ff9e2..b8fafd1 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -329,13 +329,17 @@ TParserInit(char *str, int len) prs->state->state = TPS_Base; #ifdef WPARSER_TRACE - - /* - * Use of %.*s here is a bit risky since it can misbehave if the data is - * not in what libc thinks is the prevailing encoding. However, since - * this is just a debugging aid, we choose to live with that. - */ - fprintf(stderr, "parsing \"%.*s\"\n", len, str); + { + /* + * %.*s is not used here to avoid hazards with libc's prevailing encoding + * where precision can be counted as bytes or as characters. + */ + char *buf = (char *) palloc(prs->lenstr + 1); + memcpy(buf, prs->str, prs->lenstr); + buf[prs->str] = '\0'; + fprintf(stderr, "parsing \"%s\"\n", buf); + pfree(buf); + } #endif return prs; @@ -374,8 +378,14 @@ TParserCopyInit(const TParser *orig) prs->state->state = TPS_Base; #ifdef WPARSER_TRACE - /* See note above about %.*s */ - fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str); + { + char *buf = (char *) palloc(prs->lenstr + 1); + memcpy(buf, prs->str, prs->lenstr); + buf[prs->str] = '\0'; + /* See note above about not using %.*s */ + fprintf(stderr, "parsing copy of \"%s\"\n", buf); + pfree(buf); + } #endif return prs; diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 2a44b6e..0146e9f 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -4005,15 +4005,21 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char AppendTimestampSeconds(str + strlen(str), tm, fsec); /* - * Note: the uses of %.*s in this function would be risky if the - * timezone names ever contain non-ASCII characters. However, all - * TZ abbreviations in the Olson database are plain ASCII. + * Note: the use of %.*s in this function would be risky if the + * timezone names ever contain non-ASCII characters. All TZ + * abbreviations in the Olson database are plain ASCII, still + * its use is avoided. */ if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + char buf[MAXTZLEN + 1]; + memcpy(buf, tzn, MAXTZLEN); + buf[MAXTZLEN] = '\0'; + sprintf(str + strlen(str), " %s", buf); + } else EncodeTimezone(str, tz, style); } @@ -4036,7 +4042,12 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + char buf[MAXTZLEN + 1]; + memcpy(buf, tzn, MAXTZLEN); + buf[MAXTZLEN] = '\0'; + sprintf(str + strlen(str), " %s", buf); + } else EncodeTimezone(str, tz, style); } @@ -4070,7 +4081,12 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + char buf[MAXTZLEN + 1]; + memcpy(buf, tzn, MAXTZLEN); + buf[MAXTZLEN] = '\0'; + sprintf(str + strlen(str), " %s", buf); + } else { /* @@ -4368,10 +4384,11 @@ CheckDateTokenTable(const char *tablename, const datetkn *base, int nel) /* check for token strings that don't fit */ if (strlen(base[i].token) > TOKMAXLEN) { - /* %.*s is safe since all our tokens are ASCII */ - elog(LOG, "token too long in %s table: \"%.*s\"", - tablename, - TOKMAXLEN + 1, base[i].token); + char buf[TOKMAXLEN + 1]; + memcpy(buf, base[i].token, TOKMAXLEN); + buf[TOKMAXLEN] = '\0'; + elog(LOG, "token too long in %s table: \"%s\"", + tablename, buf); ok = false; break; /* don't risk applying strcmp */ } diff --git a/src/interfaces/ecpg/ecpglib/error.c b/src/interfaces/ecpg/ecpglib/error.c index 715bedb..a40152f 100644 --- a/src/interfaces/ecpg/ecpglib/error.c +++ b/src/interfaces/ecpg/ecpglib/error.c @@ -257,8 +257,8 @@ ecpg_raise_backend(int line, PGresult *result, PGconn *conn, int compat) sqlca->sqlcode = ECPG_PGSQL; /* %.*s is safe here as long as sqlstate is all-ASCII */ - ecpg_log("raising sqlstate %.*s (sqlcode %ld): %s\n", - (int) sizeof(sqlca->sqlstate), sqlca->sqlstate, sqlca->sqlcode, sqlca->sqlerrm.sqlerrmc); + ecpg_log("raising sqlstate %s (sqlcode %ld): %s\n", + sqlstate, sqlca->sqlcode, sqlca->sqlerrm.sqlerrmc); /* free all memory we have allocated for the user */ ECPGfree_auto_mem(); diff --git a/src/interfaces/ecpg/pgtypeslib/dt_common.c b/src/interfaces/ecpg/pgtypeslib/dt_common.c index 7fe2982..b554201 100644 --- a/src/interfaces/ecpg/pgtypeslib/dt_common.c +++ b/src/interfaces/ecpg/pgtypeslib/dt_common.c @@ -851,16 +851,15 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, bool print_tz, int tz, const char *t if (tm->tm_year <= 0) sprintf(str + strlen(str), " BC"); - /* - * Note: the uses of %.*s in this function would be risky if the - * timezone names ever contain non-ASCII characters. However, all - * TZ abbreviations in the Olson database are plain ASCII. - */ - if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + char buf[MAXTZLEN + 1]; + memcpy(buf, tzn, MAXTZLEN); + buf[MAXTZLEN] = '\0'; + sprintf(str + strlen(str), " %s", buf); + } else { hour = -(tz / SECS_PER_HOUR); @@ -909,7 +908,12 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, bool print_tz, int tz, const char *t if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + char buf[MAXTZLEN + 1]; + memcpy(buf, tzn, MAXTZLEN); + buf[MAXTZLEN] = '\0'; + sprintf(str + strlen(str), " %s", buf); + } else { hour = -(tz / SECS_PER_HOUR); @@ -968,7 +972,12 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, bool print_tz, int tz, const char *t if (print_tz) { if (tzn) - sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn); + { + char buf[MAXTZLEN + 1]; + memcpy(buf, tzn, MAXTZLEN); + buf[MAXTZLEN] = '\0'; + sprintf(str + strlen(str), " %s", buf); + } else { /*
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers