On Tue, Jun 2, 2015 at 4:19 PM, Michael Paquier <[email protected]>
wrote:
> On Sun, May 24, 2015 at 2:43 AM, Noah Misch <[email protected]> wrote:
> > It would be good to purge the code of precisions on "s" conversion
> specifiers,
> > then Assert(!pointflag) in fmtstr() to catch new introductions. I won't
> plan
> > to do it myself, but it would be a nice little defensive change.
>
> This sounds like a good protection idea, but as it impacts existing
> backend code relying in sprintf's port version we should only do the
> assertion in HEAD in my opinion, and mention it in the release notes of the
> next major version at the time a patch in this area is applied. I guess
> that we had better backpatch the places using .*s though on back-branches.
>
Attached is a patch purging a bunch of places from using %.*s, this will
make the code more solid when facing non-ASCII strings. I let pg_upgrade
and pg_basebackup code paths alone as it reduces the code lisibility by
moving out of this separator. We may want to fix them though if file path
names have non-ASCII characters, but it seems less critical.
Thoughts?
--
Michael
diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c
index 0dabfa7..910c124 100644
--- a/src/backend/nodes/read.c
+++ b/src/backend/nodes/read.c
@@ -326,8 +326,18 @@ nodeRead(char *token, int tok_len)
break;
val = (int) strtol(token, &endptr, 10);
if (endptr != token + tok_len)
- elog(ERROR, "unrecognized integer: \"%.*s\"",
- tok_len, token);
+ {
+ /*
+ * Cannot use %.*s here because some machines
+ * interpret precision of %s sometimes in
+ * characters or in bytes.
+ */
+ char *buf = (char *) palloc(tok_len + 1);
+ memcpy(buf, token, tok_len);
+ buf[tok_len] = '\0';
+ elog(ERROR, "unrecognized integer: \"%s\"",
+ buf);
+ }
l = lappend_int(l, val);
}
}
@@ -346,8 +356,17 @@ nodeRead(char *token, int tok_len)
break;
val = (Oid) strtoul(token, &endptr, 10);
if (endptr != token + tok_len)
- elog(ERROR, "unrecognized OID: \"%.*s\"",
- tok_len, token);
+ {
+ /*
+ * Cannot use %.*s here because some machines
+ * interpret precision of %s sometimes in
+ * characters or in bytes.
+ */
+ char *buf = (char *) palloc(tok_len + 1);
+ memcpy(buf, token, tok_len);
+ buf[tok_len] = '\0';
+ elog(ERROR, "unrecognized OID: \"%s\"", buf);
+ }
l = lappend_oid(l, val);
}
}
@@ -380,7 +399,14 @@ nodeRead(char *token, int tok_len)
}
else
{
- elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
+ /*
+ * Cannot use %.*s here because some machines interpret
+ * precision of %s sometimes in characters or in bytes.
+ */
+ char *buf = (char *) palloc(tok_len + 1);
+ memcpy(buf, token, tok_len);
+ buf[tok_len] = '\0';
+ elog(ERROR, "unrecognized token: \"%s\"", buf);
result = NULL; /* keep compiler happy */
}
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index f5a40fb..444b54d 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -142,6 +142,13 @@
#define nullable_string(token,length) \
((length) == 0 ? NULL : debackslash(token, length))
+#define error_token(message, token, len) \
+ do { \
+ char *buf = palloc(len + 1); \
+ memcpy(buf, token, len); \
+ buf[len] = '\0'; \
+ elog(ERROR, message, buf); \
+ } while (0);
static Datum readDatum(bool typbyval);
@@ -159,13 +166,13 @@ _readBitmapset(void)
if (token == NULL)
elog(ERROR, "incomplete Bitmapset structure");
if (length != 1 || token[0] != '(')
- elog(ERROR, "unrecognized token: \"%.*s\"", length, token);
+ error_token("unrecognized token: \"%s\"", token, length);
token = pg_strtok(&length);
if (token == NULL)
elog(ERROR, "incomplete Bitmapset structure");
if (length != 1 || token[0] != 'b')
- elog(ERROR, "unrecognized token: \"%.*s\"", length, token);
+ error_token("unrecognized token: \"%s\"", token, length);
for (;;)
{
@@ -179,7 +186,7 @@ _readBitmapset(void)
break;
val = (int) strtol(token, &endptr, 10);
if (endptr != token + length)
- elog(ERROR, "unrecognized integer: \"%.*s\"", length, token);
+ error_token("unrecognized token: \"%s\"", token, length);
result = bms_add_member(result, val);
}
@@ -803,7 +810,7 @@ _readBoolExpr(void)
else if (strncmp(token, "not", 3) == 0)
local_node->boolop = NOT_EXPR;
else
- elog(ERROR, "unrecognized boolop \"%.*s\"", length, token);
+ error_token("unrecognized boolop: \"%s\"", token, length);
READ_NODE_FIELD(args);
READ_LOCATION_FIELD(location);
@@ -1534,7 +1541,10 @@ parseNodeString(void)
return_value = _readDeclareCursorStmt();
else
{
- elog(ERROR, "badly formatted node string \"%.32s\"...", token);
+ char buf[33];
+ memcpy(buf, token, 32);
+ buf[33] = '\0';
+ elog(ERROR, "badly formatted node string \"%s\"...", buf);
return_value = NULL; /* keep compiler quiet */
}
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 18ff9e2..b8fafd1 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -329,13 +329,17 @@ TParserInit(char *str, int len)
prs->state->state = TPS_Base;
#ifdef WPARSER_TRACE
-
- /*
- * Use of %.*s here is a bit risky since it can misbehave if the data is
- * not in what libc thinks is the prevailing encoding. However, since
- * this is just a debugging aid, we choose to live with that.
- */
- fprintf(stderr, "parsing \"%.*s\"\n", len, str);
+ {
+ /*
+ * %.*s is not used here to avoid hazards with libc's prevailing encoding
+ * where precision can be counted as bytes or as characters.
+ */
+ char *buf = (char *) palloc(prs->lenstr + 1);
+ memcpy(buf, prs->str, prs->lenstr);
+ buf[prs->str] = '\0';
+ fprintf(stderr, "parsing \"%s\"\n", buf);
+ pfree(buf);
+ }
#endif
return prs;
@@ -374,8 +378,14 @@ TParserCopyInit(const TParser *orig)
prs->state->state = TPS_Base;
#ifdef WPARSER_TRACE
- /* See note above about %.*s */
- fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
+ {
+ char *buf = (char *) palloc(prs->lenstr + 1);
+ memcpy(buf, prs->str, prs->lenstr);
+ buf[prs->str] = '\0';
+ /* See note above about not using %.*s */
+ fprintf(stderr, "parsing copy of \"%s\"\n", buf);
+ pfree(buf);
+ }
#endif
return prs;
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 2a44b6e..0146e9f 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -4005,15 +4005,21 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char
AppendTimestampSeconds(str + strlen(str), tm, fsec);
/*
- * Note: the uses of %.*s in this function would be risky if the
- * timezone names ever contain non-ASCII characters. However, all
- * TZ abbreviations in the Olson database are plain ASCII.
+ * Note: the use of %.*s in this function would be risky if the
+ * timezone names ever contain non-ASCII characters. All TZ
+ * abbreviations in the Olson database are plain ASCII, still
+ * its use is avoided.
*/
if (print_tz)
{
if (tzn)
- sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn);
+ {
+ char buf[MAXTZLEN + 1];
+ memcpy(buf, tzn, MAXTZLEN);
+ buf[MAXTZLEN] = '\0';
+ sprintf(str + strlen(str), " %s", buf);
+ }
else
EncodeTimezone(str, tz, style);
}
@@ -4036,7 +4042,12 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char
if (print_tz)
{
if (tzn)
- sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn);
+ {
+ char buf[MAXTZLEN + 1];
+ memcpy(buf, tzn, MAXTZLEN);
+ buf[MAXTZLEN] = '\0';
+ sprintf(str + strlen(str), " %s", buf);
+ }
else
EncodeTimezone(str, tz, style);
}
@@ -4070,7 +4081,12 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char
if (print_tz)
{
if (tzn)
- sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn);
+ {
+ char buf[MAXTZLEN + 1];
+ memcpy(buf, tzn, MAXTZLEN);
+ buf[MAXTZLEN] = '\0';
+ sprintf(str + strlen(str), " %s", buf);
+ }
else
{
/*
@@ -4368,10 +4384,11 @@ CheckDateTokenTable(const char *tablename, const datetkn *base, int nel)
/* check for token strings that don't fit */
if (strlen(base[i].token) > TOKMAXLEN)
{
- /* %.*s is safe since all our tokens are ASCII */
- elog(LOG, "token too long in %s table: \"%.*s\"",
- tablename,
- TOKMAXLEN + 1, base[i].token);
+ char buf[TOKMAXLEN + 1];
+ memcpy(buf, base[i].token, TOKMAXLEN);
+ buf[TOKMAXLEN] = '\0';
+ elog(LOG, "token too long in %s table: \"%s\"",
+ tablename, buf);
ok = false;
break; /* don't risk applying strcmp */
}
diff --git a/src/interfaces/ecpg/ecpglib/error.c b/src/interfaces/ecpg/ecpglib/error.c
index 715bedb..a40152f 100644
--- a/src/interfaces/ecpg/ecpglib/error.c
+++ b/src/interfaces/ecpg/ecpglib/error.c
@@ -257,8 +257,8 @@ ecpg_raise_backend(int line, PGresult *result, PGconn *conn, int compat)
sqlca->sqlcode = ECPG_PGSQL;
/* %.*s is safe here as long as sqlstate is all-ASCII */
- ecpg_log("raising sqlstate %.*s (sqlcode %ld): %s\n",
- (int) sizeof(sqlca->sqlstate), sqlca->sqlstate, sqlca->sqlcode, sqlca->sqlerrm.sqlerrmc);
+ ecpg_log("raising sqlstate %s (sqlcode %ld): %s\n",
+ sqlstate, sqlca->sqlcode, sqlca->sqlerrm.sqlerrmc);
/* free all memory we have allocated for the user */
ECPGfree_auto_mem();
diff --git a/src/interfaces/ecpg/pgtypeslib/dt_common.c b/src/interfaces/ecpg/pgtypeslib/dt_common.c
index 7fe2982..b554201 100644
--- a/src/interfaces/ecpg/pgtypeslib/dt_common.c
+++ b/src/interfaces/ecpg/pgtypeslib/dt_common.c
@@ -851,16 +851,15 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, bool print_tz, int tz, const char *t
if (tm->tm_year <= 0)
sprintf(str + strlen(str), " BC");
- /*
- * Note: the uses of %.*s in this function would be risky if the
- * timezone names ever contain non-ASCII characters. However, all
- * TZ abbreviations in the Olson database are plain ASCII.
- */
-
if (print_tz)
{
if (tzn)
- sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn);
+ {
+ char buf[MAXTZLEN + 1];
+ memcpy(buf, tzn, MAXTZLEN);
+ buf[MAXTZLEN] = '\0';
+ sprintf(str + strlen(str), " %s", buf);
+ }
else
{
hour = -(tz / SECS_PER_HOUR);
@@ -909,7 +908,12 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, bool print_tz, int tz, const char *t
if (print_tz)
{
if (tzn)
- sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn);
+ {
+ char buf[MAXTZLEN + 1];
+ memcpy(buf, tzn, MAXTZLEN);
+ buf[MAXTZLEN] = '\0';
+ sprintf(str + strlen(str), " %s", buf);
+ }
else
{
hour = -(tz / SECS_PER_HOUR);
@@ -968,7 +972,12 @@ EncodeDateTime(struct tm * tm, fsec_t fsec, bool print_tz, int tz, const char *t
if (print_tz)
{
if (tzn)
- sprintf(str + strlen(str), " %.*s", MAXTZLEN, tzn);
+ {
+ char buf[MAXTZLEN + 1];
+ memcpy(buf, tzn, MAXTZLEN);
+ buf[MAXTZLEN] = '\0';
+ sprintf(str + strlen(str), " %s", buf);
+ }
else
{
/*
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers