This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 884e3088295c2032962454271e4ae5c3fbb1b748
Author: Andres Freund <[email protected]>
AuthorDate: Mon Feb 10 10:03:40 2025 -0500

    Specify the encoding of input to fmtId()
    
    This commit adds fmtIdEnc() and fmtQualifiedIdEnc(), which allow the
    encoding to be specified as an explicit argument.  Additionally,
    setFmtEncoding() is provided, which defines the encoding used when no
    explicit encoding is given, to avoid breaking all code using fmtId().
    
    All users of fmtId()/fmtQualifiedId() are either converted to the explicit
    version or a call to setFmtEncoding() has been added.
    
    This commit does not yet utilize the now well-defined encoding; that will
    happen in a subsequent commit.
    
    Reviewed-by: Noah Misch <[email protected]>
    Reviewed-by: Tom Lane <[email protected]>
    Backpatch-through: 13
    Security: CVE-2025-1094
---
 src/bin/pg_dump/pg_backup_archiver.c |  1 +
 src/bin/pg_dump/pg_dump.c            |  1 +
 src/bin/pg_dump/pg_dumpall.c         |  1 +
 src/bin/psql/command.c               |  3 ++
 src/bin/scripts/common.c             |  5 ++-
 src/bin/scripts/createdb.c           |  2 +
 src/bin/scripts/createuser.c         |  2 +
 src/bin/scripts/dropdb.c             | 13 +++---
 src/bin/scripts/dropuser.c           |  3 +-
 src/bin/scripts/reindexdb.c          | 11 +++--
 src/bin/scripts/vacuumdb.c           |  5 ++-
 src/fe_utils/string_utils.c          | 85 +++++++++++++++++++++++++++++++++---
 src/include/fe_utils/string_utils.h  |  3 ++
 13 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 4435f0a1a9..5a75eedbea 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2723,6 +2723,7 @@ processEncodingEntry(ArchiveHandle *AH, TocEntry *te)
                        fatal("unrecognized encoding \"%s\"",
                                  ptr1);
                AH->public.encoding = encoding;
+               setFmtEncoding(encoding);
        }
        else
                fatal("invalid ENCODING item: %s",
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index ad75cd2d2e..972b2ff028 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1435,6 +1435,7 @@ setup_connection(Archive *AH, const char *dumpencoding,
         * we know how to escape strings.
         */
        AH->encoding = PQclientEncoding(conn);
+       setFmtEncoding(AH->encoding);
 
        std_strings = PQparameterStatus(conn, "standard_conforming_strings");
        AH->std_strings = (std_strings && strcmp(std_strings, "on") == 0);
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index 7aae962152..516b45fe5e 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -577,6 +577,7 @@ main(int argc, char *argv[])
         * we know how to escape strings.
         */
        encoding = PQclientEncoding(conn);
+       setFmtEncoding(encoding);
        std_strings = PQparameterStatus(conn, "standard_conforming_strings");
        if (!std_strings)
                std_strings = "off";
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index cd2820f73c..4215504dc1 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -1280,6 +1280,7 @@ exec_command_encoding(PsqlScanState scan_state, bool active_branch)
                                /* save encoding info into psql internal data */
                                pset.encoding = PQclientEncoding(pset.db);
                                pset.popt.topt.encoding = pset.encoding;
+                               setFmtEncoding(pset.encoding);
                                SetVariable(pset.vars, "ENCODING",
                                                        
pg_encoding_to_char(pset.encoding));
                        }
@@ -3676,6 +3677,8 @@ SyncVariables(void)
        pset.popt.topt.encoding = pset.encoding;
        pset.sversion = PQserverVersion(pset.db);
 
+       setFmtEncoding(pset.encoding);
+
        SetVariable(pset.vars, "DBNAME", PQdb(pset.db));
        SetVariable(pset.vars, "USER", PQuser(pset.db));
        SetVariable(pset.vars, "HOST", PQhost(pset.db));
diff --git a/src/bin/scripts/common.c b/src/bin/scripts/common.c
index 1180e2f68b..0228c470de 100644
--- a/src/bin/scripts/common.c
+++ b/src/bin/scripts/common.c
@@ -113,8 +113,9 @@ appendQualifiedRelation(PQExpBuffer buf, const char *spec,
                exit(1);
        }
        appendPQExpBufferStr(buf,
-                                                fmtQualifiedId(PQgetvalue(res, 
0, 1),
-                                                                               
PQgetvalue(res, 0, 0)));
+                                                
fmtQualifiedIdEnc(PQgetvalue(res, 0, 1),
+                                                                               
   PQgetvalue(res, 0, 0),
+                                                                               
   PQclientEncoding(conn)));
        appendPQExpBufferStr(buf, columns);
        PQclear(res);
        termPQExpBuffer(&sql);
diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c
index 041454f075..56cad2c928 100644
--- a/src/bin/scripts/createdb.c
+++ b/src/bin/scripts/createdb.c
@@ -191,6 +191,8 @@ main(int argc, char *argv[])
 
        conn = connectMaintenanceDatabase(&cparams, progname, echo);
 
+       setFmtEncoding(PQclientEncoding(conn));
+
        initPQExpBuffer(&sql);
 
        appendPQExpBuffer(&sql, "CREATE DATABASE %s",
diff --git a/src/bin/scripts/createuser.c b/src/bin/scripts/createuser.c
index ef7e0e549f..33a378ab7d 100644
--- a/src/bin/scripts/createuser.c
+++ b/src/bin/scripts/createuser.c
@@ -263,6 +263,8 @@ main(int argc, char *argv[])
 
        conn = connectMaintenanceDatabase(&cparams, progname, echo);
 
+       setFmtEncoding(PQclientEncoding(conn));
+
        initPQExpBuffer(&sql);
 
        printfPQExpBuffer(&sql, "CREATE ROLE %s", fmtId(newuser));
diff --git a/src/bin/scripts/dropdb.c b/src/bin/scripts/dropdb.c
index b154ed1bb6..1d1756ab9b 100644
--- a/src/bin/scripts/dropdb.c
+++ b/src/bin/scripts/dropdb.c
@@ -128,13 +128,6 @@ main(int argc, char *argv[])
                        exit(0);
        }
 
-       initPQExpBuffer(&sql);
-
-       appendPQExpBuffer(&sql, "DROP DATABASE %s%s%s;",
-                                         (if_exists ? "IF EXISTS " : ""),
-                                         fmtId(dbname),
-                                         force ? " WITH (FORCE)" : "");
-
        /* Avoid trying to drop postgres db while we are connected to it. */
        if (maintenance_db == NULL && strcmp(dbname, "postgres") == 0)
                maintenance_db = "template1";
@@ -148,6 +141,12 @@ main(int argc, char *argv[])
 
        conn = connectMaintenanceDatabase(&cparams, progname, echo);
 
+       initPQExpBuffer(&sql);
+       appendPQExpBuffer(&sql, "DROP DATABASE %s%s%s;",
+                                         (if_exists ? "IF EXISTS " : ""),
+                                         fmtIdEnc(dbname, 
PQclientEncoding(conn)),
+                                         force ? " WITH (FORCE)" : "");
+
        if (echo)
                printf("%s\n", sql.data);
        result = PQexec(conn, sql.data);
diff --git a/src/bin/scripts/dropuser.c b/src/bin/scripts/dropuser.c
index 61b8557bc7..bc13bf3028 100644
--- a/src/bin/scripts/dropuser.c
+++ b/src/bin/scripts/dropuser.c
@@ -142,7 +142,8 @@ main(int argc, char *argv[])
 
        initPQExpBuffer(&sql);
        appendPQExpBuffer(&sql, "DROP ROLE %s%s;",
-                                         (if_exists ? "IF EXISTS " : ""), 
fmtId(dropuser));
+                                         (if_exists ? "IF EXISTS " : ""),
+                                         fmtIdEnc(dropuser, 
PQclientEncoding(conn)));
 
        if (echo)
                printf("%s\n", sql.data);
diff --git a/src/bin/scripts/reindexdb.c b/src/bin/scripts/reindexdb.c
index 2a2f09b5da..73b72524f5 100644
--- a/src/bin/scripts/reindexdb.c
+++ b/src/bin/scripts/reindexdb.c
@@ -536,7 +536,8 @@ run_reindex_command(PGconn *conn, ReindexType type, const char *name,
 
        if (tablespace)
        {
-               appendPQExpBuffer(&sql, "%sTABLESPACE %s", sep, 
fmtId(tablespace));
+               appendPQExpBuffer(&sql, "%sTABLESPACE %s", sep,
+                                                 fmtIdEnc(tablespace, 
PQclientEncoding(conn)));
                sep = comma;
        }
 
@@ -576,7 +577,8 @@ run_reindex_command(PGconn *conn, ReindexType type, const char *name,
        {
                case REINDEX_DATABASE:
                case REINDEX_SYSTEM:
-                       appendPQExpBufferStr(&sql, fmtId(name));
+                       appendPQExpBufferStr(&sql,
+                                                                fmtIdEnc(name, 
PQclientEncoding(conn)));
                        break;
                case REINDEX_INDEX:
                case REINDEX_TABLE:
@@ -744,8 +746,9 @@ get_parallel_object_list(PGconn *conn, ReindexType type,
        for (i = 0; i < ntups; i++)
        {
                appendPQExpBufferStr(&buf,
-                                                        
fmtQualifiedId(PQgetvalue(res, i, 1),
-                                                                               
        PQgetvalue(res, i, 0)));
+                                                        
fmtQualifiedIdEnc(PQgetvalue(res, i, 1),
+                                                                               
           PQgetvalue(res, i, 0),
+                                                                               
           PQclientEncoding(conn)));
 
                simple_string_list_append(tables, buf.data);
                resetPQExpBuffer(&buf);
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index 04a7273b0a..fc53d0d7a8 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -692,8 +692,9 @@ vacuum_one_database(ConnParams *cparams,
        for (i = 0; i < ntups; i++)
        {
                appendPQExpBufferStr(&buf,
-                                                        
fmtQualifiedId(PQgetvalue(res, i, 1),
-                                                                               
        PQgetvalue(res, i, 0)));
+                                                        
fmtQualifiedIdEnc(PQgetvalue(res, i, 1),
+                                                                               
           PQgetvalue(res, i, 0),
+                                                                               
           PQclientEncoding(conn)));
 
                if (tables_listed && !PQgetisnull(res, i, 2))
                        appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2));
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
index bec73cca22..ae769012e8 100644
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -19,6 +19,7 @@
 
 #include "common/keywords.h"
 #include "fe_utils/string_utils.h"
+#include "mb/pg_wchar.h"
 
 static PQExpBuffer defaultGetLocalPQExpBuffer(void);
 
@@ -26,6 +27,8 @@ static PQExpBuffer defaultGetLocalPQExpBuffer(void);
 int                    quote_all_identifiers = 0;
 PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
 
+static int     fmtIdEncoding = -1;
+
 
 /*
  * Returns a temporary PQExpBuffer, valid until the next call to the function.
@@ -54,14 +57,48 @@ defaultGetLocalPQExpBuffer(void)
        return id_return;
 }
 
+/*
+ * Set the encoding that fmtId() and fmtQualifiedId() use.
+ *
+ * This is not safe against multiple connections having different encodings,
+ * but there is no real other way to address the need to know the encoding for
+ * fmtId()/fmtQualifiedId() input for safe escaping. Eventually we should get
+ * rid of fmtId().
+ */
+void
+setFmtEncoding(int encoding)
+{
+       fmtIdEncoding = encoding;
+}
+
+/*
+ * Return the currently configured encoding for fmtId() and fmtQualifiedId().
+ */
+static int
+getFmtEncoding(void)
+{
+       if (fmtIdEncoding != -1)
+               return fmtIdEncoding;
+
+       /*
+        * In assertion builds it seems best to fail hard if the encoding was not
+        * set, to make it easier to find places with missing calls. But in
+        * production builds that seems like a bad idea, thus we instead just
+        * default to UTF-8.
+        */
+       Assert(fmtIdEncoding != -1);
+
+       return PG_UTF8;
+}
+
 /*
  *     Quotes input string if it's not a legitimate SQL identifier as-is.
  *
- *     Note that the returned string must be used before calling fmtId again,
+ *     Note that the returned string must be used before calling fmtIdEnc again,
  *     since we re-use the same return buffer each time.
  */
 const char *
-fmtId(const char *rawid)
+fmtIdEnc(const char *rawid, int encoding)
 {
        PQExpBuffer id_return = getLocalPQExpBuffer();
 
@@ -134,7 +171,24 @@ fmtId(const char *rawid)
 }
 
 /*
- * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
+ *     Quotes input string if it's not a legitimate SQL identifier as-is.
+ *
+ *     Note that the returned string must be used before calling fmtId again,
+ *     since we re-use the same return buffer each time.
+ *
+ *  NB: This assumes setFmtEncoding() previously has been called to configure
+ *  the encoding of rawid. It is preferable to use fmtIdEnc() with an
+ *  explicit encoding.
+ */
+const char *
+fmtId(const char *rawid)
+{
+       return fmtIdEnc(rawid, getFmtEncoding());
+}
+
+/*
+ * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as
+ * needed.
  *
  * Like fmtId, use the result before calling again.
  *
@@ -142,7 +196,7 @@ fmtId(const char *rawid)
  * use that buffer until we're finished with calling fmtId().
  */
 const char *
-fmtQualifiedId(const char *schema, const char *id)
+fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
 {
        PQExpBuffer id_return;
        PQExpBuffer lcl_pqexp = createPQExpBuffer();
@@ -150,9 +204,9 @@ fmtQualifiedId(const char *schema, const char *id)
        /* Some callers might fail to provide a schema name */
        if (schema && *schema)
        {
-               appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
+               appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding));
        }
-       appendPQExpBufferStr(lcl_pqexp, fmtId(id));
+       appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding));
 
        id_return = getLocalPQExpBuffer();
 
@@ -162,6 +216,25 @@ fmtQualifiedId(const char *schema, const char *id)
        return id_return->data;
 }
 
+/*
+ * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
+ *
+ * Like fmtId, use the result before calling again.
+ *
+ * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
+ * use that buffer until we're finished with calling fmtId().
+ *
+ * NB: This assumes setFmtEncoding() previously has been called to configure
+ * the encoding of schema/id. It is preferable to use fmtQualifiedIdEnc()
+ * with an explicit encoding.
+ */
+const char *
+fmtQualifiedId(const char *schema, const char *id)
+{
+       return fmtQualifiedIdEnc(schema, id, getFmtEncoding());
+}
+
+
 /*
  * Format a Postgres version number (in the PG_VERSION_NUM integer format
  * returned by PQserverVersion()) as a string.  This exists mainly to
diff --git a/src/include/fe_utils/string_utils.h b/src/include/fe_utils/string_utils.h
index e77ff4bcf2..3bd6f26dcd 100644
--- a/src/include/fe_utils/string_utils.h
+++ b/src/include/fe_utils/string_utils.h
@@ -25,7 +25,10 @@ extern PQExpBuffer (*getLocalPQExpBuffer) (void);
 
 /* Functions */
 extern const char *fmtId(const char *identifier);
+extern const char *fmtIdEnc(const char *identifier, int encoding);
 extern const char *fmtQualifiedId(const char *schema, const char *id);
+extern const char *fmtQualifiedIdEnc(const char *schema, const char *id, int 
encoding);
+extern void setFmtEncoding(int encoding);
 
 extern char *formatPGVersionNumber(int version_number, bool include_minor,
                                                                   char *buf, 
size_t buflen);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to