The mbvalidate() function was called on the PGresult strings, and it
modifies those strings in place (it removes unknown characters).

I've changed the validation function to one that either returns a new
string when needed, or NULL when the string was already valid. The
normal case is that the strings are valid, so in most cases no new
strings are created.

I have not included changes to avoid sending invalid strings to the
server. It's not clear what the best way to solve that is. Maybe one should
just do a stupid fix for 7.4: validate the strings as above and just
delete strings that fail validation. It is possible to solve it in a
better way, but it's more complicated. We have 4 different charsets to
think about in psql: the server, the client, the terminal and the message
catalog (the last two usually match, but not always). I would prefer to
find a good solution instead of just patching one small problem after
another.

-- 
/Dennis
Index: mbprint.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/mbprint.c,v
retrieving revision 1.6
diff -u -r1.6 mbprint.c
--- mbprint.c   18 Mar 2003 22:15:44 -0000      1.6
+++ mbprint.c   21 Jun 2003 14:34:53 -0000
@@ -192,7 +192,7 @@
 /* mb_utf_wcwidth : calculate column length for the utf8 string pwcs
  */
 static int
-mb_utf_wcswidth(unsigned char *pwcs, size_t len)
+mb_utf_wcswidth(const unsigned char *pwcs, size_t len)
 {
        int                     w,
                                l = 0;
@@ -269,29 +269,42 @@
        return -1;
 }
 
+static bool
+mb_utf_is_valid(const unsigned char *pwcs)
+{
+       while (*pwcs)
+       {
+               if (utf_charcheck(pwcs++) < 0)
+                       return FALSE;
+       }
+
+       return TRUE;
+}
+
 static unsigned char *
-mb_utf_validate(unsigned char *pwcs)
+mb_mk_valid_utf_string(const unsigned char *pwcs)
 {
+       /* Exact length we need for p is unknown.
+        * All we know is that it's shorter then pwcs.
+        */
        int                     l = 0;
-       unsigned char *p = pwcs;
-       unsigned char *p0 = pwcs;
+       unsigned char *p = malloc(strlen(pwcs));
+       unsigned char *p0 = p;
+
+       if (!p)
+       {
+               perror("malloc");
+               exit(EXIT_FAILURE);
+       }
 
        while (*pwcs)
        {
                if ((l = utf_charcheck(pwcs)) > 0)
                {
-                       if (p != pwcs)
-                       {
-                               int                     i;
-
-                               for (i = 0; i < l; i++)
-                                       *p++ = *pwcs++;
-                       }
-                       else
-                       {
-                               pwcs += l;
-                               p += l;
-                       }
+                       int                     i;
+
+                       for (i = 0; i < l; i++)
+                               *p++ = *pwcs++;
                }
                else
                {
@@ -299,8 +312,9 @@
                        pwcs++;
                }
        }
-       if (p != pwcs)
-               *p = '\0';
+
+       *p = '\0';
+
        return p0;
 }
 
@@ -309,7 +323,7 @@
  */
 
 int
-pg_wcswidth(unsigned char *pwcs, size_t len, int encoding)
+pg_wcswidth(const unsigned char *pwcs, size_t len, int encoding)
 {
        if (encoding == PG_UTF8)
                return mb_utf_wcswidth(pwcs, len);
@@ -323,17 +337,27 @@
        }
 }
 
+/*
+ * This function either returns a new allocated string
+ * that is valid or it returns NULL which indicates that
+ * the original string was already valid (the common case).
+ */
 unsigned char *
-mbvalidate(unsigned char *pwcs, int encoding)
+mbvalidate(const unsigned char *pwcs, int encoding)
 {
        if (encoding == PG_UTF8)
-               return mb_utf_validate(pwcs);
+       {
+               if (mb_utf_is_valid(pwcs))
+                       return NULL;
+               else
+                       return mb_mk_valid_utf_string(pwcs);
+       }
        else
        {
                /*
                 * other encodings needing validation should add their own
                 * routines here
                 */
-               return pwcs;
+               return NULL;
        }
 }
Index: mbprint.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/mbprint.h,v
retrieving revision 1.6
diff -u -r1.6 mbprint.h
--- mbprint.h   18 Mar 2003 22:15:44 -0000      1.6
+++ mbprint.h   21 Jun 2003 14:34:53 -0000
@@ -6,8 +6,8 @@
 
 pg_wchar       utf2ucs(const unsigned char *c);
 
-unsigned char *mbvalidate(unsigned char *pwcs, int encoding);
+unsigned char *mbvalidate(const unsigned char *pwcs, int encoding);
 
-int                    pg_wcswidth(unsigned char *pwcs, size_t len, int encoding);
+int                    pg_wcswidth(const unsigned char *pwcs, size_t len, int encoding);
 
 #endif   /* MBPRINT_H */
Index: print.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/print.c,v
retrieving revision 1.39
diff -u -r1.39 print.c
--- print.c     12 Jun 2003 08:15:28 -0000      1.39
+++ print.c     21 Jun 2003 14:34:53 -0000
@@ -1123,41 +1123,85 @@
 {
        int                     nfields;
        const char **headers;
+       const char **header_copies;
+       const char **header_copies_ptr;
        const char **cells;
+       const char **cell_copies;
+       const char **cell_copies_ptr;
+       const char **ptr;
        char      **footers;
        char       *align;
        int                     i;
+       int                     size;
 
 
        /* extract headers */
 
        nfields = PQnfields(result);
+       size = nfields + 1;
 
-       headers = calloc(nfields + 1, sizeof(*headers));
+       headers = calloc(size, sizeof(*headers));
        if (!headers)
        {
                perror("calloc");
                exit(EXIT_FAILURE);
        }
 
-       for (i = 0; i < nfields; i++)
-               headers[i] = mbvalidate(PQfname(result, i), opt->topt.encoding);
+       header_copies = malloc(size * sizeof(*header_copies));
+       if (!header_copies)
+       {
+               perror("malloc");
+               exit(EXIT_FAILURE);
+       }
+       header_copies_ptr = header_copies;
 
-       /* set cells */
+       for (i = 0; i < nfields; i++) {
+               unsigned char *str;
+
+               headers[i] = PQfname(result, i);
+
+               str = mbvalidate(headers[i], opt->topt.encoding);
+               if (str)
+               {
+                       headers[i] = str;
+                       *header_copies_ptr++ = str;
+               }
+       }
 
-       cells = calloc(nfields * PQntuples(result) + 1, sizeof(*cells));
+       /* set cells */
+       size = nfields * PQntuples(result) + 1;
+       cells = calloc(size, sizeof(*cells));
        if (!cells)
        {
                perror("calloc");
                exit(EXIT_FAILURE);
        }
 
+       cell_copies = malloc(size * sizeof(*cells));
+       if (!cell_copies)
+       {
+               perror("malloc");
+               exit(EXIT_FAILURE);
+       }
+       cell_copies_ptr = cell_copies;
+
        for (i = 0; i < nfields * PQntuples(result); i++)
        {
                if (PQgetisnull(result, i / nfields, i % nfields))
                        cells[i] = opt->nullPrint ? opt->nullPrint : "";
                else
-                       cells[i] = mbvalidate(PQgetvalue(result, i / nfields, i % nfields), opt->topt.encoding);
+               {
+                       unsigned char *str;
+
+                       cells[i] = PQgetvalue(result, i / nfields, i % nfields);
+
+                       str = mbvalidate(cells[i], opt->topt.encoding);
+                       if (str)
+                       {
+                               cells[i] = str;
+                               *cell_copies_ptr++ = str;
+                       }
+               }                       
        }
 
        /* set footers */
@@ -1215,8 +1259,16 @@
                           footers ? (const char *const *) footers : (const char *const *) (opt->footers),
                           align, &opt->topt, fout);
 
+       for (ptr=header_copies; ptr != header_copies_ptr; ptr++)
+               free ((void *)*ptr);
+
+       for (ptr=cell_copies; ptr != cell_copies_ptr; ptr++)
+               free ((void *)*ptr);
+
        free((void *) headers);
+       free((void *) header_copies);
        free((void *) cells);
+       free((void *) cell_copies);
        if (footers)
        {
                free(footers[0]);
---------------------------(end of broadcast)---------------------------
TIP 7: don't forget to increase your free space map settings

Reply via email to