The mbvalidate() function was called on the PGresult strings, and it
changed those strings in place (it removes unknown characters).
I've changed the validation function to one that either returns a new
string when needed or NULL when the string was already valid. The
normal case is to have valid strings, so in most cases no new strings
are created.
I have not included changes to avoid sending non-validating strings to the
server. It's not clear what the best way to solve that is. Maybe one should
just do a stupid fix for 7.4: validate the strings like above and just
delete non-validating strings. It is possible to solve it in a better way,
but it's more complicated. We have 4 different charsets to think about in
psql: the server, the client, the terminal and the message catalog (the
last two usually match, but not always). I would prefer to find a good
solution instead of just patching one small problem after another.
--
/Dennis
Index: mbprint.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/mbprint.c,v
retrieving revision 1.6
diff -u -r1.6 mbprint.c
--- mbprint.c 18 Mar 2003 22:15:44 -0000 1.6
+++ mbprint.c 21 Jun 2003 14:34:53 -0000
@@ -192,7 +192,7 @@
/* mb_utf_wcwidth : calculate column length for the utf8 string pwcs
*/
static int
-mb_utf_wcswidth(unsigned char *pwcs, size_t len)
+mb_utf_wcswidth(const unsigned char *pwcs, size_t len)
{
int w,
l = 0;
@@ -269,29 +269,42 @@
return -1;
}
+static bool
+mb_utf_is_valid(const unsigned char *pwcs)
+{
+	int			l;		/* byte length reported by utf_charcheck() */
+
+	/* TRUE iff the whole string validates; step by whole characters, non-positive length = invalid */
+	for (; *pwcs; pwcs += l)
+		if ((l = utf_charcheck(pwcs)) <= 0)
+			return FALSE;
+	return TRUE;
+}
+
static unsigned char *
-mb_utf_validate(unsigned char *pwcs)
+mb_mk_valid_utf_string(const unsigned char *pwcs)
{
+ /* Exact length we need for p is unknown.
+ * All we know is that it's shorter than pwcs.
+ */
int l = 0;
- unsigned char *p = pwcs;
- unsigned char *p0 = pwcs;
+ unsigned char *p = malloc(strlen(pwcs));
+ unsigned char *p0 = p;
+
+ if (!p)
+ {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
while (*pwcs)
{
if ((l = utf_charcheck(pwcs)) > 0)
{
- if (p != pwcs)
- {
- int i;
-
- for (i = 0; i < l; i++)
- *p++ = *pwcs++;
- }
- else
- {
- pwcs += l;
- p += l;
- }
+ int i;
+
+ for (i = 0; i < l; i++)
+ *p++ = *pwcs++;
}
else
{
@@ -299,8 +312,9 @@
pwcs++;
}
}
- if (p != pwcs)
- *p = '\0';
+
+ *p = '\0';
+
return p0;
}
@@ -309,7 +323,7 @@
*/
int
-pg_wcswidth(unsigned char *pwcs, size_t len, int encoding)
+pg_wcswidth(const unsigned char *pwcs, size_t len, int encoding)
{
if (encoding == PG_UTF8)
return mb_utf_wcswidth(pwcs, len);
@@ -323,17 +337,27 @@
}
}
+/*
+ * This function either returns a new allocated string
+ * that is valid or it returns NULL which indicates that
+ * the original string was already valid (the common case).
+ */
unsigned char *
-mbvalidate(unsigned char *pwcs, int encoding)
+mbvalidate(const unsigned char *pwcs, int encoding)
{
if (encoding == PG_UTF8)
- return mb_utf_validate(pwcs);
+ {
+ if (mb_utf_is_valid(pwcs))
+ return NULL;
+ else
+ return mb_mk_valid_utf_string(pwcs);
+ }
else
{
/*
* other encodings needing validation should add their own
* routines here
*/
- return pwcs;
+ return NULL;
}
}
Index: mbprint.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/mbprint.h,v
retrieving revision 1.6
diff -u -r1.6 mbprint.h
--- mbprint.h 18 Mar 2003 22:15:44 -0000 1.6
+++ mbprint.h 21 Jun 2003 14:34:53 -0000
@@ -6,8 +6,8 @@
pg_wchar utf2ucs(const unsigned char *c);
-unsigned char *mbvalidate(unsigned char *pwcs, int encoding);
+unsigned char *mbvalidate(const unsigned char *pwcs, int encoding);
-int pg_wcswidth(unsigned char *pwcs, size_t len, int encoding);
+int pg_wcswidth(const unsigned char *pwcs, size_t len, int
encoding);
#endif /* MBPRINT_H */
Index: print.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/print.c,v
retrieving revision 1.39
diff -u -r1.39 print.c
--- print.c 12 Jun 2003 08:15:28 -0000 1.39
+++ print.c 21 Jun 2003 14:34:53 -0000
@@ -1123,41 +1123,85 @@
{
int nfields;
const char **headers;
+ const char **header_copies;
+ const char **header_copies_ptr;
const char **cells;
+ const char **cell_copies;
+ const char **cell_copies_ptr;
+ const char **ptr;
char **footers;
char *align;
int i;
+ int size;
/* extract headers */
nfields = PQnfields(result);
+ size = nfields + 1;
- headers = calloc(nfields + 1, sizeof(*headers));
+ headers = calloc(size, sizeof(*headers));
if (!headers)
{
perror("calloc");
exit(EXIT_FAILURE);
}
- for (i = 0; i < nfields; i++)
- headers[i] = mbvalidate(PQfname(result, i), opt->topt.encoding);
+ header_copies = malloc(size * sizeof(*header_copies));
+ if (!header_copies)
+ {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+ header_copies_ptr = header_copies;
- /* set cells */
+ for (i = 0; i < nfields; i++) {
+ unsigned char *str;
+
+ headers[i] = PQfname(result, i);
+
+ str = mbvalidate(headers[i], opt->topt.encoding);
+ if (str)
+ {
+ headers[i] = str;
+ *header_copies_ptr++ = str;
+ }
+ }
- cells = calloc(nfields * PQntuples(result) + 1, sizeof(*cells));
+ /* set cells */
+ size = nfields * PQntuples(result) + 1;
+ cells = calloc(size, sizeof(*cells));
if (!cells)
{
perror("calloc");
exit(EXIT_FAILURE);
}
+ cell_copies = malloc(size * sizeof(*cells));
+ if (!cell_copies)
+ {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+ cell_copies_ptr = cell_copies;
+
for (i = 0; i < nfields * PQntuples(result); i++)
{
if (PQgetisnull(result, i / nfields, i % nfields))
cells[i] = opt->nullPrint ? opt->nullPrint : "";
else
- cells[i] = mbvalidate(PQgetvalue(result, i / nfields, i %
nfields), opt->topt.encoding);
+ {
+ unsigned char *str;
+
+ cells[i] = PQgetvalue(result, i / nfields, i % nfields);
+
+ str = mbvalidate(cells[i], opt->topt.encoding);
+ if (str)
+ {
+ cells[i] = str;
+ *cell_copies_ptr++ = str;
+ }
+ }
}
/* set footers */
@@ -1215,8 +1259,16 @@
footers ? (const char *const *) footers : (const char
*const *) (opt->footers),
align, &opt->topt, fout);
+ for (ptr=header_copies; ptr != header_copies_ptr; ptr++)
+ free ((void *)*ptr);
+
+ for (ptr=cell_copies; ptr != cell_copies_ptr; ptr++)
+ free ((void *)*ptr);
+
free((void *) headers);
+ free((void *) header_copies);
free((void *) cells);
+ free((void *) cell_copies);
if (footers)
{
free(footers[0]);
---------------------------(end of broadcast)---------------------------
TIP 7: don't forget to increase your free space map settings