Hello! bytea-test.cxx -- contains detailed description of the bug and test plan and code.
varlena.c.diff -- patch for PostgreSQL 7.5devel src/backend/utils/adt/varlena.c#byteaout
fe-exec.c.diff -- patch (optional) for PostgreSQL 7.5devel src/interfaces/libpq/fe-exec.c#PQescapeBytea
test.data -- contains test data :-))
-- Sergey N. Yatskevich <[EMAIL PROTECTED]> GosNIIAS
/**
* @file
* @brief This test shows a bug in the PostgreSQL byteaout code
* @author Sergey N. Yatskevich
*
* If we have a different client and server encodings and client and server locales are
* not C(ASCII) then path of bytea string for client->server transfer is:
* -# encode binary data on client with PQescapeBytea (encode in \\ooo form all symbols
* with code == 0x0 || code >= 0x80)
* -# send encoded data to the server
* -# decode received characters with pg_client_to_server in
* src/backend/libpq/pqformat.c#pq_getmsgstring
* -# decode bytea C-string with byteain
* .
* and path for server->client transfer is:
* -# encode bytea into C-string with byteaout (encode in \\ooo form all symbols with
* !isprint(symbol code) for current server locale)
* -# encode query result characters with pq_server_to_client in
* src/backend/libpq/pqformat.c#pq_sendcountedtext
* -# send encoded data to the client
* -# decode received data with PQunescapeBytea
*
* If we have the next client-server configuration:
*
@verbatim
+---------------------+
| KOI8 (ru_RU.KOI8-R) |
| |
| Server |
+--+----------------+-+
^ |
| |
| v
+----------+----------+ +------------------+
| KOI8 (ru_RU.KOI8-R) | | WIN (CP1251) |
| | | |
| Client (Linux) | | Client (Windows) |
+---------------------+ +------------------+
@endverbatim
*
* then, for example, symbol RUSSIAN_A (code 255) from Linux-client will be translated
* in database into the symbol with code 255, and then for Windows client --- into the
* symbol with code 192, that is wrong for bytea data type !!!!
*
* In case when database has UNICODE encoding some parts of bytea strings from server will
* not even be sent to the client, because some symbol chains in current server locale do
* not form a valid UTF-8 sequence and can't be translated to client encoding properly.
*
* Simplest way to solve this problem is to replace isprint check in byteaout and
* >= 0x80 check in PQescapeBytea procedures with isascii && isprint checks, because
* ASCII symbols for all locales (and database encodings) have the same byte codes.
*
* Or you may do in byteaout the same symbol check as in PQescapeBytea (encode in \\ooo
* form all symbols with code >= 0x80).
*
* But I prefer the first way (with isascii && isprint check in both byteaout and
* PQescapeBytea), because it produces nice printable and editable ASCII dump output
* for debug :-)).
*
* Test steps:
* -# compile program with command (for gcc 3.2.3):
* <tt>g++ -Wall -pedantic -I`pg_config --includedir` bytea-test.cxx -o bytea-test -lpq</tt>
* -# init database cluster with non C(ASCII) locale (for example ru_RU.KOI8-R)
* -# create test database with non SQL_ASCII encoding (for example KOI8)
* -# create test table in this database with command:
* <tt>CREATE TABLE bytea_test (data BYTEA);</tt>
* -# run test with command: <tt>./bytea-test database_name test.data</tt>
* -# try the three previous steps with UNICODE database
*
* Then apply patches (at least varlena.c.diff) and run test again. All must be done
* successfully.
*
* @note @c bytea_test table must be available for deleting, inserting and selecting
*
* @note Attached test file (test.data) contains sequence of all 256 8-bit symbols.
*
* @bug I am very sorry for my bad english, but I hope you understand me :-))
*/
#include <iostream>
#include <fstream>
#include <iterator>
#include <vector>
#include <libpq-fe.h>
using namespace std;
int
main (int _argc, char **_argv) {
// Check arguments count
if (_argc != 3) {
cerr << "Usage: " << _argv[0] << " <database_name> <test_file>\n";
return 1;
}
// Set up the database connection
PGconn *conn = PQsetdb (NULL, NULL, NULL, NULL, _argv[1]);
if (PQstatus (conn) == CONNECTION_BAD) {
cerr << "Can't connect to database " << _argv[1] << " (" <<
PQerrorMessage (conn) << ")\n";
PQfinish (conn);
return 1;
}
// First client encoding
PQsetClientEncoding (conn, "KOI8");
if (PQstatus (conn) == CONNECTION_BAD) {
cerr << "Can't set client encoding for database " << _argv[1] << " ("
<< PQerrorMessage (conn) << ")\n";
PQfinish (conn);
return 1;
}
// Open binary file stream
ifstream is (_argv[2], ios::binary);
if (!is.good ()) {
cerr << "Can't open test file (" << _argv[2] << ")\n";
PQfinish (conn);
return 1;
}
// Load binary file into memory
vector<u_int8_t> bin;
copy (istream_iterator<u_int8_t> (is), istream_iterator<u_int8_t> (),
back_inserter (bin));
// Clean test table
PGresult *res = PQexec (conn, "DELETE FROM bytea_test");
if (PQresultStatus (res) != PGRES_COMMAND_OK) {
PQclear (res);
cerr << "Can't create test table (" << PQresultErrorMessage (res) <<
")\n";
PQfinish (conn);
return 1;
}
PQclear (res);
// Convert bin array into escaped string
size_t escaped_bin_len = 0;
unsigned char *escaped_bin = PQescapeBytea (&bin.front (), bin.size (),
&escaped_bin_len);
cout << "\nSend to server: " << escaped_bin << endl << flush;
// Construct insert query
string insert_q = string ("INSERT INTO bytea_test VALUES ('") +
string (reinterpret_cast<char *> (escaped_bin),
escaped_bin_len - 1) +
string ("')");
// Free no more need memory
free (escaped_bin);
// Insert bytea data into database
res = PQexec (conn, insert_q.data ());
if (PQresultStatus (res) != PGRES_COMMAND_OK) {
PQclear (res);
cerr << "Can't insert data into test table (" << PQresultErrorMessage
(res) << ")\n";
PQfinish (conn);
return 1;
}
PQclear (res);
// Second client encoding
PQsetClientEncoding (conn, "WIN");
if (PQstatus (conn) == CONNECTION_BAD) {
cerr << "Can't set client encoding for database " << _argv[1] << " ("
<< PQerrorMessage (conn) << ")\n";
PQfinish (conn);
return 1;
}
// Get back bin array from database
res = PQexec (conn, "SELECT data FROM bytea_test");
if ((PQresultStatus (res) != PGRES_TUPLES_OK) || (PQntuples (res) == 0)) {
PQclear (res);
cerr << "Can't get data from test table (" << PQresultErrorMessage
(res) << ")\n";
PQfinish (conn);
return 1;
}
cout << "\nRecieve from server: " << PQgetvalue (res, 0, 0) << endl << flush;
// Convert result into binary form
size_t unescaped_bin_len = 0;
unsigned char *unescaped_bin = PQunescapeBytea (reinterpret_cast<unsigned char
*> (PQgetvalue (res, 0, 0)), &unescaped_bin_len);
// Construct binary array
vector<u_int8_t> bin2;
copy (unescaped_bin, unescaped_bin + unescaped_bin_len, back_inserter (bin2));
// Free no more need memory
free (unescaped_bin);
// Clear result
PQclear (res);
// Close connection
PQfinish (conn);
// Compare binary arrays size
if (bin.size () != bin2.size ()) {
cerr << "ERROR: Binary arrays have different size\n";
return 1;
}
// Compare binary arrays data
for (size_t i = 0; i < bin.size (); i++) {
if (bin[i] != bin2[i]) {
cerr << "ERROR: Binary arrays have different content in [" <<
i << "] "
<< (unsigned int)bin[i] << " != " << (unsigned
int)bin2[i] << endl;
return 1;
}
}
//////////////////////////////////////
cout << "\nTest successfully done.\n";
//////////////////////////////////////
return 0;
}
test.data
Description: Binary data
--- varlena.c 2003-09-26 02:54:52 +0400
+++ varlena.new.c 2003-11-18 18:44:58 +0300
@@ -186,7 +186,7 @@
{
if (*vp == '\\')
len += 2;
- else if (isprint((unsigned char) *vp))
+ else if (isprint((unsigned char) *vp) && isascii((unsigned char) *vp))
len++;
else
len += 4;
@@ -200,7 +200,7 @@
*rp++ = '\\';
*rp++ = '\\';
}
- else if (isprint((unsigned char) *vp))
+ else if (isprint((unsigned char) *vp) && isascii((unsigned char) *vp))
*rp++ = *vp;
else
{
--- fe-exec.c 2003-11-02 23:42:41 +0300
+++ fe-exec.new.c 2003-11-18 18:52:43 +0300
@@ -2258,10 +2258,9 @@
* INSERT statement with a bytea type column as the target.
*
* The following transformations are applied
- * '\0' == ASCII 0 == \\000
* '\'' == ASCII 39 == \'
* '\\' == ASCII 92 == \\\\
- * anything >= 0x80 ---> \\ooo (where ooo is an octal expression)
+ * anything !isprint || !isascii ---> \\ooo (where ooo is an octal expression)
*/
unsigned char *
PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
@@ -2280,7 +2279,7 @@
vp = bintext;
for (i = binlen; i > 0; i--, vp++)
{
- if (*vp == 0 || *vp >= 0x80)
+ if (!isprint(*vp) || !isascii(*vp))
len += 5; /* '5' is for '\\ooo' */
else if (*vp == '\'')
len += 2;
@@ -2299,7 +2298,7 @@
for (i = binlen; i > 0; i--, vp++)
{
- if (*vp == 0 || *vp >= 0x80)
+ if (!isprint(*vp) || !isascii(*vp))
{
(void) sprintf(rp, "\\\\%03o", *vp);
rp += 5;
---------------------------(end of broadcast)---------------------------
TIP 3: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to [EMAIL PROTECTED] so that your
message can get through to the mailing list cleanly
