I wrote:
> Not easily: there could be newlines embedded in data strings or SQL
> identifiers.  I'm not seeing any way around this except to restore the
> minimal lexing capability.  One thing we could probably do is to
> restrict it to be used only when reading table data, and continue to
> assume that object creation commands can be emitted as-is.  That would
> at least get us out of needing to parse dollar-quoted literals, which
> aren't used in the INSERT commands.

Attached is a patch that restores a much-simplified version of the mini
lexer; it deals only with the sorts of things that dumpTableData_insert
actually emits.  Fixing the standard_conforming_strings issue turns out
to be a one-liner, because the setting is in fact readily
available to this code.  Maybe we should have done it that way to begin
with :-( ... though I admit to being glad to have gotten rid of the very
questionable dollar-quote-recognition code that was there before.

Barring objections to the approach, I'll apply and back-patch this
tomorrow.

                        regards, tom lane

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index d9edebb0f48308787bf263cd90504d3c5ed80e8b..234e50fb734573f9f3287d952821702a0838c4fd 100644
*** a/src/bin/pg_dump/pg_backup_archiver.c
--- b/src/bin/pg_dump/pg_backup_archiver.c
*************** restore_toc_entry(ArchiveHandle *AH, Toc
*** 620,639 ****
  					if (te->copyStmt && strlen(te->copyStmt) > 0)
  					{
  						ahprintf(AH, "%s", te->copyStmt);
! 						AH->writingCopyData = true;
  					}
  
  					(*AH->PrintTocDataPtr) (AH, te, ropt);
  
  					/*
  					 * Terminate COPY if needed.
  					 */
! 					if (AH->writingCopyData)
! 					{
! 						if (RestoringToDB(AH))
! 							EndDBCopyMode(AH, te);
! 						AH->writingCopyData = false;
! 					}
  
  					/* close out the transaction started above */
  					if (is_parallel && te->created)
--- 620,639 ----
  					if (te->copyStmt && strlen(te->copyStmt) > 0)
  					{
  						ahprintf(AH, "%s", te->copyStmt);
! 						AH->outputKind = OUTPUT_COPYDATA;
  					}
+ 					else
+ 						AH->outputKind = OUTPUT_OTHERDATA;
  
  					(*AH->PrintTocDataPtr) (AH, te, ropt);
  
  					/*
  					 * Terminate COPY if needed.
  					 */
! 					if (AH->outputKind == OUTPUT_COPYDATA &&
! 						RestoringToDB(AH))
! 						EndDBCopyMode(AH, te);
! 					AH->outputKind = OUTPUT_SQLCMDS;
  
  					/* close out the transaction started above */
  					if (is_parallel && te->created)
*************** _allocAH(const char *FileSpec, const Arc
*** 1975,1980 ****
--- 1975,1982 ----
  	AH->mode = mode;
  	AH->compression = compression;
  
+ 	memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
+ 
  	/* Open stdout with no compression for AH output handle */
  	AH->gzOut = 0;
  	AH->OF = stdout;
*************** CloneArchive(ArchiveHandle *AH)
*** 4194,4200 ****
  	clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
  	memcpy(clone, AH, sizeof(ArchiveHandle));
  
! 	/* Handle format-independent fields ... none at the moment */
  
  	/* The clone will have its own connection, so disregard connection state */
  	clone->connection = NULL;
--- 4196,4203 ----
  	clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
  	memcpy(clone, AH, sizeof(ArchiveHandle));
  
! 	/* Handle format-independent fields */
! 	memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse));
  
  	/* The clone will have its own connection, so disregard connection state */
  	clone->connection = NULL;
*************** DeCloneArchive(ArchiveHandle *AH)
*** 4227,4233 ****
  	/* Clear format-specific state */
  	(AH->DeClonePtr) (AH);
  
! 	/* Clear state allocated by CloneArchive ... none at the moment */
  
  	/* Clear any connection-local state */
  	if (AH->currUser)
--- 4230,4238 ----
  	/* Clear format-specific state */
  	(AH->DeClonePtr) (AH);
  
! 	/* Clear state allocated by CloneArchive */
! 	if (AH->sqlparse.curCmd)
! 		destroyPQExpBuffer(AH->sqlparse.curCmd);
  
  	/* Clear any connection-local state */
  	if (AH->currUser)
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 7a4fd360737ab3c48fdd8776879f804e0a1062a4..6dd5158ab4d9f7ee815e555029839190f335d9a1 100644
*** a/src/bin/pg_dump/pg_backup_archiver.h
--- b/src/bin/pg_dump/pg_backup_archiver.h
*************** typedef size_t (*CustomOutPtr) (struct _
*** 134,139 ****
--- 134,153 ----
  
  typedef enum
  {
+ 	SQL_SCAN = 0,				/* normal */
+ 	SQL_IN_SINGLE_QUOTE,		/* '...' literal */
+ 	SQL_IN_DOUBLE_QUOTE			/* "..." identifier */
+ } sqlparseState;
+ 
+ typedef struct
+ {
+ 	sqlparseState state;		/* see above */
+ 	bool		backSlash;		/* next char is backslash quoted? */
+ 	PQExpBuffer curCmd;			/* incomplete line (NULL if not created) */
+ } sqlparseInfo;
+ 
+ typedef enum
+ {
  	STAGE_NONE = 0,
  	STAGE_INITIALIZING,
  	STAGE_PROCESSING,
*************** typedef enum
*** 142,147 ****
--- 156,168 ----
  
  typedef enum
  {
+ 	OUTPUT_SQLCMDS = 0,			/* emitting general SQL commands */
+ 	OUTPUT_COPYDATA,			/* writing COPY data */
+ 	OUTPUT_OTHERDATA			/* writing data as INSERT commands */
+ } ArchiverOutput;
+ 
+ typedef enum
+ {
  	REQ_SCHEMA = 1,
  	REQ_DATA = 2,
  	REQ_ALL = REQ_SCHEMA + REQ_DATA
*************** typedef struct _archiveHandle
*** 167,172 ****
--- 188,195 ----
  								 * Added V1.7 */
  	ArchiveFormat format;		/* Archive format */
  
+ 	sqlparseInfo sqlparse;		/* state for parsing INSERT data */
+ 
  	time_t		createDate;		/* Date archive created */
  
  	/*
*************** typedef struct _archiveHandle
*** 217,223 ****
  	PGconn	   *connection;
  	int			connectToDB;	/* Flag to indicate if direct DB connection is
  								 * required */
! 	bool		writingCopyData;	/* True when we are sending COPY data */
  	bool		pgCopyIn;		/* Currently in libpq 'COPY IN' mode. */
  
  	int			loFd;			/* BLOB fd */
--- 240,246 ----
  	PGconn	   *connection;
  	int			connectToDB;	/* Flag to indicate if direct DB connection is
  								 * required */
! 	ArchiverOutput outputKind;	/* Flag for what we're currently writing */
  	bool		pgCopyIn;		/* Currently in libpq 'COPY IN' mode. */
  
  	int			loFd;			/* BLOB fd */
diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c
index bd1b8efac8285b4df741b4b814b889a0f6c1d124..62c8b3356c6f5e8c164ba4bc36dcbb16072d5ca2 100644
*** a/src/bin/pg_dump/pg_backup_db.c
--- b/src/bin/pg_dump/pg_backup_db.c
*************** ExecuteSqlCommand(ArchiveHandle *AH, con
*** 365,378 ****
  
  
  /*
   * Implement ahwrite() for direct-to-DB restore
   */
  int
  ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
  {
! 	if (AH->writingCopyData)
  	{
  		/*
  		 * We drop the data on the floor if libpq has failed to enter COPY
  		 * mode; this allows us to behave reasonably when trying to continue
  		 * after an error in a COPY command.
--- 365,456 ----
  
  
  /*
+  * Process non-COPY table data (that is, INSERT commands).
+  *
+  * The commands have been run together as one long string for compressibility,
+  * and we are receiving them in bufferloads with arbitrary boundaries, so we
+  * have to locate command boundaries and save partial commands across calls.
+  * All state must be kept in AH->sqlparse, not in local variables of this
+  * routine.  We assume that AH->sqlparse was filled with zeroes when created.
+  *
+  * We have to lex the data to the extent of identifying literals and quoted
+  * identifiers, so that we can recognize statement-terminating semicolons.
+  * We assume that INSERT data will not contain SQL comments, E'' literals,
+  * or dollar-quoted strings, so this is much simpler than a full SQL lexer.
+  */
+ static void
+ ExecuteInsertCommands(ArchiveHandle *AH, const char *buf, size_t bufLen)
+ {
+ 	const char *qry = buf;
+ 	const char *eos = buf + bufLen;
+ 
+ 	/* initialize command buffer if first time through */
+ 	if (AH->sqlparse.curCmd == NULL)
+ 		AH->sqlparse.curCmd = createPQExpBuffer();
+ 
+ 	for (; qry < eos; qry++)
+ 	{
+ 		char	ch = *qry;
+ 
+ 		/* For neatness, we skip any newlines between commands */
+ 		if (!(ch == '\n' && AH->sqlparse.curCmd->len == 0))
+ 			appendPQExpBufferChar(AH->sqlparse.curCmd, ch);
+ 
+ 		switch (AH->sqlparse.state)
+ 		{
+ 			case SQL_SCAN:		/* Default state == 0, set in _allocAH */
+ 				if (ch == ';')
+ 				{
+ 					/*
+ 					 * We've found the end of a statement. Send it and reset
+ 					 * the buffer.
+ 					 */
+ 					ExecuteSqlCommand(AH, AH->sqlparse.curCmd->data,
+ 									  "could not execute query");
+ 					resetPQExpBuffer(AH->sqlparse.curCmd);
+ 				}
+ 				else if (ch == '\'')
+ 				{
+ 					AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
+ 					AH->sqlparse.backSlash = false;
+ 				}
+ 				else if (ch == '"')
+ 				{
+ 					AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
+ 				}
+ 				break;
+ 
+ 			case SQL_IN_SINGLE_QUOTE:
+ 				/* We needn't handle '' specially */
+ 				if (ch == '\'' && !AH->sqlparse.backSlash)
+ 					AH->sqlparse.state = SQL_SCAN;
+ 				else if (ch == '\\' && !AH->public.std_strings)
+ 					AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
+ 				else
+ 					AH->sqlparse.backSlash = false;
+ 				break;
+ 
+ 			case SQL_IN_DOUBLE_QUOTE:
+ 				/* We needn't handle "" specially */
+ 				if (ch == '"')
+ 					AH->sqlparse.state = SQL_SCAN;
+ 				break;
+ 		}
+ 	}
+ }
+ 
+ 
+ /*
   * Implement ahwrite() for direct-to-DB restore
   */
  int
  ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
  {
! 	if (AH->outputKind == OUTPUT_COPYDATA)
  	{
  		/*
+ 		 * COPY data.
+ 		 *
  		 * We drop the data on the floor if libpq has failed to enter COPY
  		 * mode; this allows us to behave reasonably when trying to continue
  		 * after an error in a COPY command.
*************** ExecuteSqlCommandBuf(ArchiveHandle *AH, 
*** 382,390 ****
--- 460,478 ----
  			die_horribly(AH, modulename, "error returned by PQputCopyData: %s",
  						 PQerrorMessage(AH->connection));
  	}
+ 	else if (AH->outputKind == OUTPUT_OTHERDATA)
+ 	{
+ 		/*
+ 		 * Table data expressed as INSERT commands.
+ 		 */
+ 		ExecuteInsertCommands(AH, buf, bufLen);
+ 	}
  	else
  	{
  		/*
+ 		 * General SQL commands; we assume that commands will not be split
+ 		 * across calls.
+ 		 *
  		 * In most cases the data passed to us will be a null-terminated
  		 * string, but if it's not, we have to add a trailing null.
  		 */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8db4071684780ca5c34f3089fbb02564e220f27e..d1598ea4e98f8db05a39b0de42c9bc3465bb8353 100644
*** a/src/bin/pg_dump/pg_dump.c
--- b/src/bin/pg_dump/pg_dump.c
*************** dumpTableData_copy(Archive *fout, void *
*** 1399,1404 ****
--- 1399,1412 ----
  	return 1;
  }
  
+ /*
+  * Dump table data using INSERT commands.
+  *
+  * Caution: when we restore from an archive file direct to database, the
+  * INSERT commands emitted by this function have to be parsed by
+  * pg_backup_db.c's ExecuteInsertCommands(), which will not handle comments,
+  * E'' strings, or dollar-quoted strings.  So don't emit anything like that.
+  */
  static int
  dumpTableData_insert(Archive *fout, void *dcontext)
  {
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to