Re: [PATCH] Add CANONICAL option to xmlserialize

Jim Jones Fri, 09 Feb 2024 05:19:42 -0800

On 05.10.23 09:38, Jim Jones wrote:
>
> v8 attached changes the default behaviour to WITH COMMENTS.
v9 attached with rebase due to changes done to primnodes.h in 615f5f6


-- 
Jim

From fe51a1826b75b778c21f559236b23d340a10d703 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jo...@uni-muenster.de>
Date: Fri, 9 Feb 2024 13:51:44 +0100
Subject: [PATCH v9] Add CANONICAL output format to xmlserialize

This patch introduces the CANONICAL option to xmlserialize, which
serializes xml documents in their canonical form - as described in
the W3C Canonical XML Version 1.1 specification. This option can
be used with the additional parameter WITH [NO] COMMENTS to keep
or remove xml comments from the canonical xml output. In case no
parameter is provided, WITH COMMENTS will be used as default. This
feature is based on the function xmlC14NDocDumpMemory from the C14N
module of libxml2.

This patch also includes regression tests and documentation.
---
 doc/src/sgml/datatype.sgml            |  41 +++-
 src/backend/executor/execExprInterp.c |   2 +-
 src/backend/parser/gram.y             |  21 +-
 src/backend/parser/parse_expr.c       |   2 +-
 src/backend/utils/adt/xml.c           | 272 ++++++++++++++++----------
 src/include/nodes/parsenodes.h        |   1 +
 src/include/nodes/primnodes.h         |  11 ++
 src/include/parser/kwlist.h           |   1 +
 src/include/utils/xml.h               |   2 +-
 src/test/regress/expected/xml.out     | 114 +++++++++++
 src/test/regress/expected/xml_1.out   | 108 ++++++++++
 src/test/regress/expected/xml_2.out   | 114 +++++++++++
 src/test/regress/sql/xml.sql          |  63 ++++++
 src/tools/pgindent/typedefs.list      |   1 +
 14 files changed, 643 insertions(+), 110 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 66510ee031..ec2f1137c8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4472,7 +4472,7 @@ xml '<foo>bar</foo>'
     <type>xml</type>, uses the function
     <function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
 <synopsis>
-XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
+XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT | CANONICAL [ WITH [ NO ] COMMENTS ] ] )
 </synopsis>
     <replaceable>type</replaceable> can be
     <type>character</type>, <type>character varying</type>, or
@@ -4489,6 +4489,45 @@ XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <repla
     type likewise produces the original string.
    </para>
 
+   <para>
+    The option <type>CANONICAL</type> converts a given
+    XML document to its <ulink url="https://www.w3.org/TR/xml-c14n11/#Terminology">canonical form</ulink>
+    based on the <ulink url="https://www.w3.org/TR/xml-c14n11/">W3C Canonical XML 1.1 Specification</ulink>.
+    It is basically designed to provide applications the ability to compare xml documents or test if they
+    have been changed. The optional parameters <type>WITH COMMENTS</type> (which is the default) or
+    <type>WITH NO COMMENTS</type>, respectively, keep or remove XML comments from the given document.
+    </para>
+
+     <para>
+     Example:
+
+<screen><![CDATA[
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL);
+                                xmlserialize
+-----------------------------------------------------------------------------
+ <foo><!-- a comment --><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+SELECT
+  xmlserialize(DOCUMENT
+    '<foo>
+       <!-- a comment -->
+       <bar c="3" b="2" a="1">42</bar>
+       <empty/>
+     </foo>'::xml AS text CANONICAL WITH NO COMMENTS);
+                       xmlserialize
+-----------------------------------------------------------
+ <foo><bar a="1" b="2" c="3">42</bar><empty></empty></foo>
+(1 row)
+
+]]></screen>
+   </para>
    <para>
     When a character string value is cast to or from type
     <type>xml</type> without going through <type>XMLPARSE</type> or
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 3f20f1dd31..315ee53274 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4019,7 +4019,7 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
 				*op->resvalue =
 					PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
 														   xexpr->xmloption,
-														   xexpr->indent));
+														   xexpr->format));
 				*op->resnull = false;
 			}
 			break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 130f7fc7c3..f54858dfe9 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -616,12 +616,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	xml_root_version opt_xml_root_standalone
 %type <node>	xmlexists_argument
 %type <ival>	document_or_content
-%type <boolean>	xml_indent_option xml_whitespace_option
+%type <boolean>	xml_whitespace_option
 %type <list>	xmltable_column_list xmltable_column_option_list
 %type <node>	xmltable_column_el
 %type <defelt>	xmltable_column_option_el
 %type <list>	xml_namespace_list
 %type <target>	xml_namespace_el
+%type <ival> 	opt_xml_serialize_format
 
 %type <node>	func_application func_expr_common_subexpr
 %type <node>	func_expr func_expr_windowless
@@ -693,7 +694,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
 	BOOLEAN_P BOTH BREADTH BY
 
-	CACHE CALL CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
+	CACHE CALL CALLED CANONICAL CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLLATION COLUMN COLUMNS COMMENT COMMENTS COMMIT
 	COMMITTED COMPRESSION CONCURRENTLY CONFIGURATION CONFLICT
@@ -15677,14 +15678,14 @@ func_expr_common_subexpr:
 					$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
 									 list_make3($3, $5, $6), @1);
 				}
-			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
+			| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename opt_xml_serialize_format ')'
 				{
 					XmlSerialize *n = makeNode(XmlSerialize);
 
 					n->xmloption = $3;
 					n->expr = $4;
 					n->typeName = $6;
-					n->indent = $7;
+					n->format = $7;
 					n->location = @1;
 					$$ = (Node *) n;
 				}
@@ -15840,9 +15841,13 @@ document_or_content: DOCUMENT_P						{ $$ = XMLOPTION_DOCUMENT; }
 			| CONTENT_P								{ $$ = XMLOPTION_CONTENT; }
 		;
 
-xml_indent_option: INDENT							{ $$ = true; }
-			| NO INDENT								{ $$ = false; }
-			| /*EMPTY*/								{ $$ = false; }
+opt_xml_serialize_format:
+			INDENT									{ $$ = XMLSERIALIZE_INDENT; }
+			| NO INDENT								{ $$ = XMLSERIALIZE_NO_FORMAT; }
+			| CANONICAL								{ $$ = XMLSERIALIZE_CANONICAL; }
+			| CANONICAL WITH NO COMMENTS			{ $$ = XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS; }
+			| CANONICAL WITH COMMENTS				{ $$ = XMLSERIALIZE_CANONICAL; }
+			| /*EMPTY*/								{ $$ = XMLSERIALIZE_NO_FORMAT; }
 		;
 
 xml_whitespace_option: PRESERVE WHITESPACE_P		{ $$ = true; }
@@ -17160,6 +17165,7 @@ unreserved_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CATALOG_P
@@ -17694,6 +17700,7 @@ bare_label_keyword:
 			| CACHE
 			| CALL
 			| CALLED
+			| CANONICAL
 			| CASCADE
 			| CASCADED
 			| CASE
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 9300c7b9ab..cf7b98364b 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -2443,7 +2443,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
 	typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
 
 	xexpr->xmloption = xs->xmloption;
-	xexpr->indent = xs->indent;
+	xexpr->format = xs->format;
 	xexpr->location = xs->location;
 	/* We actually only need these to be able to parse back the expression. */
 	xexpr->type = targetType;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 3e24aba546..c311504b16 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -58,6 +58,7 @@
 #include <libxml/xmlwriter.h>
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
+#include <libxml/c14n.h>
 
 /*
  * We used to check for xmlStructuredErrorContext via a configure test; but
@@ -654,7 +655,7 @@ xmltotext(PG_FUNCTION_ARGS)
 
 
 text *
-xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
+xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, XmlSerializeFormat format)
 {
 #ifdef USE_LIBXML
 	text	   *volatile result;
@@ -667,7 +668,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	PgXmlErrorContext *xmlerrcxt;
 #endif
 
-	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
+	if (xmloption_arg != XMLOPTION_DOCUMENT && format == XMLSERIALIZE_NO_FORMAT)
 	{
 		/*
 		 * We don't actually need to do anything, so just return the
@@ -678,10 +679,23 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 #ifdef USE_LIBXML
-	/* Parse the input according to the xmloption */
-	doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
-					&parsed_xmloptiontype, &content_nodes,
-					(Node *) &escontext);
+	/*
+	 * Parse the input according to the xmloption. Canonicalization expects
+	 * well-formed XML input, so for XMLSERIALIZE_CANONICAL and
+	 * XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS we force xml_parse() to parse
+	 * 'data' as XMLOPTION_DOCUMENT regardless of the XmlOptionType given in
+	 * 'xmloption_arg'. This enables the canonicalization of CONTENT fragments
+	 * if they contain singly-rooted XML - xml_parse() will throw an error
+	 * otherwise.
+	 */
+	if(format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
+		doc = xml_parse(data, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+	else
+		doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
+						&parsed_xmloptiontype, &content_nodes,
+						(Node *) &escontext);
+
 	if (doc == NULL || escontext.error_occurred)
 	{
 		if (doc)
@@ -693,7 +707,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	}
 
 	/* If we weren't asked to indent, we're done. */
-	if (!indent)
+	if (format == XMLSERIALIZE_NO_FORMAT)
 	{
 		xmlFreeDoc(doc);
 		return (text *) data;
@@ -702,128 +716,188 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	/* Otherwise, we gotta spin up some error handling. */
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
-	PG_TRY();
+	if(format == XMLSERIALIZE_INDENT)
 	{
-		size_t		decl_len = 0;
-
-		/* The serialized data will go into this buffer. */
-		buf = xmlBufferCreate();
-
-		if (buf == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlBuffer");
-
-		/* Detect whether there's an XML declaration */
-		parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
-
-		/*
-		 * Emit declaration only if the input had one.  Note: some versions of
-		 * xmlSaveToBuffer leak memory if a non-null encoding argument is
-		 * passed, so don't do that.  We don't want any encoding conversion
-		 * anyway.
-		 */
-		if (decl_len == 0)
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
-		else
-			ctxt = xmlSaveToBuffer(buf, NULL,
-								   XML_SAVE_FORMAT);
-
-		if (ctxt == NULL || xmlerrcxt->err_occurred)
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-						"could not allocate xmlSaveCtxt");
-
-		if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
-		{
-			/* If it's a document, saving is easy. */
-			if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
-				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-							"could not save document to xmlBuffer");
-		}
-		else if (content_nodes != NULL)
+		PG_TRY();
 		{
-			/*
-			 * Deal with the case where we have non-singly-rooted XML.
-			 * libxml's dump functions don't work well for that without help.
-			 * We build a fake root node that serves as a container for the
-			 * content nodes, and then iterate over the nodes.
-			 */
-			xmlNodePtr	root;
-			xmlNodePtr	newline;
+			size_t		decl_len = 0;
+
+			/* The serialized data will go into this buffer. */
+			buf = xmlBufferCreate();
 
-			root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-			if (root == NULL || xmlerrcxt->err_occurred)
+			if (buf == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlBuffer");
 
-			/* This attaches root to doc, so we need not free it separately. */
-			xmlDocSetRootElement(doc, root);
-			xmlAddChild(root, content_nodes);
+			/* Detect whether there's an XML declaration */
+			parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
 
 			/*
-			 * We use this node to insert newlines in the dump.  Note: in at
-			 * least some libxml versions, xmlNewDocText would not attach the
-			 * node to the document even if we passed it.  Therefore, manage
-			 * freeing of this node manually, and pass NULL here to make sure
-			 * there's not a dangling link.
+			 * Emit declaration only if the input had one.  Note: some versions of
+			 * xmlSaveToBuffer leak memory if a non-null encoding argument is
+			 * passed, so don't do that.  We don't want any encoding conversion
+			 * anyway.
 			 */
-			newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
-			if (newline == NULL || xmlerrcxt->err_occurred)
+			if (decl_len == 0)
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
+			else
+				ctxt = xmlSaveToBuffer(buf, NULL,
+									XML_SAVE_FORMAT);
+
+			if (ctxt == NULL || xmlerrcxt->err_occurred)
 				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-							"could not allocate xml node");
+							"could not allocate xmlSaveCtxt");
 
-			for (xmlNodePtr node = root->children; node; node = node->next)
+			if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
 			{
-				/* insert newlines between nodes */
-				if (node->type != XML_TEXT_NODE && node->prev != NULL)
+				/* If it's a document, saving is easy. */
+				if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not save document to xmlBuffer");
+			}
+			else if (content_nodes != NULL)
+			{
+				/*
+				 * Deal with the case where we have non-singly-rooted XML.
+				 * libxml's dump functions don't work well for that without help.
+				 * We build a fake root node that serves as a container for the
+				 * content nodes, and then iterate over the nodes.
+				 */
+				xmlNodePtr	root;
+				xmlNodePtr	newline;
+
+				root = xmlNewNode(NULL, (const xmlChar *) "content-root");
+				if (root == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				/* This attaches root to doc, so we need not free it separately. */
+				xmlDocSetRootElement(doc, root);
+				xmlAddChild(root, content_nodes);
+
+				/*
+				 * We use this node to insert newlines in the dump.  Note: in at
+				 * least some libxml versions, xmlNewDocText would not attach the
+				 * node to the document even if we passed it.  Therefore, manage
+				 * freeing of this node manually, and pass NULL here to make sure
+				 * there's not a dangling link.
+				 */
+				newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
+				if (newline == NULL || xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+								"could not allocate xml node");
+
+				for (xmlNodePtr node = root->children; node; node = node->next)
 				{
-					if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+					/* insert newlines between nodes */
+					if (node->type != XML_TEXT_NODE && node->prev != NULL)
+					{
+						if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
+						{
+							xmlFreeNode(newline);
+							xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+										"could not save newline to xmlBuffer");
+						}
+					}
+
+					if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
 					{
 						xmlFreeNode(newline);
 						xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-									"could not save newline to xmlBuffer");
+									"could not save content to xmlBuffer");
 					}
 				}
 
-				if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
-				{
-					xmlFreeNode(newline);
-					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-								"could not save content to xmlBuffer");
-				}
+				xmlFreeNode(newline);
 			}
 
-			xmlFreeNode(newline);
-		}
+			if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			{
+				ctxt = NULL;		/* don't try to close it again */
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not close xmlSaveCtxtPtr");
+			}
 
-		if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		}
+		PG_CATCH();
 		{
-			ctxt = NULL;		/* don't try to close it again */
-			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
-						"could not close xmlSaveCtxtPtr");
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (buf)
+				xmlBufferFree(buf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
 		}
+		PG_END_TRY();
+
+		xmlBufferFree(buf);
+		xmlFreeDoc(doc);
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		pg_xml_done(xmlerrcxt, false);
 	}
-	PG_CATCH();
+	else if (format == XMLSERIALIZE_CANONICAL || format == XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS)
 	{
-		if (ctxt)
-			xmlSaveClose(ctxt);
-		if (buf)
-			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlChar    *xmlbuf = NULL;
+		int         nbytes;
+		int         with_comments = 0; /* 0 = no xml comments (default) */
 
-		pg_xml_done(xmlerrcxt, true);
+		PG_TRY();
+		{
+			/* 1 = keeps xml comments */
+			if (format == XMLSERIALIZE_CANONICAL)
+				with_comments = 1;
 
-		PG_RE_THROW();
-	}
-	PG_END_TRY();
+			if (doc == NULL || escontext.error_occurred)
+			{
+				if (doc)
+					xmlFreeDoc(doc);
+				/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
+				ereport(ERROR,
+						(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+						errmsg("not an XML document")));
+			}
 
-	xmlBufferFree(buf);
-	xmlFreeDoc(doc);
+			/*
+			 * This dumps the canonicalized XML doc into the xmlChar* buffer.
+			 * mode = 2 means the doc will be canonicalized using the C14N 1.1 standard.
+			 */
+			nbytes = xmlC14NDocDumpMemory(doc, NULL, 2, NULL, with_comments, &xmlbuf);
 
-	pg_xml_done(xmlerrcxt, false);
+			if(nbytes < 0 || escontext.error_occurred)
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						errmsg("could not canonicalize the given XML document")));
+
+			result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+		}
+		PG_CATCH();
+		{
+			if (ctxt)
+				xmlSaveClose(ctxt);
+			if (xmlbuf)
+				xmlFree(xmlbuf);
+			if (doc)
+				xmlFreeDoc(doc);
+
+			pg_xml_done(xmlerrcxt, true);
+
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		xmlFreeDoc(doc);
+		xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, false);
+	}
+	else
+		elog(ERROR,"invalid xmlserialize option");
 
 	return result;
 #else
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 476d55dd24..4f00d94322 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -833,6 +833,7 @@ typedef struct XmlSerialize
 	Node	   *expr;
 	TypeName   *typeName;
 	bool		indent;			/* [NO] INDENT */
+	XmlSerializeFormat	format;	/* serialization format */
 	int			location;		/* token location, or -1 if unknown */
 } XmlSerialize;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 4a154606d2..b8ee584bbb 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1528,6 +1528,14 @@ typedef enum XmlOptionType
 	XMLOPTION_CONTENT,
 } XmlOptionType;
 
+typedef enum XmlSerializeFormat
+{
+	XMLSERIALIZE_INDENT,                       /* pretty-printed xml serialization  */
+	XMLSERIALIZE_CANONICAL,                    /* canonical form with xml comments */
+	XMLSERIALIZE_CANONICAL_WITH_NO_COMMENTS,   /* canonical form without xml comments */
+	XMLSERIALIZE_NO_FORMAT                     /* unformatted xml representation */
+} XmlSerializeFormat;
+
 typedef struct XmlExpr
 {
 	Expr		xpr;
@@ -1550,6 +1558,9 @@ typedef struct XmlExpr
 	int32		typmod pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	int			location;
+	/* serialization format: one of the XmlSerializeFormat (XMLSERIALIZE_*) values */
+	XmlSerializeFormat format pg_node_attr(query_jumble_ignore);
+
 } XmlExpr;
 
 /*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 2331acac09..187e711476 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -68,6 +68,7 @@ PG_KEYWORD("by", BY, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cache", CACHE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("call", CALL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("called", CALLED, UNRESERVED_KEYWORD, BARE_LABEL)
+PG_KEYWORD("canonical", CANONICAL, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascade", CASCADE, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("cascaded", CASCADED, UNRESERVED_KEYWORD, BARE_LABEL)
 PG_KEYWORD("case", CASE, RESERVED_KEYWORD, BARE_LABEL)
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index ed20e21375..05f11c0517 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -78,7 +78,7 @@ extern xmltype *xmlpi(const char *target, text *arg, bool arg_is_null, bool *res
 extern xmltype *xmlroot(xmltype *data, text *version, int standalone);
 extern bool xml_is_document(xmltype *arg);
 extern text *xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg,
-									bool indent);
+									XmlSerializeFormat format);
 extern char *escape_xml(const char *str);
 
 extern char *map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, bool escape_period);
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 6500cff885..3f9fce8a77 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -672,6 +672,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org";
+      xmlns:a="http://www.w3.org";
+      xmlns="http://example.org";>
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org";     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org"; xmlns:a="http://www.w3.org"; xmlns:b="http://www.ietf.org"; attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org"; xmlns:a="http://www.w3.org"; xmlns:b="http://www.ietf.org"; attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index 9323b84ae2..6a76a4fd9a 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -443,6 +443,114 @@ ERROR:  unsupported XML feature
 LINE 1: SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><...
                                      ^
 DETAIL:  This functionality requires the server to be built with libxml support.
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+ERROR:  unsupported XML feature
+LINE 2:   (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+             ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+ xmlserialize 
+--------------
+(0 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+(0 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 30
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  unsupported XML feature at character 29
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 ERROR:  unsupported XML feature
 LINE 1: SELECT xml '<foo>bar</foo>' IS DOCUMENT;
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index e1d165c6c9..b47fb44b7a 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -652,6 +652,120 @@ SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text
  t
 (1 row)
 
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                                                                                                                                                                                                                                                  xmlserialize                                                                                                                                                                                                                                                                                                                                                                                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <!-- attributes and namespces will be sorted -->                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              +
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><!-- Normalization of whitespace in start and end tags --><!-- Elimination of superfluous namespace declarations, as already declared in <foo> --><bar xmlns="">42</bar><!-- empty element will be converted to start-end tag pair --><empty></empty><!-- text will be transcoded to UTF-8 --><transcode>1</transcode><!-- default attribute will be added --><!-- whitespace inside tag will be preserved --><whitespace> 321 </whitespace><!-- empty namespace will be removed of child tag --><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><!-- CDATA section will be replaced by its value --><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>+
+ <!-- comment outside doc -->
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+                                                                                                                                                                                     xmlserialize                                                                                                                                                                                     
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ <foo xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I am" attr2="all" b:attr="sorted" a:attr="out"><bar xmlns="">42</bar><empty></empty><transcode>1</transcode><whitespace> 321 </whitespace><emptyns xmlns=""><emptyns_child></emptyns_child></emptyns><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+                            xmlserialize                             
+---------------------------------------------------------------------
+ <foo><bar><!-- important comment --><val x="y">42</val></bar></foo>
+(1 row)
+
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+ xmlserialize 
+--------------
+ 
+(1 row)
+
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+ERROR:  invalid XML document
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 75
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+ERROR:  syntax error at or near "INDENT" at character 74
+\set VERBOSITY default
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
  ?column? 
 ----------
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 953bac09e4..a2f1c3566d 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -168,6 +168,69 @@ SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
 -- 'no indent' = not using 'no indent'
 SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text) = xmlserialize(CONTENT '<foo><bar><val x="y">42</val></bar></foo>' AS text NO INDENT);
+-- xmlserialize: canonical
+CREATE TABLE xmltest_serialize (id int, doc xml);
+INSERT INTO xmltest_serialize VALUES
+  (1,'<?xml version="1.0" encoding="ISO-8859-1"?>
+  <!DOCTYPE doc SYSTEM "doc.dtd" [
+                  <!ENTITY val "42">
+      <!ATTLIST xyz attr CDATA "default">
+  ]>
+
+  <!-- attributes and namespces will be sorted -->
+  <foo a:attr="out" b:attr="sorted" attr2="all" attr="I am"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org">
+
+    <!-- Normalization of whitespace in start and end tags -->
+    <!-- Elimination of superfluous namespace declarations, as already declared in <foo> -->
+    <bar     xmlns="" xmlns:a="http://www.w3.org"     >&val;</bar     >
+
+    <!-- empty element will be converted to start-end tag pair -->
+    <empty/>
+
+    <!-- text will be transcoded to UTF-8 -->
+    <transcode>&#49;</transcode>
+
+    <!-- default attribute will be added -->
+    <!-- whitespace inside tag will be preserved -->
+    <whitespace> 321 </whitespace>
+
+    <!-- empty namespace will be removed of child tag -->
+    <emptyns  xmlns="" >
+       <emptyns_child xmlns=""></emptyns_child>
+    </emptyns>
+
+    <!-- CDATA section will be replaced by its value -->
+    <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+  </foo>
+  <!-- comment outside doc -->'::xml),
+  (2,'<foo>
+        <bar>
+          <!-- important comment -->
+          <val x="y">42</val>
+        </bar>
+    </foo>   '::xml);
+
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(DOCUMENT doc AS text CANONICAL) = xmlserialize(DOCUMENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH NO COMMENTS) FROM xmltest_serialize WHERE id = 1;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize WHERE id = 2;
+SELECT xmlserialize(CONTENT doc AS text CANONICAL) = xmlserialize(CONTENT doc AS text CANONICAL WITH COMMENTS) FROM xmltest_serialize;
+SELECT xmlserialize(DOCUMENT NULL AS text CANONICAL);
+SELECT xmlserialize(CONTENT NULL AS text CANONICAL);
+\set VERBOSITY terse
+SELECT xmlserialize(DOCUMENT '' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '  ' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '' AS text CANONICAL);
+SELECT xmlserialize(CONTENT '  ' AS text CANONICAL);
+SELECT xmlserialize(CONTENT 'foo' AS text CANONICAL);
+SELECT xmlserialize(DOCUMENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+SELECT xmlserialize(CONTENT '<foo><bar>73</bar></foo>' AS text CANONICAL INDENT);
+\set VERBOSITY default
 
 SELECT xml '<foo>bar</foo>' IS DOCUMENT;
 SELECT xml '<foo>bar</foo><bar>foo</bar>' IS DOCUMENT;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 91433d439b..5d791ac732 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3141,6 +3141,7 @@ XmlExpr
 XmlExprOp
 XmlOptionType
 XmlSerialize
+XmlSerializeFormat
 XmlTableBuilderData
 YYLTYPE
 YYSTYPE
-- 
2.34.1

Re: [PATCH] Add CANONICAL option to xmlserialize

Reply via email to