On 24.04.23 03:18, Tom Lane wrote:
> I wouldn't actually *use* pchomp here, because that induces an unnecessary
> copy of the result string.  I had in mind more like copying pchomp's code
> to count up the trailing newline(s) and then pass a corrected length
> to cstring_to_text_with_len.

Changed.

> You could simplify matters by doing that in all cases, too.  It should
> never find anything to remove in the non-indented case, but the check
> should be of negligible cost in context.

I'm not sure I understood it correctly.

The non-indented cases should never find anything to remove, and indented cases with CONTENT strings do not add trailing newlines, so this is only applicable to DOCUMENT ... INDENT, right?

Would something like this suffice?

if(xmloption_arg != XMLOPTION_DOCUMENT)
    result = (text *) xmlBuffer_to_xmltype(buf);
else
{
    int    len = xmlBufferLength(buf);
    const char *xmloutput = (const char *) xmlBufferContent(buf);

    while (len > 0 && xmloutput[len - 1] == '\n')
        len--;

    result = cstring_to_text_with_len(xmloutput, len);
}

If we really agree on manually removing the trailing newlines, I will open a CF entry for this.

Best, Jim

From aa5eafb319da04d2e67a1540af0d088af6d82edb Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jo...@uni-muenster.de>
Date: Mon, 24 Apr 2023 10:02:32 +0200
Subject: [PATCH v1] Remove trailing newlines from xmlserialize indent output

This removes the trailing newlines added to xmlserialize indent
output from xml strings of type DOCUMENT.

Reported by: Pavel Stehule
Discussion: https://www.postgresql.org/message-id/CAFj8pRCNTi2yHBXcdYf-cYZ63R8Laf9L49Q_uxt%2BA5WXKPPhxg%40mail.gmail.com
---
 src/backend/utils/adt/xml.c         | 20 +++++++++++++++++++-
 src/test/regress/expected/xml.out   | 15 +++++----------
 src/test/regress/expected/xml_2.out | 15 +++++----------
 3 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 15adbd6a01..404a2f455d 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -771,7 +771,25 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 						"could not close xmlSaveCtxtPtr");
 		}
 
-		result = (text *) xmlBuffer_to_xmltype(buf);
+		/*
+		 * xmlSaveDoc appends a trailing newline to its output, so strip
+		 * any trailing newlines here.  This only affects DOCUMENT xml
+		 * strings: the fragments of CONTENT strings are stored into the
+		 * xmlBufferPtr using xmlSaveTree, which does not add a trailing
+		 * newline.
+		 */
+		if(xmloption_arg != XMLOPTION_DOCUMENT)
+			result = (text *) xmlBuffer_to_xmltype(buf);
+		else
+		{
+			int	len = xmlBufferLength(buf);
+			const char *xmloutput = (const char *) xmlBufferContent(buf);
+
+			while (len > 0 && xmloutput[len - 1] == '\n')
+				len--;
+
+			result = cstring_to_text_with_len(xmloutput, len);
+		}
 	}
 	PG_CATCH();
 	{
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 398345ca67..b689f86fe6 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -494,8 +494,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
    <bar>                +
      <val x="y">42</val>+
    </bar>               +
- </foo>                 +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text INDENT);
@@ -555,8 +554,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val><val x="y">text node<
      <val x="y">42</val>                    +
      <val x="y">text node<val>73</val></val>+
    </bar>                                   +
- </foo>                                     +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><val x="y">text node<val>73</val></val></bar></foo>' AS text INDENT);
@@ -610,8 +608,7 @@ SELECT xmlserialize(DOCUMENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><v
    <bar>                               +
      <val>73</val>                     +
    </bar>                              +
- </foo>                                +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<?xml version="1.0" encoding="UTF-8"?><foo><bar><val>73</val></bar></foo>' AS text INDENT);
@@ -629,8 +626,7 @@ SELECT xmlserialize(DOCUMENT '<!DOCTYPE a><a/>' AS text INDENT);
  xmlserialize 
 --------------
  <!DOCTYPE a>+
- <a/>        +
- 
+ <a/>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<!DOCTYPE a><a/>' AS text INDENT);
@@ -647,8 +643,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar></bar></foo>' AS text INDENT);
 --------------
  <foo>       +
    <bar/>    +
- </foo>      +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 43c2558352..a2eeff8369 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -474,8 +474,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val></bar></foo>' AS text
    <bar>                +
      <val x="y">42</val>+
    </bar>               +
- </foo>                 +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val></bar></foo>' AS text INDENT);
@@ -535,8 +534,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar><val x="y">42</val><val x="y">text node<
      <val x="y">42</val>                    +
      <val x="y">text node<val>73</val></val>+
    </bar>                                   +
- </foo>                                     +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<foo><bar><val x="y">42</val><val x="y">text node<val>73</val></val></bar></foo>' AS text INDENT);
@@ -590,8 +588,7 @@ SELECT xmlserialize(DOCUMENT '<?xml version="1.0" encoding="UTF-8"?><foo><bar><v
    <bar>                               +
      <val>73</val>                     +
    </bar>                              +
- </foo>                                +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<?xml version="1.0" encoding="UTF-8"?><foo><bar><val>73</val></bar></foo>' AS text INDENT);
@@ -609,8 +606,7 @@ SELECT xmlserialize(DOCUMENT '<!DOCTYPE a><a/>' AS text INDENT);
  xmlserialize 
 --------------
  <!DOCTYPE a>+
- <a/>        +
- 
+ <a/>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<!DOCTYPE a><a/>' AS text INDENT);
@@ -627,8 +623,7 @@ SELECT xmlserialize(DOCUMENT '<foo><bar></bar></foo>' AS text INDENT);
 --------------
  <foo>       +
    <bar/>    +
- </foo>      +
- 
+ </foo>
 (1 row)
 
 SELECT xmlserialize(CONTENT  '<foo><bar></bar></foo>' AS text INDENT);
-- 
2.25.1

Reply via email to