I wrote:
> It's kind of sad that this issue is still present in the wild ten
> years later. But anyway, I wonder if we could dodge the issue
> simply by modifying these error-provoking cases to have some
> whitespace at the end of the XML input? I don't think I have
> a problematic version of libxml2 to test that with, though.
Hah, this idea does mostly work. Finding that no system I had at
hand had a libxml2 old enough to exhibit the problem, I installed
libxml2 2.9.3 from source. (That's the first version containing
the problematic CVE fix, and it does behave as xml_2.out expects.)
I found that four of the inconsistent messages could be fixed this
way. The other three problem cases have input like '<wrong' or
all-spaces; trailing whitespace doesn't help there, evidently because
we need the error to be recognized before reaching end-of-input.
Now, it seems to me that we are not especially interested in whether
libxml2 produces one error message or another one for given input;
what we need to test is only that Postgres reports the error
properly. So I don't have a problem with tweaking those bad inputs
a bit more aggressively to make them be something that different
libxml2 versions will report identically. The result of my
experiments is attached: it yields an expected/xml.out that matches
the output of both libxml2 2.9.3 and recent versions. So if we do
this, we could drop xml_2.out.
This patch is WIP because I've not updated xml_1.out to match.
But that's trivial, and dropping xml_2.out is equally boring,
so I left those parts out of this demo patch.
regards, tom lane
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 3e80a7ff465..d3c7f626738 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -4,13 +4,13 @@ CREATE TABLE xmltest (
);
INSERT INTO xmltest VALUES (1, '<value>one</value>');
INSERT INTO xmltest VALUES (2, '<value>two</value>');
-INSERT INTO xmltest VALUES (3, '<wrong');
+INSERT INTO xmltest VALUES (3, '<value>two</wrong> ');
ERROR: invalid XML content
-LINE 1: INSERT INTO xmltest VALUES (3, '<wrong');
+LINE 1: INSERT INTO xmltest VALUES (3, '<value>two</wrong> ');
^
-DETAIL: line 1: Couldn't find end of Start Tag wrong line 1
-<wrong
- ^
+DETAIL: line 1: Opening and ending tag mismatch: value line 1 and wrong
+<value>two</wrong>
+ ^
SELECT * FROM xmltest;
id | data
----+--------------------
@@ -89,13 +89,13 @@ SELECT xmlconcat(1, 2);
ERROR: argument of XMLCONCAT must be type xml, not type integer
LINE 1: SELECT xmlconcat(1, 2);
^
-SELECT xmlconcat('bad', '<syntax');
+SELECT xmlconcat('bad', '<wrong></syntax> ');
ERROR: invalid XML content
-LINE 1: SELECT xmlconcat('bad', '<syntax');
+LINE 1: SELECT xmlconcat('bad', '<wrong></syntax> ');
^
-DETAIL: line 1: Couldn't find end of Start Tag syntax line 1
-<syntax
- ^
+DETAIL: line 1: Opening and ending tag mismatch: wrong line 1 and syntax
+<wrong></syntax>
+ ^
SELECT xmlconcat('<foo/>', NULL, '<?xml version="1.1" standalone="no"?><bar/>');
xmlconcat
--------------
@@ -271,13 +271,13 @@ SELECT xmlparse(content '<relativens xmlns=''relative''/>');
<relativens xmlns='relative'/>
(1 row)
-SELECT xmlparse(content '<twoerrors>&idontexist;</unbalanced>');
+SELECT xmlparse(content '<twoerrors>&idontexist;</unbalanced> ');
ERROR: invalid XML content
DETAIL: line 1: Entity 'idontexist' not defined
-<twoerrors>&idontexist;</unbalanced>
+<twoerrors>&idontexist;</unbalanced>
^
line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
-<twoerrors>&idontexist;</unbalanced>
+<twoerrors>&idontexist;</unbalanced>
^
SELECT xmlparse(content '<nosuchprefix:tag/>');
xmlparse
@@ -285,11 +285,11 @@ SELECT xmlparse(content '<nosuchprefix:tag/>');
<nosuchprefix:tag/>
(1 row)
-SELECT xmlparse(document ' ');
+SELECT xmlparse(document '!');
ERROR: invalid XML document
DETAIL: line 1: Start tag expected, '<' not found
-
- ^
+!
+^
SELECT xmlparse(document 'abc');
ERROR: invalid XML document
DETAIL: line 1: Start tag expected, '<' not found
@@ -301,21 +301,21 @@ SELECT xmlparse(document '<abc>x</abc>');
<abc>x</abc>
(1 row)
-SELECT xmlparse(document '<invalidentity>&</abc>');
+SELECT xmlparse(document '<invalidentity>&</abc> ');
ERROR: invalid XML document
DETAIL: line 1: xmlParseEntityRef: no name
-<invalidentity>&</abc>
+<invalidentity>&</abc>
^
line 1: Opening and ending tag mismatch: invalidentity line 1 and abc
-<invalidentity>&</abc>
+<invalidentity>&</abc>
^
-SELECT xmlparse(document '<undefinedentity>&idontexist;</abc>');
+SELECT xmlparse(document '<undefinedentity>&idontexist;</abc> ');
ERROR: invalid XML document
DETAIL: line 1: Entity 'idontexist' not defined
-<undefinedentity>&idontexist;</abc>
+<undefinedentity>&idontexist;</abc>
^
line 1: Opening and ending tag mismatch: undefinedentity line 1 and abc
-<undefinedentity>&idontexist;</abc>
+<undefinedentity>&idontexist;</abc>
^
SELECT xmlparse(document '<invalidns xmlns=''<''/>');
xmlparse
@@ -329,13 +329,13 @@ SELECT xmlparse(document '<relativens xmlns=''relative''/>');
<relativens xmlns='relative'/>
(1 row)
-SELECT xmlparse(document '<twoerrors>&idontexist;</unbalanced>');
+SELECT xmlparse(document '<twoerrors>&idontexist;</unbalanced> ');
ERROR: invalid XML document
DETAIL: line 1: Entity 'idontexist' not defined
-<twoerrors>&idontexist;</unbalanced>
+<twoerrors>&idontexist;</unbalanced>
^
line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
-<twoerrors>&idontexist;</unbalanced>
+<twoerrors>&idontexist;</unbalanced>
^
SELECT xmlparse(document '<nosuchprefix:tag/>');
xmlparse
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 2b8445e499e..0366b776119 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -5,7 +5,7 @@ CREATE TABLE xmltest (
INSERT INTO xmltest VALUES (1, '<value>one</value>');
INSERT INTO xmltest VALUES (2, '<value>two</value>');
-INSERT INTO xmltest VALUES (3, '<wrong');
+INSERT INTO xmltest VALUES (3, '<value>two</wrong> ');
SELECT * FROM xmltest;
@@ -30,7 +30,7 @@ SELECT xmlconcat(xmlcomment('hello'),
SELECT xmlconcat('hello', 'you');
SELECT xmlconcat(1, 2);
-SELECT xmlconcat('bad', '<syntax');
+SELECT xmlconcat('bad', '<wrong></syntax> ');
SELECT xmlconcat('<foo/>', NULL, '<?xml version="1.1" standalone="no"?><bar/>');
SELECT xmlconcat('<?xml version="1.1"?><foo/>', NULL, '<?xml version="1.1" standalone="no"?><bar/>');
SELECT xmlconcat(NULL);
@@ -75,17 +75,17 @@ SELECT xmlparse(content '<invalidentity>&</invalidentity>');
SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
SELECT xmlparse(content '<invalidns xmlns=''<''/>');
SELECT xmlparse(content '<relativens xmlns=''relative''/>');
-SELECT xmlparse(content '<twoerrors>&idontexist;</unbalanced>');
+SELECT xmlparse(content '<twoerrors>&idontexist;</unbalanced> ');
SELECT xmlparse(content '<nosuchprefix:tag/>');
-SELECT xmlparse(document ' ');
+SELECT xmlparse(document '!');
SELECT xmlparse(document 'abc');
SELECT xmlparse(document '<abc>x</abc>');
-SELECT xmlparse(document '<invalidentity>&</abc>');
-SELECT xmlparse(document '<undefinedentity>&idontexist;</abc>');
+SELECT xmlparse(document '<invalidentity>&</abc> ');
+SELECT xmlparse(document '<undefinedentity>&idontexist;</abc> ');
SELECT xmlparse(document '<invalidns xmlns=''<''/>');
SELECT xmlparse(document '<relativens xmlns=''relative''/>');
-SELECT xmlparse(document '<twoerrors>&idontexist;</unbalanced>');
+SELECT xmlparse(document '<twoerrors>&idontexist;</unbalanced> ');
SELECT xmlparse(document '<nosuchprefix:tag/>');