Package: release.debian.org
Severity: normal
Tags: bookworm
X-Debbugs-Cc: [email protected], 
[email protected], [email protected], [email protected], 
[email protected], [email protected]
Control: affects -1 + src:libxml-libxml-perl
User: [email protected]
Usertags: pu

Hi,

[ Reason ]
libxml-libxml-perl is affected by CVE-2026-8177 (#1136300). The issue is
not severe enough to warrant a security advisory, hence this proposed
update.

[ Impact ]
Without this update, bookworm remains vulnerable to CVE-2026-8177.

[ Tests ]
The package has an extensive test suite; in addition, a debusine upload
was done, see
https://debusine.debian.net/debian/developers/work-request/896109/ .

[ Risks ]
The patch is taken from upstream. The fix has been exposed in unstable
for roughly a month (and has migrated to testing).

[ Checklist ]
  [x] *all* changes are documented in the d/changelog
  [x] I reviewed all changes and I approve them
  [x] attach debdiff against the package in (old)stable
  [x] the issue is verified as fixed in unstable

[ Changes ]
Fix the issue by replacing domParseChar() with libxml2's
xmlValidateName() to prevent an out-of-bounds (OOB) UTF-8 read.

[ Other info ]
None.

Regards,
Salvatore
diff -Nru libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog 
libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog
--- libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog       
2022-02-11 19:29:49.000000000 +0000
+++ libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/changelog       
2026-06-27 11:41:54.000000000 +0000
@@ -1,3 +1,11 @@
+libxml-libxml-perl (2.0207+dfsg+really+2.0134-1+deb12u1) bookworm; 
urgency=medium
+
+  * Team upload.
+  * fix: replace domParseChar with xmlValidateName to prevent OOB UTF-8 read
+    (CVE-2026-8177) (Closes: #1136300)
+
+ -- Salvatore Bonaccorso <[email protected]>  Sat, 27 Jun 2026 13:41:54 +0200
+
 libxml-libxml-perl (2.0207+dfsg+really+2.0134-1) unstable; urgency=medium
 
   * Team upload
diff -Nru 
libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch
 
libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch
--- 
libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch
      1970-01-01 00:00:00.000000000 +0000
+++ 
libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/fix-replace-domParseChar-with-xmlValidateName-to-pre.patch
      2026-06-27 11:41:54.000000000 +0000
@@ -0,0 +1,327 @@
+From: Toddr Bot <[email protected]>
+Date: Tue, 19 May 2026 19:32:08 +0000
+Subject: fix: replace domParseChar with xmlValidateName to prevent OOB UTF-8
+ read
+Origin: 
https://github.com/cpan-authors/XML-LibXML/commit/059abf5f9336e2213794b5b545c707394cca3ac7
+Bug-Debian: https://bugs.debian.org/1136300
+Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2026-8177
+
+domParseChar() read continuation bytes for multi-byte UTF-8 sequences without
+validating they exist or have required 10xxxxxx form. Truncated sequences like
+"a\xF0" caused heap reads past NUL terminator. Replace with libxml2's
+xmlValidateName(), which correctly handles all UTF-8 edge cases.
+
+Adds 65 regression tests covering truncation points, invalid continuations, and
+5 affected DOM entry points (createElement, createAttribute, setNodeName, etc).
+
+Fixes https://github.com/cpan-authors/XML-LibXML/issues/146
+---
+ .gitignore                     |  1 +
+ LibXML.xs                      | 33 +------------
+ MANIFEST                       |  1 +
+ dom.c                          | 88 ---------------------------------
+ dom.h                          | 32 ------------
+ t/48_security_oob_utf8_gh146.t | 90 ++++++++++++++++++++++++++++++++++
+ 6 files changed, 94 insertions(+), 151 deletions(-)
+ create mode 100644 t/48_security_oob_utf8_gh146.t
+
+diff --git a/LibXML.xs b/LibXML.xs
+index df23bc7d292e..6f18e89c83e8 100644
+--- a/LibXML.xs
++++ b/LibXML.xs
+@@ -1025,40 +1025,11 @@ LibXML_cleanup_parser() {
+ int
+ LibXML_test_node_name( xmlChar * name )
+ {
+-    xmlChar * cur = name;
+-    int tc  = 0;
+-    int len = 0;
+-
+-    if ( cur == NULL || *cur == 0 ) {
+-        /* warn("name is empty" ); */
+-        return(0);
+-    }
+-
+-    tc = domParseChar( cur, &len );
+-
+-    if ( !( IS_LETTER( tc ) || (tc == '_') || (tc == ':')) ) {
+-        /* warn( "is not a letter\n" ); */
++    if ( name == NULL || *name == 0 ) {
+         return(0);
+     }
+ 
+-    tc  =  0;
+-    cur += len;
+-
+-    while (*cur != 0 ) {
+-        tc = domParseChar( cur, &len );
+-
+-        if (!(IS_LETTER(tc) || IS_DIGIT(tc) || (tc == '_') ||
+-             (tc == '-') || (tc == ':') || (tc == '.') ||
+-             IS_COMBINING(tc) || IS_EXTENDER(tc)) ) {
+-            /* warn( "is not a letter\n" ); */
+-            return(0);
+-        }
+-        tc = 0;
+-        cur += len;
+-    }
+-
+-    /* warn("name is ok"); */
+-    return(1);
++    return xmlValidateName( name, 0 ) == 0;
+ }
+ 
+ /* Assumes that the node has a proxy. */
+diff --git a/MANIFEST b/MANIFEST
+index 55e093298936..9427babae6fe 100644
+--- a/MANIFEST
++++ b/MANIFEST
+@@ -170,6 +170,7 @@ t/48_replaceNode_DTD_nodes_rT_80521.t
+ t/48_rt123379_setNamespace.t
+ t/48_rt55000.t
+ t/48_rt93429_recover_2_in_html_parsing.t
++t/48_security_oob_utf8_gh146.t
+ t/48importing_nodes_IDs_rt_69520.t
+ t/49_load_html.t
+ t/49callbacks_returning_undef.t
+diff --git a/dom.c b/dom.c
+index 94518b0aea29..a93cce30d940 100644
+--- a/dom.c
++++ b/dom.c
+@@ -237,94 +237,6 @@ domReconcileNs(xmlNodePtr tree)
+                 xmlFreeNsList(unused);
+ }
+ 
+-/**
+- * NAME domParseChar
+- * TYPE function
+- * SYNOPSIS
+- *   int utf8char = domParseChar( curchar, &len );
+- *
+- * The current char value, if using UTF-8 this may actually span
+- * multiple bytes in the given string. This function parses an utf8
+- * character from a string into a UTF8 character (an integer). It uses
+- * a slightly modified version of libxml2's character parser. libxml2
+- * itself does not provide any function to parse characters dircetly
+- * from a string and test if they are valid utf8 characters.
+- *
+- * XML::LibXML uses this function rather than perls native UTF8
+- * support for two reasons:
+- * 1) perls UTF8 handling functions often lead to encoding errors,
+- *    which partly comes, that they are badly documented.
+- * 2) not all perl versions XML::LibXML intends to run with have native
+- *    UTF8 support.
+- *
+- * domParseChar() allows to use the very same code with all versions
+- * of perl :)
+- *
+- * Returns the current char value and its length
+- *
+- * NOTE: If the character passed to this function is not a UTF
+- * character, the return value will be 0 and the length of the
+- * character is -1!
+- */
+-int
+-domParseChar( xmlChar *cur, int *len )
+-{
+-    unsigned char c;
+-        unsigned int val;
+-
+-        /*
+-         * We are supposed to handle UTF8, check it's valid
+-         * From rfc2044: encoding of the Unicode values on UTF-8:
+-         *
+-         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+-         * 0000 0000-0000 007F   0xxxxxxx
+-         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+-         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
+-         *
+-         * Check for the 0x110000 limit too
+-         */
+-
+-    if ( cur == NULL || *cur == 0 ) {
+-        *len = 0;
+-        return(0);
+-    }
+-
+-    c = *cur;
+-    if ( c & 0x80 ) {
+-        if ((c & 0xe0) == 0xe0) {
+-            if ((c & 0xf0) == 0xf0) {
+-                /* 4-byte code */
+-                *len = 4;
+-                val = (cur[0] & 0x7) << 18;
+-                val |= (cur[1] & 0x3f) << 12;
+-                val |= (cur[2] & 0x3f) << 6;
+-                val |= cur[3] & 0x3f;
+-            } else {
+-                /* 3-byte code */
+-                *len = 3;
+-                val = (cur[0] & 0xf) << 12;
+-                val |= (cur[1] & 0x3f) << 6;
+-                val |= cur[2] & 0x3f;
+-            }
+-            } else {
+-            /* 2-byte code */
+-            *len = 2;
+-            val = (cur[0] & 0x1f) << 6;
+-            val |= cur[1] & 0x3f;
+-            }
+-        if ( !IS_CHAR(val) ) {
+-            *len = -1;
+-            return(0);
+-        }
+-            return(val);
+-    }
+-    else {
+-        /* 1-byte code */
+-            *len = 1;
+-        return((int)c);
+-    }
+-}
+-
+ /**
+  * Name: domReadWellBalancedString
+  * Synopsis: xmlNodePtr domReadWellBalancedString( xmlDocPtr doc, xmlChar 
*string )
+diff --git a/dom.h b/dom.h
+index 428b685e1e7a..32936ee3ab3a 100644
+--- a/dom.h
++++ b/dom.h
+@@ -53,38 +53,6 @@ extern "C" {
+ void
+ domReconcileNs(xmlNodePtr tree);
+ 
+-/**
+- * NAME domParseChar
+- * TYPE function
+- * SYNOPSIS
+- *   int utf8char = domParseChar( curchar, &len );
+- *
+- * The current char value, if using UTF-8 this may actually span
+- * multiple bytes in the given string. This function parses an utf8
+- * character from a string into a UTF8 character (an integer). It uses
+- * a slightly modified version of libxml2's character parser. libxml2
+- * itself does not provide any function to parse characters dircetly
+- * from a string and test if they are valid utf8 characters.
+- *
+- * XML::LibXML uses this function rather than perls native UTF8
+- * support for two reasons:
+- * 1) perls UTF8 handling functions often lead to encoding errors,
+- *    which partly comes, that they are badly documented.
+- * 2) not all perl versions XML::LibXML intends to run with have native
+- *    UTF8 support.
+- *
+- * domParseChar() allows to use the very same code with all versions
+- * of perl :)
+- *
+- * Returns the current char value and its length
+- *
+- * NOTE: If the character passed to this function is not a UTF
+- * character, the return value will be 0 and the length of the
+- * character is -1!
+- */
+-int
+-domParseChar( xmlChar *characters, int *len );
+-
+ xmlNodePtr
+ domReadWellBalancedString( xmlDocPtr doc, xmlChar* string, int repair );
+ 
+diff --git a/t/48_security_oob_utf8_gh146.t b/t/48_security_oob_utf8_gh146.t
+new file mode 100644
+index 000000000000..74fb6d45697d
+--- /dev/null
++++ b/t/48_security_oob_utf8_gh146.t
+@@ -0,0 +1,90 @@
++# Security regression test for GitHub issue #146:
++# Out-of-bounds heap read via hand-rolled UTF-8 walker on truncated sequences.
++#
++# The original domParseChar() read continuation bytes for multi-byte UTF-8
++# sequences without verifying they exist or are valid. A truncated sequence
++# (e.g., "a\xF0") caused reads past the NUL terminator into uninitialized
++# heap memory. This affects all DOM methods that validate node names via
++# LibXML_test_node_name(): createElement, createAttribute, setNodeName,
++# createElementNS, createAttributeNS, etc.
++#
++# Impact: denial of service (crash on unmapped memory) and potential
++# information disclosure (reading adjacent heap allocations).
++#
++# Fixed by replacing the hand-rolled UTF-8 walker (domParseChar) with
++# libxml2's own xmlValidateName(), which correctly handles all UTF-8
++# edge cases.
++#
++# NOTE: This test verifies that malformed UTF-8 does not crash the process
++# (the actual security issue). Whether a given sequence is rejected depends
++# on the linked libxml2 version — older 2.9.x builds may accept some
++# sequences that newer versions reject.
++
++use strict;
++use warnings;
++
++use Test::More;
++use XML::LibXML;
++
++# Truncated UTF-8 sequences that previously caused OOB heap reads.
++# Each entry: [ bytes, description ]
++my @truncated_sequences = (
++    [ "a\xC0",             "truncated 2-byte (leader only)" ],
++    [ "a\xC2",             "truncated 2-byte (valid leader, missing 
continuation)" ],
++    [ "a\xE0",             "truncated 3-byte (leader only)" ],
++    [ "a\xE0\x80",         "truncated 3-byte (leader + 1 continuation)" ],
++    [ "a\xF0",             "truncated 4-byte (leader only)" ],
++    [ "a\xF0\x80",         "truncated 4-byte (leader + 1 continuation)" ],
++    [ "a\xF0\x80\x80",     "truncated 4-byte (leader + 2 continuations)" ],
++);
++
++# Invalid continuation bytes — the leader is valid but the continuations
++# are not 10xxxxxx.
++my @invalid_continuations = (
++    [ "a\xC2\x41",         "2-byte with ASCII continuation" ],
++    [ "a\xE0\x41\x80",     "3-byte with ASCII in first continuation" ],
++    [ "a\xE0\x80\x41",     "3-byte with ASCII in second continuation" ],
++    [ "a\xF0\x41\x80\x80", "4-byte with ASCII in first continuation" ],
++    [ "a\xF0\x80\x41\x80", "4-byte with ASCII in second continuation" ],
++    [ "a\xF0\x80\x80\x41", "4-byte with ASCII in third continuation" ],
++);
++
++my @all_bad = (@truncated_sequences, @invalid_continuations);
++
++my @methods = qw( createElement setNodeName createElementNS
++                   createAttribute createAttributeNS );
++
++# TEST:$bad_count=13
++# TEST:$method_count=5
++plan tests => scalar(@all_bad) * scalar(@methods);
++
++my $doc   = XML::LibXML::Document->new();
++my $nsURI = "http://example.com/ns";;
++
++for my $case (@all_bad) {
++    my ($bytes, $desc) = @$case;
++
++    for my $method (@methods) {
++        eval {
++            if ($method eq 'createElement') {
++                $doc->createElement($bytes);
++            }
++            elsif ($method eq 'setNodeName') {
++                my $node = $doc->createElement("tmp");
++                $node->setNodeName($bytes);
++            }
++            elsif ($method eq 'createElementNS') {
++                $doc->createElementNS($nsURI, $bytes);
++            }
++            elsif ($method eq 'createAttribute') {
++                $doc->createAttribute($bytes, "value");
++            }
++            elsif ($method eq 'createAttributeNS') {
++                $doc->createAttributeNS($nsURI, $bytes, "value");
++            }
++        };
++
++        # TEST*$bad_count*$method_count
++        pass("$method survives $desc without crashing");
++    }
++}
+-- 
+2.53.0
+
diff -Nru libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series 
libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series
--- libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series  
2022-02-11 19:08:51.000000000 +0000
+++ libxml-libxml-perl-2.0207+dfsg+really+2.0134/debian/patches/series  
2026-06-27 11:41:54.000000000 +0000
@@ -1,3 +1,4 @@
 fail-build-no-libxml2.patch
 disable_runtime-version_warning.patch
 test_against_runtime-version.patch
+fix-replace-domParseChar-with-xmlValidateName-to-pre.patch

Reply via email to