Hi,

Glad to have been useful to someone. Thank you.

My patch dated 2012-08-21 (see the archives of this list) handles
NUL characters gently, instead of behaving as if an EOF had been met.
If that interested you, you may also want NULs found in system
identifiers to be reported by a specific error message:
"invalid character 0x{0} in system id"
similar to the one you get in public identifiers:
"invalid character 0x{0} in public id"

Attached is a patch for this (against 3.1.1). You might also find
my patch dated 2012-09-03, about empty public and system
identifiers, useful.

I also have a patch to introduce set/getCreateProcessingInstructionNodes
(similar to set/getCreateCommentNodes), and to introduce
set/getCreateCDATASectionNodes (similar to
set/getCreateEntityReferenceNodes), if you are interested. I also have
a small patch to avoid incrementing fCurLine when a #xD (with no #xA)
is found within an input file.

I hope all of this (the functionality, if not the patches themselves)
will be included in the next formal release (3.2.0?).

Regards,

Denis Excoffier.

On Mon, Oct 08, 2012 at 05:46:33PM +0200, Alberto Massari wrote:
>> Thanks for reporting this, it is fixed in SVN now.
>> 
>> Alberto
>> 
>> Il 08/10/2012 17:19, Denis Excoffier ha scritto:
>> >Hi,
>> >
>> >If you want consistent error messages, not like
>> >"invalid character 0x1e" and
>> >"invalid character 0x1F", you will want to apply the patch included
>> >(either Xerces-C-3.1.1 or trunk)
>> >
>> >Regards,
>> >
>> >Denis Excoffier.
>> 
>> 
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: [email protected]
>> For additional commands, e-mail: [email protected]
>> 
>> 
>> 
diff -uNr xerces-c-3.1.1o/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml 
xerces-c-3.1.1p/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml
--- xerces-c-3.1.1o/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml  2010-04-11 
15:04:35.000000000 +0159
+++ xerces-c-3.1.1p/src/xercesc/NLS/EN_US/XMLErrList_EN_US.Xml  2012-10-09 
16:35:10.554189500 +0159
@@ -343,6 +343,7 @@
             <Message Id="XIncludeDisallowedChild" Text="element '{0}' is not 
allowed as a child of include element"/>
             <Message Id="XIncludeConflictingNotation" Text="included notation 
'{0}' conflicts with notation already defined"/>
             <Message Id="XIncludeConflictingEntity" Text="included entity 
'{0}' conflicts with entity already defined"/>
+            <Message Id="InvalidSystemIdChar" Text="invalid character 0x{0} in 
system id"/>
         </FatalError>
     </MsgDomain>
     <MsgDomain Domain="http://apache.org/xml/messages/XMLValidity";>
diff -uNr xerces-c-3.1.1o/src/xercesc/framework/XMLErrorCodes.hpp 
xerces-c-3.1.1p/src/xercesc/framework/XMLErrorCodes.hpp
--- xerces-c-3.1.1o/src/xercesc/framework/XMLErrorCodes.hpp     2009-08-10 
15:33:24.000000000 +0159
+++ xerces-c-3.1.1p/src/xercesc/framework/XMLErrorCodes.hpp     2012-10-09 
16:37:58.427040700 +0159
@@ -301,7 +301,8 @@
       , XIncludeDisallowedChild            = 284
       , XIncludeConflictingNotation        = 285
       , XIncludeConflictingEntity          = 286
-      , F_HighBounds                       = 287
+      , InvalidSystemIdChar                = 287
+      , F_HighBounds                       = 288
     };
 
     static bool isFatal(const XMLErrs::Codes toCheck)
diff -uNr xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/ICU/resources/root.txt 
xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/ICU/resources/root.txt
--- xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/ICU/resources/root.txt  
2009-11-18 12:28:05.000000000 +0059
+++ xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/ICU/resources/root.txt  
2012-10-09 16:35:24.507135900 +0159
@@ -288,6 +288,7 @@
                "element '{0}' is not allowed as a child of include element" ,
                "included notation '{0}' conflicts with notation already 
defined" ,
                "included entity '{0}' conflicts with entity already defined" ,
+               "invalid character 0x{0} in system id" ,
                "F_ End " ,
                } 
 
diff -uNr 
xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp 
xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp
--- 
xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp   
    2009-11-18 12:28:05.000000000 +0059
+++ 
xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/InMemory/XercesMessages_en_US.hpp   
    2012-10-09 17:44:26.172871900 +0159
@@ -575,6 +575,8 @@
       
0x0069,0x0074,0x0068,0x0020,0x006E,0x006F,0x0074,0x0061,0x0074,0x0069,0x006F,0x006E,0x0020,0x0061,0x006C,0x0072,0x0065,0x0061,0x0064,0x0079,0x0020,0x0064,0x0065,0x0066,0x0069,0x006E,0x0065,0x0064,0x00
 }
   , { 
0x0069,0x006E,0x0063,0x006C,0x0075,0x0064,0x0065,0x0064,0x0020,0x0065,0x006E,0x0074,0x0069,0x0074,0x0079,0x0020,0x0027,0x007B,0x0030,0x007D,0x0027,0x0020,0x0063,0x006F,0x006E,0x0066,0x006C,0x0069,0x0063,0x0074,0x0073,0x0020,0x0077,0x0069,0x0074,
       
0x0068,0x0020,0x0065,0x006E,0x0074,0x0069,0x0074,0x0079,0x0020,0x0061,0x006C,0x0072,0x0065,0x0061,0x0064,0x0079,0x0020,0x0064,0x0065,0x0066,0x0069,0x006E,0x0065,0x0064,0x00
 }
+  , { 
0x0069,0x006E,0x0076,0x0061,0x006C,0x0069,0x0064,0x0020,0x0063,0x0068,0x0061,0x0072,0x0061,0x0063,0x0074,0x0065,0x0072,0x0020,0x0030,0x0078,0x007B,0x0030,0x007D,0x0020,0x0069,0x006E,0x0020,0x0073,0x0079,0x0073,0x0074,0x0065,0x006D,0x0020,0x0069,
+      0x0064,0x00 }
   , { 0x0046,0x005F,0x0045,0x006E,0x0064,0x00 }
 
 };
diff -uNr 
xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg 
xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg
--- 
xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg 
    2009-11-18 12:28:05.000000000 +0059
+++ 
xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/MsgCatalog/XercesMessages_en_US.Msg 
    2012-10-09 16:35:47.788087900 +0159
@@ -281,6 +281,7 @@
 284  element '{0}' is not allowed as a child of include element
 285  included notation '{0}' conflicts with notation already defined
 286  included entity '{0}' conflicts with entity already defined
+287  invalid character 0x{0} in system id
 
 
 $set 2
diff -uNr xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/Win32/Version.rc 
xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/Win32/Version.rc
--- xerces-c-3.1.1o/src/xercesc/util/MsgLoaders/Win32/Version.rc        
2010-04-18 17:02:12.000000000 +0159
+++ xerces-c-3.1.1p/src/xercesc/util/MsgLoaders/Win32/Version.rc        
2012-10-09 16:36:11.819030300 +0159
@@ -386,6 +386,7 @@
     284               L"element '{0}' is not allowed as a child of include 
element"
     285               L"included notation '{0}' conflicts with notation 
already defined"
     286               L"included entity '{0}' conflicts with entity already 
defined"
+    287               L"invalid character 0x{0} in system id"
 END
 STRINGTABLE DISCARDABLE
 BEGIN
diff -uNr xerces-c-3.1.1o/src/xercesc/validators/DTD/DTDScanner.cpp 
xerces-c-3.1.1p/src/xercesc/validators/DTD/DTDScanner.cpp
--- xerces-c-3.1.1o/src/xercesc/validators/DTD/DTDScanner.cpp   2009-11-05 
14:21:03.000000000 +0059
+++ xerces-c-3.1.1p/src/xercesc/validators/DTD/DTDScanner.cpp   2012-10-09 
16:39:25.972795100 +0159
@@ -3757,6 +3757,18 @@
         // Watch for EOF
         if (!nextCh)
             ThrowXMLwithMemMgr(UnexpectedEOFException, 
XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
+        if (!nextCh) {
+          XMLCh tmpBuf[9];
+          XMLString::binToText
+          (
+              nextCh
+              , tmpBuf
+              , 8
+              , 16
+              , fMemoryManager
+          );
+          fScanner->emitError(XMLErrs::InvalidSystemIdChar, tmpBuf);
+        };
         toFill.append(nextCh);
     }
     return true;

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to