[ 
https://issues.apache.org/jira/browse/XALANC-756?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Andrei Gheorghe updated XALANC-756:
-----------------------------------

    Description: 
Xalan-C++ throws SAXException for this XML:
{code}<?xml version="1.0" encoding="UTF-8"?><elem attr="&#xA;"/>{code}
This XML is valid according to all the validators I have tried.
This issue can be reproduced using the following test program.
I also supplied a patch to show where the problem originates and which seems to 
solve the issue.

{code:title=test.cpp|borderStyle=solid}
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>

#include <xalanc/XercesParserLiaison/XercesDocumentWrapper.hpp>
#include <xalanc/PlatformSupport/XalanStdOutputStream.hpp>
#include <xalanc/PlatformSupport/XalanOutputStreamPrintWriter.hpp>
#include <xalanc/XMLSupport/FormatterToXML.hpp>
#include <xalanc/XMLSupport/FormatterTreeWalker.hpp>

#include <sstream>
#include <iostream>

XERCES_CPP_NAMESPACE_USE
XALAN_CPP_NAMESPACE_USE

using namespace std;

// Reproduction for XALANC-756: parses a one-element document whose attribute
// value is the character reference &#xA; and serializes the resulting DOM
// through FormatterToXML via FormatterTreeWalker.
// Always returns 0; a std::exception raised inside the try block is swallowed.
int main()
{
        try 
        {
                XMLPlatformUtils::Initialize();

                // Input under test: the attribute value is a numeric character reference to U+000A.
                string testXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><elem attr=\"&#xA;\"/>";
                MemBufInputSource bufferMemSource( ( const XMLByte* )testXML.c_str(), testXML.size(), "testXML", false );
                XercesDOMParser parser;
                parser.parse(bufferMemSource);
                XercesDocumentWrapper docWrapper( *XMLPlatformUtils::fgMemoryManager, parser.getDocument(), true, true, true );

                // Serialize the wrapped document to standard output.
                XalanStdOutputStream stream( cout );
                XalanOutputStreamPrintWriter printWriter( stream );
                FormatterToXML formatter( printWriter );
                FormatterTreeWalker walker( formatter );

                formatter.setShouldWriteXMLHeader( false );
                formatter.startDocument();
                walker.traverseSubtree( &docWrapper ); //throws SAXException: 'A' is an invalid Unicode scalar value.
                formatter.endDocument();
        }
        catch ( const exception& )
        {}

        return 0;
}
{code}

{code}
Index: src/xalanc/XMLSupport/FormatterToXML.cpp
===================================================================
--- src/xalanc/XMLSupport/FormatterToXML.cpp    (revision 1612825)
+++ src/xalanc/XMLSupport/FormatterToXML.cpp    (working copy)
@@ -910,7 +910,7 @@
             {
                 if(ch < 0x20 )
                 {
-                    if(m_isXML1_1)
+                    if(ch == 0x9 || ch == 0xA || ch == 0xD || m_isXML1_1)
                     {
                         writeNumberedEntityReference(ch);
                     }
{code}

  was:
Xalan-C++ throws SAXException for this XML: <?xml version="1.0" encoding="UTF-8"?><elem attr="&#xA;"/>
This XML is valid according to all the validators I have tried.
This issue can be reproduced using the following test program.
I also supplied a patch to show where the problem originates and which seems to 
solve the issue.

{code:title=test.cpp|borderStyle=solid}
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>

#include <xalanc/XercesParserLiaison/XercesDocumentWrapper.hpp>
#include <xalanc/PlatformSupport/XalanStdOutputStream.hpp>
#include <xalanc/PlatformSupport/XalanOutputStreamPrintWriter.hpp>
#include <xalanc/XMLSupport/FormatterToXML.hpp>
#include <xalanc/XMLSupport/FormatterTreeWalker.hpp>

#include <sstream>
#include <iostream>

XERCES_CPP_NAMESPACE_USE
XALAN_CPP_NAMESPACE_USE

using namespace std;

// Reproduction for XALANC-756: parses a one-element document whose attribute
// value is the character reference &#xA; and serializes the resulting DOM
// through FormatterToXML via FormatterTreeWalker.
// Always returns 0; a std::exception raised inside the try block is swallowed.
int main()
{
        try 
        {
                XMLPlatformUtils::Initialize();

                // Input under test: the attribute value is a numeric character reference to U+000A.
                string testXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><elem attr=\"&#xA;\"/>";
                MemBufInputSource bufferMemSource( ( const XMLByte* )testXML.c_str(), testXML.size(), "testXML", false );
                XercesDOMParser parser;
                parser.parse(bufferMemSource);
                XercesDocumentWrapper docWrapper( *XMLPlatformUtils::fgMemoryManager, parser.getDocument(), true, true, true );

                // Serialize the wrapped document to standard output.
                XalanStdOutputStream stream( cout );
                XalanOutputStreamPrintWriter printWriter( stream );
                FormatterToXML formatter( printWriter );
                FormatterTreeWalker walker( formatter );

                formatter.setShouldWriteXMLHeader( false );
                formatter.startDocument();
                walker.traverseSubtree( &docWrapper ); //throws SAXException: 'A' is an invalid Unicode scalar value.
                formatter.endDocument();
        }
        catch ( const exception& )
        {}

        return 0;
}
{code}

{code}
Index: src/xalanc/XMLSupport/FormatterToXML.cpp
===================================================================
--- src/xalanc/XMLSupport/FormatterToXML.cpp    (revision 1612825)
+++ src/xalanc/XMLSupport/FormatterToXML.cpp    (working copy)
@@ -910,7 +910,7 @@
             {
                 if(ch < 0x20 )
                 {
-                    if(m_isXML1_1)
+                    if(ch == 0x9 || ch == 0xA || ch == 0xD || m_isXML1_1)
                     {
                         writeNumberedEntityReference(ch);
                     }
{code}


> FormatterToXML throws exception for valid XML
> ---------------------------------------------
>
>                 Key: XALANC-756
>                 URL: https://issues.apache.org/jira/browse/XALANC-756
>             Project: XalanC
>          Issue Type: Bug
>            Reporter: Andrei Gheorghe
>            Assignee: Steven J. Hathaway
>
> Xalan-C++ throws SAXException for this XML:
> {code}<?xml version="1.0" encoding="UTF-8"?><elem attr="&#xA;"/>{code}
> This XML is valid according to all the validators I have tried.
> This issue can be reproduced using the following test program.
> I also supplied a patch to show where the problem originates and which seems 
> to solve the issue.
> {code:title=test.cpp|borderStyle=solid}
> #include <xercesc/util/PlatformUtils.hpp>
> #include <xercesc/framework/MemBufInputSource.hpp>
> #include <xercesc/parsers/XercesDOMParser.hpp>
> #include <xalanc/XercesParserLiaison/XercesDocumentWrapper.hpp>
> #include <xalanc/PlatformSupport/XalanStdOutputStream.hpp>
> #include <xalanc/PlatformSupport/XalanOutputStreamPrintWriter.hpp>
> #include <xalanc/XMLSupport/FormatterToXML.hpp>
> #include <xalanc/XMLSupport/FormatterTreeWalker.hpp>
> #include <sstream>
> #include <iostream>
> XERCES_CPP_NAMESPACE_USE
> XALAN_CPP_NAMESPACE_USE
> using namespace std;
> // Reproduction for XALANC-756: parses a one-element document whose attribute
> // value is the character reference &#xA; and serializes the resulting DOM
> // through FormatterToXML via FormatterTreeWalker.
> // Always returns 0; a std::exception raised inside the try block is swallowed.
> int main()
> {
>       try 
>       {
>               XMLPlatformUtils::Initialize();
>               // Input under test: the attribute value is a numeric character reference to U+000A.
>               string testXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><elem attr=\"&#xA;\"/>";
>               MemBufInputSource bufferMemSource( ( const XMLByte* )testXML.c_str(), testXML.size(), "testXML", false );
>               XercesDOMParser parser;
>               parser.parse(bufferMemSource);
>               XercesDocumentWrapper docWrapper( *XMLPlatformUtils::fgMemoryManager, parser.getDocument(), true, true, true );
>               // Serialize the wrapped document to standard output.
>               XalanStdOutputStream stream( cout );
>               XalanOutputStreamPrintWriter printWriter( stream );
>               FormatterToXML formatter( printWriter );
>               FormatterTreeWalker walker( formatter );
>               formatter.setShouldWriteXMLHeader( false );
>               formatter.startDocument();
>               walker.traverseSubtree( &docWrapper ); //throws SAXException: 'A' is an invalid Unicode scalar value.
>               formatter.endDocument();
>       }
>       catch ( const exception& )
>       {}
>       return 0;
> }
> {code}
> {code}
> Index: src/xalanc/XMLSupport/FormatterToXML.cpp
> ===================================================================
> --- src/xalanc/XMLSupport/FormatterToXML.cpp  (revision 1612825)
> +++ src/xalanc/XMLSupport/FormatterToXML.cpp  (working copy)
> @@ -910,7 +910,7 @@
>              {
>                  if(ch < 0x20 )
>                  {
> -                    if(m_isXML1_1)
> +                    if(ch == 0x9 || ch == 0xA || ch == 0xD || m_isXML1_1)
>                      {
>                          writeNumberedEntityReference(ch);
>                      }
> {code}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to