Author: amassari
Date: Tue Jun 21 10:52:28 2011
New Revision: 1137953
URL: http://svn.apache.org/viewvc?rev=1137953&view=rev
Log:
The ReaderMgr now asks the stream if it knows which encoding has been used
(XERCESC-1967)
Modified:
xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp
xerces/c/trunk/src/xercesc/util/BinInputStream.cpp
xerces/c/trunk/src/xercesc/util/BinInputStream.hpp
xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp
xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp
Modified: xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/ReaderMgr.cpp Tue Jun 21 10:52:28 2011
@@ -397,14 +397,17 @@ XMLReader* ReaderMgr::createReader( cons
// We need to handle this exception to avoid leak on newStream.
try {
- if (src.getEncoding())
+ const XMLCh* encoding = src.getEncoding();
+ if(encoding == 0)
+ encoding = newStream->getEncoding();
+ if (encoding)
{
retVal = new (fMemoryManager) XMLReader
(
src.getPublicId()
, src.getSystemId()
, newStream
- , src.getEncoding()
+ , encoding
, refFrom
, type
, source
Modified: xerces/c/trunk/src/xercesc/util/BinInputStream.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/BinInputStream.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/BinInputStream.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/BinInputStream.cpp Tue Jun 21 10:52:28 2011
@@ -42,4 +42,12 @@ BinInputStream::BinInputStream()
{
}
+// ---------------------------------------------------------------------------
+// BinInputStream: Default implementations
+// ---------------------------------------------------------------------------
+const XMLCh* BinInputStream::getEncoding() const // declared virtual in BinInputStream.hpp; BinHTTPInputStreamCommon overrides it
+{
+ return 0; // default: this stream has no out-of-band encoding to report
+}
+
XERCES_CPP_NAMESPACE_END
Modified: xerces/c/trunk/src/xercesc/util/BinInputStream.hpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/BinInputStream.hpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/BinInputStream.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/BinInputStream.hpp Tue Jun 21 10:52:28 2011
@@ -66,6 +66,23 @@ public :
*/
virtual const XMLCh* getContentType() const = 0;
+ /**
+ * Return the "out-of-band" encoding for the data supplied by this
+ * input stream. If no such encoding is provided for the data, 0 is
+ * returned. This function is expected to return the correct value at
+ * any time after the construction of the stream.
+ *
+ * An example of the stream that may return non-0 from this function is
+ * an HTTP stream with the value returned taken from the "Content-Type"
+ * HTTP header. Note also that if the encoding of the data is known
+ * to the application by some other means then the setEncoding function
+ * in the InputSource object should be used instead. The getEncoding
+ * function should only be used to return information that is intrinsic
+ * to the stream.
+ *
+ * @return The name of the encoding, or 0 if one is not available.
+ */
+ virtual const XMLCh *getEncoding() const;
protected :
// -----------------------------------------------------------------------
Modified:
xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp
(original)
+++ xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.cpp
Tue Jun 21 10:52:28 2011
@@ -45,7 +45,9 @@ XERCES_CPP_NAMESPACE_BEGIN
BinHTTPInputStreamCommon::BinHTTPInputStreamCommon(MemoryManager *manager)
: fBytesProcessed(0)
, fBuffer(1023, manager)
+ , fBufferPos(0) // give the char* member a defined (null) value; it was not in the initializer list before
, fContentType(0)
+ , fEncoding(0) // null means "not computed yet"; getEncoding() fills it in on first use
, fMemoryManager(manager)
{
}
@@ -54,6 +56,7 @@ BinHTTPInputStreamCommon::BinHTTPInputSt
BinHTTPInputStreamCommon::~BinHTTPInputStreamCommon()
{
if(fContentType) fMemoryManager->deallocate(fContentType);
+ if(fEncoding) fMemoryManager->deallocate(fEncoding); // getEncoding() allocates it via XMLString::replicate(..., fMemoryManager), so free it with the same manager
}
static const char *CRLF = "\r\n";
@@ -263,6 +266,69 @@ const XMLCh *BinHTTPInputStreamCommon::g
return fContentType;
}
+// ---------------------------------------------------------------------------
+//  Returns the encoding announced by the HTTP "Content-Type" header, or 0
+//  if none can be derived from it.
+//
+//  Resolution order:
+//    1. an explicit "charset=<name>" parameter of the header;
+//    2. for "text/xml", "text/xml-*" and "text/<anything>+xml": US-ASCII;
+//    3. for any other "text/*" media type: ISO-8859-1;
+//    4. otherwise 0 (no header, or a non-text type without a charset).
+//
+//  The result is computed on the first call and cached in fEncoding, which
+//  is owned by this object and released in the destructor.
+// ---------------------------------------------------------------------------
+const XMLCh *BinHTTPInputStreamCommon::getEncoding() const
+{
+    if(fEncoding == 0) {
+        const XMLCh* contentTypeHeader = getContentType();
+        if(contentTypeHeader)
+        {
+            const XMLCh szCharsetEquals[] = {chLatin_c, chLatin_h, chLatin_a, chLatin_r, chLatin_s, chLatin_e, chLatin_t, chEqual, chNull };
+
+            // Split "type/subtype; param=value; ..." on ';'
+            BaseRefVectorOf<XMLCh>* tokens=XMLString::tokenizeString(contentTypeHeader, chSemiColon, fMemoryManager);
+            for(XMLSize_t i=0;i<tokens->size();i++)
+            {
+                XMLString::removeWS(tokens->elementAt(i), fMemoryManager);
+                if(XMLString::startsWithI(tokens->elementAt(i), szCharsetEquals))
+                {
+                    // fEncoding is a lazily computed cache, so it has to be
+                    // written from this const method; hence the const_cast.
+                    const XMLCh* encodingName=tokens->elementAt(i)+XMLString::stringLen(szCharsetEquals);
+                    const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(encodingName, fMemoryManager);
+                    break;
+                }
+            }
+            // if the charset=value entry was not present, check if we should use a default value
+            if(fEncoding==0 && tokens->size()>0)
+            {
+                const XMLCh szTextSlash[] = { chLatin_t, chLatin_e, chLatin_x, chLatin_t, chForwardSlash, chNull };
+                const XMLCh szXml[] = {chLatin_x, chLatin_m, chLatin_l, chNull };
+                const XMLCh szXmlDash[] = {chLatin_x, chLatin_m, chLatin_l, chDash, chNull };
+
+                // Work on a private copy of the media type: the scan below
+                // overwrites each '+' with a terminator.
+                XMLBuffer contentType(XMLString::stringLen(contentTypeHeader), fMemoryManager);
+                contentType.set(tokens->elementAt(0));
+
+                XMLCh* strType = contentType.getRawBuffer();
+                XMLString::removeWS(strType, fMemoryManager);
+                if(XMLString::startsWithI(strType, szTextSlash))
+                {
+                    // text/* has a default encoding of iso-8859-1
+
+                    // text/xml, text/xml-external-parsed-entity, or a subtype like text/AnythingAtAll+xml
+                    // has a default encoding of us-ascii
+                    XMLCh* cursor=strType+XMLString::stringLen(szTextSlash);
+                    int plusPos;
+                    do
+                    {
+                        // Isolate the next '+'-separated segment of the subtype
+                        plusPos=XMLString::indexOf(cursor, chPlus);
+                        if(plusPos!=-1)
+                            *(cursor+plusPos)=0;
+                        if(XMLString::compareIStringASCII(cursor, szXml)==0 || XMLString::startsWithI(cursor, szXmlDash))
+                        {
+                            const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(XMLUni::fgUSASCIIEncodingString, fMemoryManager);
+                            break;
+                        }
+                        cursor+=plusPos+1;
+                    // Keep going only while another segment follows. The
+                    // previous test (plusPos==-1) spun forever on subtypes
+                    // without '+' (e.g. text/plain) and never examined the
+                    // segment after a '+' (e.g. text/foo+xml).
+                    } while(plusPos!=-1);
+                    if(fEncoding==0)
+                        const_cast<BinHTTPInputStreamCommon*>(this)->fEncoding = XMLString::replicate(XMLUni::fgISO88591EncodingString, fMemoryManager);
+                }
+            }
+            delete tokens;
+        }
+    }
+    return fEncoding;
+}
+
+
XMLSize_t BinHTTPInputStreamCommon::readBytes(XMLByte* const toFill,
const XMLSize_t maxToRead)
{
Modified:
xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp?rev=1137953&r1=1137952&r2=1137953&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp
(original)
+++ xerces/c/trunk/src/xercesc/util/NetAccessors/BinHTTPInputStreamCommon.hpp
Tue Jun 21 10:52:28 2011
@@ -169,6 +169,7 @@ public :
);
virtual const XMLCh *getContentType() const;
+ virtual const XMLCh *getEncoding() const;
protected :
BinHTTPInputStreamCommon(MemoryManager *manager);
@@ -208,12 +209,17 @@ private :
// fBufferPos
// Pointers into fBuffer, showing start and end+1 of content
// that readBytes must return.
+ // fContentType
+ // Holds the HTTP header for the Content-Type setting
+ // fEncoding
+ // Holds the encoding of this stream, extracted from the
Content-Type setting
// -----------------------------------------------------------------------
XMLSize_t fBytesProcessed;
CharBuffer fBuffer;
char * fBufferPos;
XMLCh * fContentType;
+ XMLCh * fEncoding;
MemoryManager* fMemoryManager;
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]