cziegeler 01/10/23 05:04:35 Modified: src/org/apache/cocoon/generation StreamGenerator.java src/org/apache/cocoon/util PostInputStream.java Log: Patch for Bug 4124 StreamGenerator does not preserve XML encoding PI Submitted by: kingadziembowska [[EMAIL PROTECTED]]Reviewed by: Revision Changes Path 1.8 +99 -19 xml-cocoon2/src/org/apache/cocoon/generation/StreamGenerator.java Index: StreamGenerator.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/org/apache/cocoon/generation/StreamGenerator.java,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- StreamGenerator.java 2001/10/11 07:28:21 1.7 +++ StreamGenerator.java 2001/10/23 12:04:35 1.8 @@ -13,8 +13,9 @@ import org.apache.cocoon.ProcessingException; import org.apache.cocoon.ResourceNotFoundException; import org.apache.cocoon.components.parser.Parser; -import org.apache.cocoon.environment.Request; +import org.apache.avalon.excalibur.pool.Poolable; import org.apache.cocoon.environment.http.HttpEnvironment; +import javax.servlet.http.HttpServletRequest; import org.apache.cocoon.util.PostInputStream; import org.xml.sax.InputSource; import org.xml.sax.SAXException; @@ -43,9 +44,10 @@ * number of bytes read is equal to the getContentLength() value. * * @author <a href="mailto:[EMAIL PROTECTED]">Kinga Dziembowski</a> - * @version $Revision: 1.7 $ $Date: 2001/10/11 07:28:21 $ + * @version $Revision: 1.8 $ $Date: 2001/10/23 12:04:35 $ */ -public class StreamGenerator extends ComposerGenerator { +public class StreamGenerator extends ComposerGenerator implements Poolable +{ public static final String CLASS = StreamGenerator.class.getName(); /** The parameter holding the name associated with the xml data **/ @@ -70,31 +72,27 @@ /** * Generate XML data out of request InputStream. 
*/ - public void generate() throws IOException, SAXException, ProcessingException { + public void generate() throws IOException, SAXException, ProcessingException + { Parser parser = null; String parameter = parameters.getParameter(StreamGenerator.FORM_NAME, null); int len = 0; - - try { - Request request = (Request)objectModel.get(Constants.REQUEST_OBJECT); - if (request.getContentType().equals("application/x-www-form-urlencoded")) { + String contentType = null; + try + { + HttpServletRequest request = (HttpServletRequest) objectModel.get(HttpEnvironment.HTTP_REQUEST_OBJECT); + contentType = request.getContentType(); + if (contentType.startsWith("application/x-www-form-urlencoded")) { String sXml = request.getParameter(parameter); inputSource = new InputSource(new StringReader(sXml)); - } else if (request.getContentType().equals("text/plain") || - request.getContentType().equals("text/xml") || - request.getContentType().equals("application/xml")) { + } else if (contentType.startsWith("text/plain") || + contentType.startsWith("text/xml") || + contentType.startsWith("application/xml")) { len = request.getContentLength(); if (len > 0) { - // we have hopefully an http request here - javax.servlet.http.HttpServletRequest httpRequest = - (javax.servlet.http.HttpServletRequest)objectModel.get(HttpEnvironment.HTTP_REQUEST_OBJECT); - if (httpRequest != null) { - PostInputStream anStream = new PostInputStream(httpRequest.getInputStream(), len); + PostInputStream anStream = new PostInputStream(request.getInputStream(), len); inputSource = new InputSource(anStream); - } else { - throw new IOException("No http request object found"); - } } else { throw new IOException("getContentLen() == 0"); } @@ -105,6 +103,11 @@ if (getLogger().isDebugEnabled()) { getLogger().debug("processing stream ContentType= " + request.getContentType() + "ContentLen= " + len); } + String charset = getCharacterEncoding(request, contentType) ; + if( charset != null) + { + 
this.inputSource.setEncoding(charset); + } parser = (Parser)this.manager.lookup(Parser.ROLE); parser.setContentHandler(super.contentHandler); parser.setLexicalHandler(super.lexicalHandler); @@ -124,5 +127,82 @@ } } } + + /** + * The Content-Type HTTP header can contain character encoding info, + * e.g. Content-Type: text/xml; charset=UTF-8 + * If the servlet container follows spec 2.3 or higher, the servlet API can be used to retrieve the character encoding part of the + * Content-Type header. Some containers can choose to not unpack charset info - the spec is not strong about it. + * In any case this method can be used as a last resort to retrieve the passed charset value. + * If the Content-Type header carries no charset, <code>null</code> is returned. + * It is a very common mistake to send: Content-Type: text/xml; charset="UTF-8". + * Some containers do not filter this mistake and the processing results in an exception. + * The getCharacterEncoding() method compensates for the above mistake. + * + * @param contentType value associated with Content-Type HTTP header. 
+ */ + public String getCharacterEncoding(HttpServletRequest req, String contentType) + { + String charencoding = null; + String charset = "charset="; + if (contentType == null) + { + return (null); + } + int idx = contentType.indexOf(charset); + if (idx == -1) + { + return (null); + } + try + { + charencoding = req.getCharacterEncoding(); + + if ( charencoding != null) + { + getLogger().debug("charset from container: " + charencoding); + charencoding = charencoding.trim(); + if ((charencoding.length() > 2) && (charencoding.startsWith("\""))&& (charencoding.endsWith("\""))) + { + charencoding = charencoding.substring(1, charencoding.length() - 1); + } + getLogger().debug("charset from container clean: " + charencoding); + return (charencoding); + } + else + { + + return extractCharset( contentType, idx ); + } + } + catch(Throwable e) + { + // We will be there if the container do not implement getCharacterEncoding() method + return extractCharset( contentType, idx ); + } + } + + + protected String extractCharset(String contentType, int idx) + { + String charencoding = null; + String charset = "charset="; + + getLogger().debug("charset from extractCharset"); + charencoding = contentType.substring(idx + charset.length()); + int idxEnd = charencoding.indexOf(";"); + if (idxEnd != -1) + { + charencoding = charencoding.substring(0, idxEnd); + } + charencoding = charencoding.trim(); + if ((charencoding.length() > 2) && (charencoding.startsWith("\""))&& (charencoding.endsWith("\""))) + { + charencoding = charencoding.substring(1, charencoding.length() - 1); + } + getLogger().debug("charset from extractCharset: " + charencoding); + return (charencoding.trim()); + + } } 1.6 +13 -4 xml-cocoon2/src/org/apache/cocoon/util/PostInputStream.java Index: PostInputStream.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/org/apache/cocoon/util/PostInputStream.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 
-r1.6 --- PostInputStream.java 2001/10/11 07:28:25 1.5 +++ PostInputStream.java 2001/10/23 12:04:35 1.6 @@ -14,7 +14,7 @@ * It allows to control read operation, restricting the number of bytes read to the value returned by getContentLen() method. * * @author <a href="mailto:[EMAIL PROTECTED]">Kinga Dziembowski</a> - * @version $Id: PostInputStream.java,v 1.5 2001/10/11 07:28:25 cziegeler Exp $ + * @version $Id: PostInputStream.java,v 1.6 2001/10/23 12:04:35 cziegeler Exp $ */ public class PostInputStream extends InputStream { @@ -151,7 +151,8 @@ if (m_bytesRead == m_contentLen) { return -1; } - int num = m_inputStream.read(buffer, offset, len); + int available = Math.min( available(), len ); + int num = m_inputStream.read( buffer, offset, available ); if (num > 0) { m_bytesRead += num; } @@ -185,8 +186,16 @@ * @exception IOException if an I/O error occurs. */ public synchronized long skip(long n) throws IOException { - checkOpen(); - return m_inputStream.skip(n); + checkOpen(); + if ( m_bytesRead == m_contentLen ) + { + return ( 0 ); + } + else + { + return ( m_inputStream.skip( n ) ); + } + } /**
---------------------------------------------------------------------- In case of trouble, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]