cziegeler 02/02/11 01:51:19 Modified: src/java/org/apache/cocoon/components/source URLSource.java Added: lib/core avalon-scratchpad-20020205.jar Log: Added excalibur scratchpad for start of reintegration components Revision Changes Path 1.1 xml-cocoon2/lib/core/avalon-scratchpad-20020205.jar <<Binary file>> 1.5 +165 -105 xml-cocoon2/src/java/org/apache/cocoon/components/source/URLSource.java Index: URLSource.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/source/URLSource.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- URLSource.java 4 Feb 2002 12:31:09 -0000 1.4 +++ URLSource.java 11 Feb 2002 09:51:19 -0000 1.5 @@ -55,12 +55,16 @@ package org.apache.cocoon.components.source; +import org.apache.avalon.excalibur.source.SourceParameters; +import org.apache.avalon.excalibur.source.SourceUtil; import org.apache.avalon.framework.component.ComponentManager; import org.apache.cocoon.ProcessingException; import org.apache.cocoon.ResourceNotFoundException; import org.apache.cocoon.components.parser.Parser; import org.apache.cocoon.environment.ModifiableSource; +import org.apache.cocoon.util.ClassUtils; import org.apache.cocoon.xml.XMLConsumer; +import org.w3c.tidy.Tidy; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; @@ -71,19 +75,52 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.io.StringWriter; import java.lang.reflect.Method; -import java.net.URL; -import java.net.URLConnection; +import java.net.*; +import java.util.Iterator; +import java.util.Properties; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; /** * Description of a source which is described by an URL. 
* * @author <a href="mailto:[EMAIL PROTECTED]">Carsten Ziegeler</a> - * @version CVS $Id: URLSource.java,v 1.4 2002/02/04 12:31:09 cziegeler Exp $ + * @version CVS $Id: URLSource.java,v 1.5 2002/02/11 09:51:19 cziegeler Exp $ */ public class URLSource implements ModifiableSource { + /** Is JTidy available? */ + private static boolean jtidyAvailable; + + /** Properties used for converting HTML to XML */ + private static Properties xmlProperties; + + /** The TrAX factory for serializing xml */ + public static TransformerFactory transformerFactory = TransformerFactory.newInstance(); + + /** + * Test if JTidy is available + */ + static { + jtidyAvailable = false; + try { + Class jtidy = ClassUtils.loadClass("org.w3c.tidy.Tidy"); + if ( null != jtidy ) jtidyAvailable = true; + } catch (ClassNotFoundException cnfe) { + // ignore + } + xmlProperties = new Properties(); + xmlProperties.put(OutputKeys.METHOD, "xml"); + xmlProperties.put(OutputKeys.OMIT_XML_DECLARATION, "no"); + } + + /** Identifier for file urls */ private final String FILE = "file:"; @@ -93,6 +130,9 @@ /** The content length */ private long contentLength; + /** Is the content html or xml? */ + private boolean isHTMLContent = false; + /** The system id */ private String systemId; @@ -111,6 +151,12 @@ /** The ComponentManager needed for streaming */ private ComponentManager manager; + /** The <code>SourceParameters</code> for post */ + private SourceParameters postParameters; + + /** Follow Redirects ? 
*/ + private boolean followRedirects = true; + /** * Construct a new object */ @@ -119,6 +165,11 @@ this.manager = manager; this.systemId = url.toExternalForm(); this.isFile = systemId.startsWith(FILE); + if (this.isFile == true) { + if (systemId.endsWith(".htm") || systemId.endsWith(".html")) { + this.isHTMLContent = true; + } + } this.url = url; this.gotInfos = false; } @@ -134,19 +185,25 @@ this.lastModificationDate = file.lastModified(); this.contentLength = file.length(); } else { - try { - if (this.connection == null) { - this.connection = this.url.openConnection(); - String userInfo = this.getUserInfo(); - if (this.url.getProtocol().startsWith("http") && userInfo != null) { - this.connection.setRequestProperty("Authorization","Basic "+this.encodeBASE64(userInfo)); + if (this.postParameters == null) { + try { + if (this.connection == null) { + this.connection = this.url.openConnection(); + String userInfo = this.getUserInfo(); + if (this.url.getProtocol().startsWith("http") && userInfo != null) { + this.connection.setRequestProperty("Authorization","Basic "+SourceUtil.encodeBASE64(userInfo)); + } } + this.lastModificationDate = this.connection.getLastModified(); + this.contentLength = this.connection.getContentLength(); + } catch (IOException ignore) { + this.lastModificationDate = 0; + this.contentLength = -1; } - this.lastModificationDate = this.connection.getLastModified(); - this.contentLength = this.connection.getContentLength(); - } catch (IOException ignore) { - this.lastModificationDate = 0; - this.contentLength = -1; + } else { + // do not open connection when using post! 
+ this.lastModificationDate = 0; + this.contentLength = -1; } } this.gotInfos = true; @@ -183,7 +240,7 @@ this.getInfos(); try{ InputStream input = null; - if (this.isFile) { + if ( this.isFile ) { input = new FileInputStream(this.systemId.substring(FILE.length())); } else { if (this.connection == null) { @@ -191,17 +248,65 @@ /* The following requires a jdk 1.3 */ String userInfo = this.getUserInfo(); if (this.url.getProtocol().startsWith("http") && userInfo != null) { - this.connection.setRequestProperty("Authorization","Basic "+encodeBASE64(userInfo)); - } + this.connection.setRequestProperty("Authorization","Basic "+SourceUtil.encodeBASE64(userInfo)); } + if (this.followRedirects == false && this.connection instanceof HttpURLConnection) { + ((HttpURLConnection)connection).setInstanceFollowRedirects(false); + } + // do a post operation + if (this.connection instanceof HttpURLConnection + && this.postParameters != null) { + StringBuffer buffer = new StringBuffer(2000); + String key; + Iterator i = postParameters.getParameterNames(); + Iterator values; + String value; + boolean first = true; + while ( i.hasNext() ) { + key = (String)i.next(); + values = this.postParameters.getParameterValues(key); + while (values.hasNext() == true) { + value = SourceUtil.encode((String)values.next()); + if (first == false) buffer.append('&'); + first = false; + buffer.append(key.toString()); + buffer.append('='); + buffer.append(value); + } + } + HttpURLConnection httpCon = (HttpURLConnection)connection; + httpCon.setDoInput(true); + if (buffer.length() > 1) { // only post if we have parameters + String postString = buffer.toString(); + httpCon.setRequestMethod("POST"); // this is POST + httpCon.setDoOutput(true); + httpCon.setRequestProperty("Content-type", "application/x-www-form-urlencoded"); + + // A content-length header must be contained in a POST request + httpCon.setRequestProperty("Content-length", Integer.toString(postString.length())); + java.io.OutputStream out = new 
java.io.BufferedOutputStream(httpCon.getOutputStream()); + out.write(postString.getBytes()); + out.close(); + } + if ("text/html".equals(httpCon.getContentType()) == true) { + this.isHTMLContent = true; + } + input = httpCon.getInputStream(); + this.connection = null; // make sure a new connection is created next time + return input; + } + } + if ("text/html".equals(this.connection.getContentType()) == true) { + this.isHTMLContent = true; + } input = this.connection.getInputStream(); this.connection = null; // make sure a new connection is created next time } return input; - }catch(FileNotFoundException e){ + } catch(FileNotFoundException e) { throw new ResourceNotFoundException("Resource not found " - + this.systemId); + + this.systemId, e); } } @@ -267,92 +372,34 @@ */ public InputSource getInputSource() throws IOException, ProcessingException { - InputSource newObject = new InputSource(this.getInputStream()); + InputStream stream = this.getInputStream(); + if ( this.isHTMLContent && jtidyAvailable ) { + try { + final Tidy xhtmlconvert = new Tidy(); + xhtmlconvert.setXmlOut(true); + xhtmlconvert.setXHTML(true); + xhtmlconvert.setShowWarnings(false); + final org.w3c.dom.Document doc = xhtmlconvert.parseDOM(stream, null); + final StringWriter writer = new StringWriter(); + final Transformer transformer; + transformer = transformerFactory.newTransformer(); + transformer.setOutputProperties(xmlProperties); + transformer.transform(new DOMSource(doc), new StreamResult(writer)); + final String xmlstring = writer.toString(); + InputSource newObject = new InputSource(new java.io.StringReader(xmlstring)); + newObject.setSystemId(this.systemId); + return newObject; + } catch (Exception ignore) { + // Let someone else worry about what we got . This is as before. 
+ this.refresh(); + stream = this.getInputStream(); + } + } + InputSource newObject = new InputSource(stream); newObject.setSystemId(this.systemId); return newObject; } - public static final char [ ] alphabet = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 to 7 - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 to 15 - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 to 23 - 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 to 31 - 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 to 39 - 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 to 47 - 'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 to 55 - '4', '5', '6', '7', '8', '9', '+', '/' }; // 56 to 63 - - /** - * BASE 64 encoding. - * See also RFC 1421 - * @since 1.2 - */ - public static String encodeBASE64 ( String s ) { - return encodeBASE64 ( s.getBytes ( ) ); - } - - /** - * BASE 64 encoding. - * See also RFC 1421 - * @since 1.2 - */ - public static String encodeBASE64 ( byte [ ] octetString ) { - int bits24; - int bits6; - - char [ ] out - = new char [ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ]; - - int outIndex = 0; - int i = 0; - - while ( ( i + 3 ) <= octetString.length ) { - // store the octets - bits24 = ( octetString [ i++ ] & 0xFF ) << 16; - bits24 |= ( octetString [ i++ ] & 0xFF ) << 8; - bits24 |= ( octetString [ i++ ] & 0xFF ) << 0; - - bits6 = ( bits24 & 0x00FC0000 ) >> 18; - out [ outIndex++ ] = alphabet [ bits6 ]; - bits6 = ( bits24 & 0x0003F000 ) >> 12; - out [ outIndex++ ] = alphabet [ bits6 ]; - bits6 = ( bits24 & 0x00000FC0 ) >> 6; - out [ outIndex++ ] = alphabet [ bits6 ]; - bits6 = ( bits24 & 0x0000003F ); - out [ outIndex++ ] = alphabet [ bits6 ]; - } - - if ( octetString.length - i == 2 ) { - // store the octets - bits24 = ( octetString [ i ] & 0xFF ) << 16; - bits24 |= ( octetString [ i + 1 ] & 0xFF ) << 8; - - bits6 = ( bits24 & 0x00FC0000 ) >> 18; - out [ outIndex++ ] = alphabet [ bits6 ]; - bits6 = ( bits24 & 0x0003F000 ) >> 12; - out [ outIndex++ ] = alphabet [ bits6 ]; - bits6 = ( bits24 & 
0x00000FC0 ) >> 6; - out [ outIndex++ ] = alphabet [ bits6 ]; - - // padding - out [ outIndex++ ] = '='; - } else if ( octetString.length - i == 1 ) { - // store the octets - bits24 = ( octetString [ i ] & 0xFF ) << 16; - - bits6 = ( bits24 & 0x00FC0000 ) >> 18; - out [ outIndex++ ] = alphabet [ bits6 ]; - bits6 = ( bits24 & 0x0003F000 ) >> 12; - out [ outIndex++ ] = alphabet [ bits6 ]; - - // padding - out [ outIndex++ ] = '='; - out [ outIndex++ ] = '='; - } - - return new String ( out ); - } - /** * Stream content to a content handler or to an XMLConsumer. * @@ -361,8 +408,7 @@ * @throws SAXException if failed to parse source document. */ public void toSAX(ContentHandler handler) - throws SAXException, ProcessingException - { + throws SAXException, ProcessingException { Parser parser = null; try { parser = (Parser)this.manager.lookup(Parser.ROLE); @@ -390,7 +436,21 @@ } } - public void recycle() - { + public void recycle() { + } + + /** + * Set the post parameters + */ + public void setPostParameters(SourceParameters pars) { + this.postParameters = pars; + } + + /** + * Set the follow redirects flag + */ + public void setFollowRedirects(boolean flag) { + this.followRedirects = flag; } + }
---------------------------------------------------------------------- In case of trouble, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]