Author: vgritsenko Date: Thu Nov 11 17:54:49 2004 New Revision: 57483 Added: cocoon/branches/BRANCH_2_1_X/lib/optional/nekohtml-0.9.3.jar - copied unchanged from rev 57469, cocoon/trunk/src/blocks/html/lib/nekohtml-0.9.3.jar cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/ - copied from rev 57469, cocoon/trunk/src/blocks/html/WEB-INF/ cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap - copied, changed from rev 57468, cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java - copied, changed from rev 57469, cocoon/trunk/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java Removed: cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap Modified: cocoon/branches/BRANCH_2_1_X/gump.xml cocoon/branches/BRANCH_2_1_X/lib/jars.xml cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java cocoon/branches/BRANCH_2_1_X/status.xml Log: merge html block (trunk -> branch 2.1)
Modified: cocoon/branches/BRANCH_2_1_X/gump.xml ============================================================================== --- cocoon/branches/BRANCH_2_1_X/gump.xml (original) +++ cocoon/branches/BRANCH_2_1_X/gump.xml Thu Nov 11 17:54:49 2004 @@ -528,9 +528,10 @@ <depend project="cocoon" inherit="all"/> <depend project="jtidy"/> - <depend project="jakarta-servletapi"/> + <depend project="nekohtml"/> <library name="jtidy"/> + <library name="nekohtml"/> <work nested="tools/anttasks"/> <home nested="build/cocoon-@@DATE@@"/> Modified: cocoon/branches/BRANCH_2_1_X/lib/jars.xml ============================================================================== --- cocoon/branches/BRANCH_2_1_X/lib/jars.xml (original) +++ cocoon/branches/BRANCH_2_1_X/lib/jars.xml Thu Nov 11 17:54:49 2004 @@ -538,6 +538,14 @@ </file> <file> + <title>Transform HTML to XML</title> + <description>NekoHTML is a lightweight HTML syntax correcter written using Xerces Native Interface.</description> + <used-by>NekoHTML generator (html block)</used-by> + <lib>optional/nekohtml-0.9.3.jar</lib> + <homepage>http://www.apache.org/~andyc/neko/</homepage> + </file> + + <file> <title>Search engine</title> <description> jakarta-lucene is a search engine toolkit designed for indexing and Copied: cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap (from rev 57468, cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap) ============================================================================== --- cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap (original) +++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap Thu Nov 11 17:54:49 2004 @@ -18,7 +18,20 @@ <xmap xpath="/sitemap/components/generators" unless="[EMAIL PROTECTED]'html']"> - <map:generator name="html" logger="sitemap.generator.html" + <map:generator name="html" + logger="sitemap.generator.html" src="org.apache.cocoon.generation.HTMLGenerator" - label="content"/> + label="content"> + <!-- Tidy configuration file. + <jtidy-config>context://WEB-INF/tidy.properties</jtidy-config> + --> + </map:generator> + <map:generator name="nekohtml" + logger="sitemap.generator.html" + src="org.apache.cocoon.generation.NekoHTMLGenerator" + label="content"> + <!-- Tidy configuration file. + <neko-config>???</neko-config> + --> + </map:generator> </xmap> Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java ============================================================================== --- cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java (original) +++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java Thu Nov 11 17:54:49 2004 @@ -1,12 +1,12 @@ /* * Copyright 1999-2004 The Apache Software Foundation. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -34,7 +34,6 @@ import org.apache.avalon.framework.service.ServiceException; import org.apache.avalon.framework.service.ServiceManager; import org.apache.cocoon.ProcessingException; -import org.apache.cocoon.ResourceNotFoundException; import org.apache.cocoon.caching.CacheableProcessingComponent; import org.apache.cocoon.components.source.SourceUtil; import org.apache.cocoon.environment.ObjectModelHelper; @@ -56,13 +55,13 @@ * @cocoon.sitemap.component.documentation * The html generator reads HTML from a source, converts it to XHTML * and generates SAX Events. - * + * * @cocoon.sitemap.component.name html * @cocoon.sitemap.component.label content * @cocoon.sitemap.component.logger sitemap.generator.html * @cocoon.sitemap.component.documentation.caching * Uses the last modification date of the xml document for validation - * + * * @cocoon.sitemap.component.pooling.min 4 * @cocoon.sitemap.component.pooling.max 32 * @cocoon.sitemap.component.pooling.grow 4 @@ -72,7 +71,7 @@ * @author <a href="mailto:[EMAIL PROTECTED]">Nicola Ken Barozzi</a> * @author <a href="mailto:[EMAIL PROTECTED]">Gianugo Rabellino</a> * - * @version CVS $Id: HTMLGenerator.java,v 1.12 2004/05/03 13:07:26 cziegeler Exp $ + * @version CVS $Id$ */ public class HTMLGenerator extends ServiceableGenerator implements Configurable, CacheableProcessingComponent, Disposable { @@ -153,7 +152,7 @@ super.setup(resolver, objectModel, src, par); Request request = ObjectModelHelper.getRequest(objectModel); - + if (src == null) { // Handle this request as the StreamGenerator does (from the POST // request or from a request parameter), but try to make sure @@ -199,8 +198,9 @@ } xpath = request.getParameter("xpath"); - if(xpath == null) + if (xpath == null) { xpath = par.getParameter("xpath",null); + } // append the request parameter to the URL if necessary if (par.getParameterAsBoolean("copy-parameters", false) @@ -212,8 +212,9 @@ } try { - if (source != null) + if (source != null) { this.inputSource = resolver.resolveURI(super.source); + } } catch (SourceException se) { throw SourceUtil.handle("Unable to resolve " + super.source, se); } @@ -228,8 +229,9 @@ * is currently not cacheable. */ public java.io.Serializable getKey() { - if (this.inputSource == null) + if (this.inputSource == null) { return null; + } if (this.xpath != null) { StringBuffer buffer = new StringBuffer(this.inputSource.getURI()); @@ -249,8 +251,9 @@ * component is currently not cacheable. */ public SourceValidity getValidity() { - if (this.inputSource == null) + if (this.inputSource == null) { return null; + } return this.inputSource.getValidity(); } @@ -313,13 +316,8 @@ domStreamer.stream(doc.getDocumentElement()); } this.contentHandler.endDocument(); - } catch (IOException e){ - throw new ResourceNotFoundException("Could not get resource " - + this.inputSource.getURI(), e); } catch (SAXException e){ - throw e; - } catch (Exception e){ - throw new ProcessingException("Exception in HTMLGenerator.generate()",e); + SourceUtil.handleSAXException(this.inputSource.getURI(), e); } } Copied: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java (from rev 57469, cocoon/trunk/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java) ============================================================================== --- cocoon/trunk/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java (original) +++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java Thu Nov 11 17:54:49 2004 @@ -73,7 +73,7 @@ * @author <a href="mailto:[EMAIL PROTECTED]">Nicola Ken Barozzi</a> * @author <a href="mailto:[EMAIL PROTECTED]">Gianugo Rabellino</a> * - * @version CVS $Id: NekoHTMLGenerator.java,v 1.2 2004/07/08 12:04:08 upayavira Exp $ + * @version CVS $Id$ */ public class NekoHTMLGenerator extends ServiceableGenerator implements Configurable, CacheableProcessingComponent, Disposable { Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java ============================================================================== --- cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java (original) +++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java Thu Nov 11 17:54:49 2004 @@ -40,23 +40,26 @@ import org.xml.sax.SAXException; /** - * Unstable transformer: converts (escaped) HTML snippets into JTidied HTML. + * Converts (escaped) HTML snippets into JTidied HTML. * This transformer expects a list of elements, passed as comma separated * values of the "tags" parameter. It records the text enclosed in such * elements and pass it thru JTidy to obtain valid XHTML. - * TODO: add namespace support. - * WARNING: this transformer should be considered unstable. + * + * <p>TODO: Add namespace support. + * <p><strong>WARNING:</strong> This transformer should be considered unstable. * * @author <a href="mailto:[EMAIL PROTECTED]">Daniele Madama</a> * @author <a href="mailto:[EMAIL PROTECTED]">Gianugo Rabellino</a> + * + * @version CVS $Id$ */ public class HTMLTransformer extends AbstractSAXTransformer implements Configurable { - /** - * Properties for Tidy format - */ + /** + * Properties for Tidy format + */ private Properties properties; /** @@ -104,7 +107,6 @@ /** * Configure this transformer, possibly passing to it * a jtidy configuration file location. - * */ public void configure(Configuration config) throws ConfigurationException { String configUrl = config.getChild("jtidy-config").getValue(null); @@ -209,5 +211,4 @@ this.tags.put(tok, tok); } } - } Modified: cocoon/branches/BRANCH_2_1_X/status.xml ============================================================================== --- cocoon/branches/BRANCH_2_1_X/status.xml (original) +++ cocoon/branches/BRANCH_2_1_X/status.xml Thu Nov 11 17:54:49 2004 @@ -452,7 +452,7 @@ </action> <action dev="AG" type="update"> Update ant to 1.6.2, commons-jxpath to 1.2, commons-beanutils to 1.7, - POI to 2.5.1-final-20040804, commons-httpclient to 2.0.1, + POI to 2.5.1-final-20040804, commons-httpclient to 2.0.1, nekohtml to 0.9.3, hsqldb to 1.7.2, jcs to 1.1-dev-20040811, quartz to 1.4.2, asm to 1.4.3, asm-util to 1.4.3 and groovy to 1.0-beta6 </action> @@ -483,7 +483,12 @@ Still in the scratchpad area at the time of this writing, added a CachedSource proxy subclass for Sources that implement TraversableSource and InspectableSource (for instance WebDAVSource). - </action> + </action> + <action dev="UV" type="add"> + Added a NekoHTMLGenerator to HTML block. This is a simpler HTML parser than + JTidy, which preserves more of the original HTML, primarily just balancing + closing tags. + </action> <action dev="TC" type="add" fixes-bug="29935" due-to="Leszek Gawron" due-to-email="[EMAIL PROTECTED]"> Added support for stripping root elements in the CIncludeTransformer. </action>