upayavira    2004/07/08 03:11:41

  Modified:    .        gump.xml status.xml
               lib      jars.xml
  Added:       legal    nekohtml.0.9.2.jar.license.txt
               src/blocks/html/conf html.xmap
               src/blocks/html/java/org/apache/cocoon/generation
                        NekoHTMLGenerator.java
               src/blocks/html/lib nekohtml-0.9.2.jar
  Removed:     src/blocks/html/conf tidy.xmap
  Log:
  Adding a NekoHTMLGenerator. I wondered whether to extend the existing 
HTMLGenerator to use Neko or JTidy, but decided on a new Generator. They could 
be merged if necessary.
  
  This is the first Jar I've committed. Can someone check I've done gump, 
licence, etc, correctly?
  
  Revision  Changes    Path
  1.169     +7 -2      cocoon-2.1/gump.xml
  
  Index: gump.xml
  ===================================================================
  RCS file: /home/cvs/cocoon-2.1/gump.xml,v
  retrieving revision 1.168
  retrieving revision 1.169
  diff -u -r1.168 -r1.169
  --- gump.xml  6 Jul 2004 15:31:38 -0000       1.168
  +++ gump.xml  8 Jul 2004 10:11:38 -0000       1.169
  @@ -470,7 +470,8 @@
   
       <depend project="cocoon" inherit="all"/>
       <depend project="jtidy"/>
  -
  +    <depend project="nekohtml"/>
  +    
       <work nested="tools/anttasks"/>
       <home nested="build/cocoon-@@DATE@@"/>
   
  @@ -1271,4 +1272,8 @@
       <jar name="lib/core/javacImpl-0.9.jar" id="impl"/>
     </project>
   
  +  <project name="nekohtml">
  +    <package>org.cyberneko.html</package>
  +    <jar name="src/blocks/html/lib/nekohtml-0.9.2.jar" id="nekohtml"/>
  +  </project>
   </module>
  
  
  
  1.389     +6 -1      cocoon-2.1/status.xml
  
  Index: status.xml
  ===================================================================
  RCS file: /home/cvs/cocoon-2.1/status.xml,v
  retrieving revision 1.388
  retrieving revision 1.389
  diff -u -r1.388 -r1.389
  --- status.xml        8 Jul 2004 07:22:34 -0000       1.388
  +++ status.xml        8 Jul 2004 10:11:38 -0000       1.389
  @@ -204,6 +204,11 @@
   
     <changes>
    <release version="@version@" date="@date@">
  +   <action dev="UV" type="add">
  +     Added a NekoHTMLGenerator to HTML block. This is a simpler HTML parser 
than
  +     JTidy, which preserves more of the original HTML, primarily just 
balancing
  +     closing tags.
  +   </action>
      <action dev="TC" type="add" fixes-bug="29935" due-to="Leszek Gawron" 
due-to-email="[EMAIL PROTECTED]">
       added support for stripping root elements in the CIncludeTransformer
      <action>
  
  
  
  1.238     +9 -1      cocoon-2.1/lib/jars.xml
  
  Index: jars.xml
  ===================================================================
  RCS file: /home/cvs/cocoon-2.1/lib/jars.xml,v
  retrieving revision 1.237
  retrieving revision 1.238
  diff -u -r1.237 -r1.238
  --- jars.xml  8 Jul 2004 09:17:31 -0000       1.237
  +++ jars.xml  8 Jul 2004 10:11:40 -0000       1.238
  @@ -571,6 +571,14 @@
     </file>
   
     <file>
  +    <title>Transform HTML to XML</title>
  +    <description>NekoHTML is a lightweight HTML syntax correcter written 
using Xerces Native Interface.</description>
  +    <used-by>NekoHTML generator (html block)</used-by>
  +    <lib>html/lib/nekohtml-0.9.2.jar</lib>
  +    <homepage>http://www.apache.org/~andyc/neko/</homepage>
  +  </file>
  +
  +  <file>
       <title>Search engine</title>
       <description>
         jakarta-lucene is a search engine toolkit designed for indexing and
  
  
  
  1.1                  cocoon-2.1/legal/nekohtml.0.9.2.jar.license.txt
  
  Index: nekohtml.0.9.2.jar.license.txt
  ===================================================================
  The CyberNeko Software License, Version 1.0
  
   
  (C) Copyright 2002,2003, Andy Clark.  All rights reserved.
   
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
  
  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer. 
  
  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.
  
  3. The end-user documentation included with the redistribution,
     if any, must include the following acknowledgment:  
       "This product includes software developed by Andy Clark."
     Alternately, this acknowledgment may appear in the software itself,
     if and wherever such third-party acknowledgments normally appear.
  
  4. The names "CyberNeko" and "NekoHTML" must not be used to endorse
     or promote products derived from this software without prior 
     written permission. For written permission, please contact 
     [EMAIL PROTECTED]
  
  5. Products derived from this software may not be called "NekoHTML",
     nor may "NekoHTML" appear in their name, without prior written
     permission of the author.
  
  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS
  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 
  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
  OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 
  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 
  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
  ====================================================================
  
  This license is based on the Apache Software License, version 1.1.
  
  
  1.1                  cocoon-2.1/src/blocks/html/conf/html.xmap
  
  Index: html.xmap
  ===================================================================
  <?xml version="1.0"?>
  <!--
    Copyright 1999-2004 The Apache Software Foundation
  
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
  
        http://www.apache.org/licenses/LICENSE-2.0
  
    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
  -->
  
  <xmap xpath="/sitemap/components/generators"
        unless="[EMAIL PROTECTED]'html']">
  
      <map:generator name="html"
                     src="org.apache.cocoon.generation.HTMLGenerator"
                     label="content"/>
      <map:generator name="nekohtml"
                     src="org.apache.cocoon.generation.NekoHTMLGenerator"
                     label="content">
        <!-- Optional NekoHTML properties file, read from the "neko-config" element.
        <neko-config>context://WEB-INF/neko.properties</neko-config>
        -->
      </map:generator>
  </xmap>
  
  
  
  1.1                  
cocoon-2.1/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java
  
  Index: NekoHTMLGenerator.java
  ===================================================================
  /*
   * Copyright 1999-2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.cocoon.generation;
  
  import java.io.ByteArrayInputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.util.Iterator;
  import java.util.Map;
  import java.util.Properties;
  
  import javax.servlet.http.HttpServletRequest;
  
  import org.apache.avalon.framework.activity.Disposable;
  import org.apache.avalon.framework.configuration.Configurable;
  import org.apache.avalon.framework.configuration.Configuration;
  import org.apache.avalon.framework.configuration.ConfigurationException;
  import org.apache.avalon.framework.parameters.Parameters;
  import org.apache.avalon.framework.service.ServiceException;
  import org.apache.avalon.framework.service.ServiceManager;
  import org.apache.cocoon.ProcessingException;
  import org.apache.cocoon.ResourceNotFoundException;
  import org.apache.cocoon.caching.CacheableProcessingComponent;
  import org.apache.cocoon.components.source.SourceUtil;
  import org.apache.cocoon.environment.ObjectModelHelper;
  import org.apache.cocoon.environment.Request;
  import org.apache.cocoon.environment.SourceResolver;
  import org.apache.cocoon.environment.http.HttpEnvironment;
  import org.apache.cocoon.util.PostInputStream;
  import org.apache.cocoon.xml.dom.DOMBuilder;
  import org.apache.cocoon.xml.dom.DOMStreamer;
  import org.apache.excalibur.source.Source;
  import org.apache.excalibur.source.SourceException;
  import org.apache.excalibur.source.SourceValidity;
  import org.apache.excalibur.xml.xpath.XPathProcessor;
  import org.apache.xerces.parsers.AbstractSAXParser;
  import org.cyberneko.html.HTMLConfiguration;
  import org.w3c.dom.Document;
  import org.w3c.dom.NodeList;
  import org.xml.sax.InputSource;
  import org.xml.sax.SAXException;
  
  /**
   * @cocoon.sitemap.component.documentation
   * The neko html generator reads HTML from a source, converts it to XHTML
   * and generates SAX Events. It uses the NekoHTML library to do this.
   * 
   * @cocoon.sitemap.component.name   html
   * @cocoon.sitemap.component.label  content
   * @cocoon.sitemap.component.logger sitemap.generator.nekohtml
   * @cocoon.sitemap.component.documentation.caching
   *               Uses the last modification date of the xml document for 
validation
   * 
   * @cocoon.sitemap.component.pooling.min   4
   * @cocoon.sitemap.component.pooling.max  32
   * @cocoon.sitemap.component.pooling.grow  4
   *
   * @author <a href="mailto:[EMAIL PROTECTED]">Davanum Srinivas</a>
   * @author <a href="mailto:[EMAIL PROTECTED]">Carsten Ziegeler</a>
   * @author <a href="mailto:[EMAIL PROTECTED]">Nicola Ken Barozzi</a>
   * @author <a href="mailto:[EMAIL PROTECTED]">Gianugo Rabellino</a>
   *
   * @version CVS $Id: NekoHTMLGenerator.java,v 1.1 2004/07/08 10:11:41 
upayavira Exp $
   */
  public class NekoHTMLGenerator extends ServiceableGenerator
  implements Configurable, CacheableProcessingComponent, Disposable {
  
      /** The parameter that specifies what request attribute to use, if any */
      public static final String FORM_NAME = "form-name";
  
      /** The  source, if coming from a file */
      private Source inputSource;
  
      /** The source, if coming from the request */
      private InputStream requestStream;
  
      /** XPATH expression */
      private String xpath = null;
  
      /** XPath Processor */
      private XPathProcessor processor = null;
  
      /** JTidy properties */
      private Properties properties;
  
      public void service(ServiceManager manager)
      throws ServiceException {
          super.service( manager );
          this.processor = 
(XPathProcessor)this.manager.lookup(XPathProcessor.ROLE);
      }
  
      public void configure(Configuration config) throws ConfigurationException 
{
  
          String configUrl = config.getChild("neko-config").getValue(null);
  
          if(configUrl != null) {
              org.apache.excalibur.source.SourceResolver resolver = null;
              Source configSource = null;
              try {
                  resolver = 
(org.apache.excalibur.source.SourceResolver)this.manager.lookup(org.apache.excalibur.source.SourceResolver.ROLE);
                  configSource = resolver.resolveURI(configUrl);
                  if (getLogger().isDebugEnabled()) {
                      getLogger().debug("Loading configuration from " + 
configSource.getURI());
                  }
  
                  this.properties = new Properties();
                  this.properties.load(configSource.getInputStream());
  
              } catch (Exception e) {
                  getLogger().warn("Cannot load configuration from " + 
configUrl);
                  throw new ConfigurationException("Cannot load configuration 
from " + configUrl, e);
              } finally {
                  if ( null != resolver ) {
                      this.manager.release(resolver);
                      resolver.release(configSource);
                  }
              }
          }
      }
  
      /**
       * Recycle this component.
       * All instance variables are set to <code>null</code>.
       */
      public void recycle() {
          if (this.inputSource != null) {
              this.resolver.release( this.inputSource );
              this.inputSource = null;
              this.requestStream = null;
          }
          this.xpath = null;
          super.recycle();
      }
  
      /**
       * Setup the html generator.
       * Try to get the last modification date of the source for caching.
       */
      public void setup(SourceResolver resolver, Map objectModel, String src, 
Parameters par)
      throws ProcessingException, SAXException, IOException {
          super.setup(resolver, objectModel, src, par);
  
          Request request = ObjectModelHelper.getRequest(objectModel);
          
          if (src == null) {
              // Handle this request as the StreamGenerator does (from the POST
              // request or from a request parameter), but try to make sure
              // that the output will be well-formed
  
              String contentType = request.getContentType();
  
              if (contentType == null ) {
                  throw new IOException("Content-type was not specified for 
this request");
              } else if 
(contentType.startsWith("application/x-www-form-urlencoded") ||
                  contentType.startsWith("multipart/form-data")) {
                  String requested = parameters.getParameter(FORM_NAME, null);
                  if (requested == null) {
                      throw new ProcessingException(
                          "NekoHtmlGenerator with no \"src\" parameter expects 
a sitemap parameter called '" +
                          FORM_NAME + "' for handling form data"
                      );
                  }
  
                  String sXml = request.getParameter(requested);
  
                  requestStream = new ByteArrayInputStream(sXml.getBytes());
  
              } else if (contentType.startsWith("text/plain") ||
                  contentType.startsWith("text/xml") ||
                  contentType.startsWith("application/xml")) {
  
                  HttpServletRequest httpRequest = (HttpServletRequest) 
objectModel.get(HttpEnvironment.HTTP_REQUEST_OBJECT);
                  if ( httpRequest == null ) {
                      throw new ProcessingException("This functionality only 
works in an http environment.");
                  }
                  int len = request.getContentLength();
                  if (len > 0) {
                      requestStream = new 
PostInputStream(httpRequest.getInputStream(), len);
                  } else {
                      throw new IOException("getContentLen() == 0");
                  }
              } else {
                  throw new IOException("Unexpected getContentType(): " + 
request.getContentType());
              }
  
  
          }
  
          xpath = request.getParameter("xpath");
          if(xpath == null)
              xpath = par.getParameter("xpath",null);
  
          // append the request parameter to the URL if necessary
          if (par.getParameterAsBoolean("copy-parameters", false)
                  && request.getQueryString() != null) {
              StringBuffer query = new StringBuffer(super.source);
              query.append(super.source.indexOf("?") == -1 ? '?' : '&');
              query.append(request.getQueryString());
              super.source = query.toString();
          }
  
          try {
              if (source != null)
                  this.inputSource = resolver.resolveURI(super.source);
          } catch (SourceException se) {
              throw SourceUtil.handle("Unable to resolve " + super.source, se);
          }
      }
  
      /**
       * Generate the unique key.
       * This key must be unique inside the space of this component.
       * This method must be invoked before the generateValidity() method.
       *
       * @return The generated key or <code>0</code> if the component
       *              is currently not cacheable.
       */
      public java.io.Serializable getKey() {
          if (this.inputSource == null)
              return null;
  
          if (this.xpath != null) {
              StringBuffer buffer = new StringBuffer(this.inputSource.getURI());
              buffer.append(':').append(this.xpath);
              return buffer.toString();
          } else {
              return this.inputSource.getURI();
          }
      }
  
      /**
       * Generate the validity object.
       * Before this method can be invoked the generateKey() method
       * must be invoked.
       *
       * @return The generated validity object or <code>null</code> if the
       *         component is currently not cacheable.
       */
      public SourceValidity getValidity() {
          if (this.inputSource == null)
              return null;
          return this.inputSource.getValidity();
      }
  
      /**
       * Generate XML data.
       */
      public void generate()
      throws IOException, SAXException, ProcessingException {
          try {
              HtmlSaxParser parser = new HtmlSaxParser(this.properties);
              
              if (inputSource != null)
                  requestStream = this.inputSource.getInputStream();
  
              if(xpath != null) {
                  DOMBuilder builder = new DOMBuilder();
                  parser.setContentHandler(builder);
                  parser.parse(new InputSource(requestStream));
                  Document doc = builder.getDocument();
  
                  DOMStreamer domStreamer = new DOMStreamer(this.contentHandler,
                                                            
this.lexicalHandler);
                  this.contentHandler.startDocument();
                  NodeList nl = processor.selectNodeList(doc, xpath);
                  int length = nl.getLength();
                  for(int i=0; i < length; i++) {
                      domStreamer.stream(nl.item(i));
                  }
                  this.contentHandler.endDocument();
              } else {
                  parser.setContentHandler(this.contentHandler);
                  parser.parse(new InputSource(requestStream));
              }
              requestStream.close();
          } catch (IOException e){
              throw new ResourceNotFoundException("Could not get resource "
                  + this.inputSource.getURI(), e);
          } catch (SAXException e){
              throw e;
          } catch (Exception e){
              throw new ProcessingException("Exception in 
NekoHTMLGenerator.generate()",e);
          }
      }
  
  
      public void dispose() {
          if (this.manager != null) {
              this.manager.release(this.processor);
              this.manager = null;
          }
          this.processor = null;
          super.dispose();
      }
  
      public static class HtmlSaxParser extends AbstractSAXParser {
  
          public HtmlSaxParser(Properties properties) {
              super(getConfig(properties));
          }
      
          private static HTMLConfiguration getConfig(Properties properties) {
              HTMLConfiguration config = new HTMLConfiguration();
              
config.setProperty("http://cyberneko.org/html/properties/names/elems";, "lower");
              if (properties != null) {
                  for (Iterator i = 
properties.keySet().iterator();i.hasNext();) {
                      String name = (String) i.next();
                      config.setProperty(name, properties.getProperty(name));
                  }
              }
              return config;
          }
      }
  }
  
  
  
  1.1                  cocoon-2.1/src/blocks/html/lib/nekohtml-0.9.2.jar
  
        <<Binary file>>
  
  

Reply via email to