xlawrence    2005/07/08 16:50:46 CEST

  Modified files:
    core/src/java/org/jahia/services/htmlparser 
                                                NekoHtmlParser.java 
  Log:
  Cleanup...
  
  Revision  Changes    Path
  1.2       +43 -45    jahia/core/src/java/org/jahia/services/htmlparser/NekoHtmlParser.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/htmlparser/NekoHtmlParser.java.diff?r1=1.1&r2=1.2&f=h
  
  
  
  Index: NekoHtmlParser.java
  ===================================================================
  RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/htmlparser/NekoHtmlParser.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- NekoHtmlParser.java       11 Jan 2005 11:27:07 -0000      1.1
  +++ NekoHtmlParser.java       8 Jul 2005 14:50:45 -0000       1.2
  @@ -1,17 +1,18 @@
   package org.jahia.services.htmlparser;
   
  -import java.io.*;
  -import java.util.*;
  -
  -import javax.xml.transform.*;
  -import javax.xml.transform.dom.*;
  -import javax.xml.transform.stream.*;
  -
  -import org.apache.xalan.templates.*;
  -import org.cyberneko.html.parsers.*;
  -import org.jahia.utils.fileparsers.*;
  -import org.w3c.dom.*;
  +import java.io.ByteArrayInputStream;
  +import java.io.ByteArrayOutputStream;
  +import java.util.Vector;
  +import javax.xml.transform.OutputKeys;
  +import javax.xml.transform.Transformer;
  +import javax.xml.transform.TransformerFactory;
  +import javax.xml.transform.dom.DOMSource;
  +import javax.xml.transform.stream.StreamResult;
  +import org.apache.xalan.templates.OutputProperties;
  +import org.cyberneko.html.parsers.DOMParser;
   import org.jahia.utils.JahiaTools;
  +import org.jahia.utils.fileparsers.CharsetDetection;
  +import org.w3c.dom.Document;
   
   /**
    *
  @@ -23,22 +24,18 @@
    * @version 1.0
    */
   public class NekoHtmlParser implements HtmlParser {
  -
  -    public static String AMPERSAND = "$$$amp$$$";
  -
  -    private static org.apache.log4j.Logger logger =
  -                org.apache.log4j.Logger.getLogger(NekoHtmlParser.class);
  -
  -    public NekoHtmlParser(){}
  -
  +    
  +    public static final String AMPERSAND = "$$$amp$$$";
  +    
  +    private static final org.apache.log4j.Logger logger =
  +            org.apache.log4j.Logger.getLogger(NekoHtmlParser.class);
  +    
       /**
        *
        * @param htmlParserService HtmlParserService
        */
  -    public void init(HtmlParserService htmlParserService){
  -
  -    }
  -
  +    public void init(HtmlParserService htmlParserService){}
  +    
       /**
        * Parses and generates a clean html document, remove unwanted markups,..
        * Using default settings
  @@ -50,7 +47,7 @@
       public String parse(String inputString, Vector DOMVisitors){
           return parse(inputString,-1,DOMVisitors);
       }
  -
  +    
       /**
        * Parses and generates a clean html document, remove unwanted markups,..
        * Using settings as defined for a given site
  @@ -61,13 +58,13 @@
        * @return
        */
       public String parse(String inputString, Vector DOMVisitors,
  -                        int siteId){
  +            int siteId){
           if ( inputString == null || inputString.trim().equals("") ){
               return inputString;
           }
           return parse(inputString,siteId,DOMVisitors);
       }
  -
  +    
       /**
        * Parses and generates a clean html document, remove unwanted markups,..
        * Using settings as defined for a given site
  @@ -78,16 +75,16 @@
        * @return
        */
       public static String parse( String input,
  -                                int siteId,
  -                                Vector DOMVisitors){
  -
  +            int siteId,
  +            Vector DOMVisitors){
  +        
           if ( input == null || "".equals(input.trim())){
               return input;
           }
  -
  +        
           String result = new String(input);
           result = JahiaTools.replacePattern(result, "&", AMPERSAND);
  -
  +        
           ByteArrayInputStream strIn;
           ByteArrayOutputStream strOut = new ByteArrayOutputStream();
           byte[] strByte = null;
  @@ -99,9 +96,10 @@
               charsetDet.charsetDetection(strIn);
               charSet = charsetDet.getCharset();
           } catch ( Throwable t ){
  +            logger.error("Error parsing the document", t);
           }
  -
  -        DOMParser domParser  = new DOMParser();
  +        
  +        final DOMParser domParser = new DOMParser();
           Document doc;
           int size = 0;
           try {
  @@ -117,20 +115,20 @@
               
               domParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
               domParser.parse(in);
               doc = domParser.getDocument();
  -
  +            
               size = DOMVisitors.size();
               for (int i = 0; i < size; i++) {
                   HtmlDOMVisitor visitor = (HtmlDOMVisitor) DOMVisitors.get(i);
                   doc = visitor.parseDOM(doc);
               }
  -
  +            
               doc.normalize();
  -            TransformerFactory tfactory = TransformerFactory.newInstance();
  -
  +            final TransformerFactory tfactory = TransformerFactory.newInstance();
  +            
               // This creates a transformer that does a simple identity transform,
               // and thus can be used for all intents and purposes as a serializer.
  -            Transformer serializer = tfactory.newTransformer();
  -
  +            final Transformer serializer = tfactory.newTransformer();
  +            
               serializer.setOutputProperty(OutputKeys.METHOD, "html");
               serializer.setOutputProperty(OutputKeys.INDENT, "yes");
               if ( charSet != null ){
  @@ -138,21 +136,21 @@
               }
               
               //serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
               
serializer.setOutputProperty(OutputProperties.S_KEY_INDENT_AMOUNT, "2");
  -            serializer.transform (new DOMSource(doc),
  -                                 new StreamResult(strOut));
  +            serializer.transform(new DOMSource(doc),
  +                    new StreamResult(strOut));
               if ( charSet == null ){
                   result = strOut.toString();
               } else {
                   result = strOut.toString(charSet);
               }
  -
  +            
               result = JahiaTools.text2XMLEntityRef(result, 1);
               result = JahiaTools.replacePattern(result, AMPERSAND, "&");
  -
  +            
           } catch ( Throwable t ){
  -            logger.debug(t);
  +            logger.error("Error parsing the document", t);
               return input;
           }
           return result;
  -   }
  +    }
   }
  

Reply via email to