xlawrence 2005/07/08 16:50:46 CEST
Modified files:
core/src/java/org/jahia/services/htmlparser
NekoHtmlParser.java
Log:
Cleanup...
Revision Changes Path
1.2 +43 -45
jahia/core/src/java/org/jahia/services/htmlparser/NekoHtmlParser.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/jahia/core/src/java/org/jahia/services/htmlparser/NekoHtmlParser.java.diff?r1=1.1&r2=1.2&f=h
Index: NekoHtmlParser.java
===================================================================
RCS file: /home/cvs/repository/jahia/core/src/java/org/jahia/services/htmlparser/NekoHtmlParser.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- NekoHtmlParser.java 11 Jan 2005 11:27:07 -0000 1.1
+++ NekoHtmlParser.java 8 Jul 2005 14:50:45 -0000 1.2
@@ -1,17 +1,18 @@
package org.jahia.services.htmlparser;
-import java.io.*;
-import java.util.*;
-
-import javax.xml.transform.*;
-import javax.xml.transform.dom.*;
-import javax.xml.transform.stream.*;
-
-import org.apache.xalan.templates.*;
-import org.cyberneko.html.parsers.*;
-import org.jahia.utils.fileparsers.*;
-import org.w3c.dom.*;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.Vector;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import org.apache.xalan.templates.OutputProperties;
+import org.cyberneko.html.parsers.DOMParser;
import org.jahia.utils.JahiaTools;
+import org.jahia.utils.fileparsers.CharsetDetection;
+import org.w3c.dom.Document;
/**
*
@@ -23,22 +24,18 @@
* @version 1.0
*/
public class NekoHtmlParser implements HtmlParser {
-
- public static String AMPERSAND = "$$$amp$$$";
-
- private static org.apache.log4j.Logger logger =
- org.apache.log4j.Logger.getLogger(NekoHtmlParser.class);
-
- public NekoHtmlParser(){}
-
+
+ public static final String AMPERSAND = "$$$amp$$$";
+
+ private static final org.apache.log4j.Logger logger =
+ org.apache.log4j.Logger.getLogger(NekoHtmlParser.class);
+
/**
*
* @param htmlParserService HtmlParserService
*/
- public void init(HtmlParserService htmlParserService){
-
- }
-
+ public void init(HtmlParserService htmlParserService){}
+
/**
* Parses and generates a clean html document, remove unwanted markups,..
* Using default settings
@@ -50,7 +47,7 @@
public String parse(String inputString, Vector DOMVisitors){
return parse(inputString,-1,DOMVisitors);
}
-
+
/**
* Parses and generates a clean html document, remove unwanted markups,..
* Using settings as defined for a given site
@@ -61,13 +58,13 @@
* @return
*/
public String parse(String inputString, Vector DOMVisitors,
- int siteId){
+ int siteId){
if ( inputString == null || inputString.trim().equals("") ){
return inputString;
}
return parse(inputString,siteId,DOMVisitors);
}
-
+
/**
* Parses and generates a clean html document, remove unwanted markups,..
* Using settings as defined for a given site
@@ -78,16 +75,16 @@
* @return
*/
public static String parse( String input,
- int siteId,
- Vector DOMVisitors){
-
+ int siteId,
+ Vector DOMVisitors){
+
if ( input == null || "".equals(input.trim())){
return input;
}
-
+
String result = new String(input);
result = JahiaTools.replacePattern(result, "&", AMPERSAND);
-
+
ByteArrayInputStream strIn;
ByteArrayOutputStream strOut = new ByteArrayOutputStream();
byte[] strByte = null;
@@ -99,9 +96,10 @@
charsetDet.charsetDetection(strIn);
charSet = charsetDet.getCharset();
} catch ( Throwable t ){
+ logger.error("Error parsing the document", t);
}
-
- DOMParser domParser = new DOMParser();
+
+ final DOMParser domParser = new DOMParser();
Document doc;
int size = 0;
try {
@@ -117,20 +115,20 @@
domParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
domParser.parse(in);
doc = domParser.getDocument();
-
+
size = DOMVisitors.size();
for (int i = 0; i < size; i++) {
HtmlDOMVisitor visitor = (HtmlDOMVisitor) DOMVisitors.get(i);
doc = visitor.parseDOM(doc);
}
-
+
doc.normalize();
- TransformerFactory tfactory = TransformerFactory.newInstance();
-
+ final TransformerFactory tfactory = TransformerFactory.newInstance();
+
// This creates a transformer that does a simple identity transform,
// and thus can be used for all intents and purposes as a serializer.
- Transformer serializer = tfactory.newTransformer();
-
+ final Transformer serializer = tfactory.newTransformer();
+
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
if ( charSet != null ){
@@ -138,21 +136,21 @@
}
//serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
serializer.setOutputProperty(OutputProperties.S_KEY_INDENT_AMOUNT, "2");
- serializer.transform (new DOMSource(doc),
- new StreamResult(strOut));
+ serializer.transform(new DOMSource(doc),
+ new StreamResult(strOut));
if ( charSet == null ){
result = strOut.toString();
} else {
result = strOut.toString(charSet);
}
-
+
result = JahiaTools.text2XMLEntityRef(result, 1);
result = JahiaTools.replacePattern(result, AMPERSAND, "&");
-
+
} catch ( Throwable t ){
- logger.debug(t);
+ logger.error("Error parsing the document", t);
return input;
}
return result;
- }
+ }
}