ktlili 2005/06/09 18:46:52 CEST
Modified files:
war/src/java/com/jahia/clipping/web/html HTMLDocument.java
war/src/java/com/jahia/clipping/web/html/Impl
DefaultHTMLParser.java
war/src/java/com/jahia/clipping/web/html/Impl/Neko
DomHTMLDocument.java
DomHTMLTransformer.java
Log:
Synchronize with webclip_builder
Revision Changes Path
1.2 +1 -6
webclip_portlet/war/src/java/com/jahia/clipping/web/html/HTMLDocument.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/webclip_portlet/war/src/java/com/jahia/clipping/web/html/HTMLDocument.java.diff?r1=1.1&r2=1.2&f=h
1.2 +6 -2
webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/DefaultHTMLParser.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/DefaultHTMLParser.java.diff?r1=1.1&r2=1.2&f=h
1.2 +0 -12
webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/Neko/DomHTMLDocument.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/Neko/DomHTMLDocument.java.diff?r1=1.1&r2=1.2&f=h
1.2 +155 -51
webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/Neko/DomHTMLTransformer.java
http://jahia.mine.nu:8080/cgi-bin/cvsweb.cgi/webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/Neko/DomHTMLTransformer.java.diff?r1=1.1&r2=1.2&f=h
Index: HTMLDocument.java
===================================================================
RCS file:
/home/cvs/repository/webclip_portlet/war/src/java/com/jahia/clipping/web/html/HTMLDocument.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- HTMLDocument.java 3 Jun 2005 17:03:27 -0000 1.1
+++ HTMLDocument.java 9 Jun 2005 16:46:52 -0000 1.2
@@ -10,12 +10,7 @@
*/
public interface HTMLDocument {
- /**
- * Gets the W3CDocument attribute of the HTMLDocument object
- *
- * @return The W3CDocument value
- */
- public org.jdom.Document getJDOMTransformedDocument();
+
/**
Index: DefaultHTMLParser.java
===================================================================
RCS file:
/home/cvs/repository/webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/DefaultHTMLParser.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- DefaultHTMLParser.java 3 Jun 2005 17:03:27 -0000 1.1
+++ DefaultHTMLParser.java 9 Jun 2005 16:46:52 -0000 1.2
@@ -68,10 +68,10 @@
logger.debug("[ Parsing finished. ]");
}
catch (IOException ex) {
- ex.printStackTrace();
+ ex.printStackTrace();
}
catch (SAXException ex) {
- ex.printStackTrace();
+ ex.printStackTrace();
}
return htmlDocument;
@@ -93,6 +93,10 @@
configuration.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
true);
configuration.setFeature("http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
true);
configuration.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs",
true);
+ //Deal whith namespace
+
configuration.setFeature("http://xml.org/sax/features/namespaces", false);
+
configuration.setFeature("http://cyberneko.org/html/features/insert-namespaces",
false);
+
configuration.setFeature("http://cyberneko.org/html/features/override-namespaces",
false);
}
Index: DomHTMLDocument.java
===================================================================
RCS file:
/home/cvs/repository/webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/Neko/DomHTMLDocument.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- DomHTMLDocument.java 3 Jun 2005 17:03:29 -0000 1.1
+++ DomHTMLDocument.java 9 Jun 2005 16:46:52 -0000 1.2
@@ -34,7 +34,6 @@
*/
public DomHTMLDocument(UrlBean uBean, org.w3c.dom.Document document,
String htmlDocumentAsString) {
setW3cOriginalDocument(document);
- setTransformedDocument(document);
setUrlBean(uBean);
setOriginalHTMLSocumentAsString(htmlDocumentAsString);
}
@@ -107,17 +106,6 @@
}
- /**
- * Gets the JDOMTransformedDocument attribute of the DomHTMLDocument
object
- *
- * @return The JDOMTransformedDocument value
- */
- public org.jdom.Document getJDOMTransformedDocument() {
- org.jdom.input.DOMBuilder builder = new
org.jdom.input.DOMBuilder();
- org.jdom.Document jdomDoc =
builder.build(this.getTransformedDocument());
- return jdomDoc;
- }
-
/**
* Gets the ParsingErrors attribute of the DomHTMLDocument object
Index: DomHTMLTransformer.java
===================================================================
RCS file:
/home/cvs/repository/webclip_portlet/war/src/java/com/jahia/clipping/web/html/Impl/Neko/DomHTMLTransformer.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- DomHTMLTransformer.java 3 Jun 2005 17:03:29 -0000 1.1
+++ DomHTMLTransformer.java 9 Jun 2005 16:46:52 -0000 1.2
@@ -1,6 +1,7 @@
package com.jahia.clipping.web.html.Impl.Neko;
import java.util.*;
+
import javax.portlet.*;
import javax.swing.text.html.*;
import javax.xml.parsers.*;
@@ -21,35 +22,14 @@
public class DomHTMLTransformer extends DefaultHTMLTransformer {
private DomHTMLDocument document;
//Xpath
- /**
- * All link element
- */
- public static final String LINK_ELE_XPATH = "//[EMAIL PROTECTED]";
-
- /**
- * Description of the Field
- */
- public static final String INPUT_ELE_XPATH = "//input";
-
- /**
- * All action att
- */
- public static final String ACTION_ATT_XPATH = "//@action";
-
- /**
- * All href att
- */
- public static final String HREF_ATT_XPATH = "//@href";
- /**
- * All src att
- */
- public static final String SRC_ATT_XPATH = "//@src";
-
- /**
- * Description of the Field
- */
- public static String FORM_ELE_XPATH = "//form";
-
+ private final String LINK_ELE_XPATH = "//[EMAIL
PROTECTED]'stylesheet']|//[EMAIL PROTECTED]'text/css']";
+ private final String INPUT_ELE_XPATH = "//input";
+ private final String ACTION_ATT_XPATH = "//@action";
+ private final String HREF_ATT_XPATH = "//@href";
+ private final String SRC_ATT_XPATH = "//@src";
+ private final String FORM_ELE_XPATH = "//form";
+ private final String SYTLE_ELE_XPATH = "//style";
+ private final String SCRIPT_ELE_XPATH = "//script";
private static org.apache.log4j.Logger logger =
org.apache.log4j.Logger.getLogger(DomHTMLTransformer.class);
@@ -191,9 +171,11 @@
processBodyElement(transformedDocument);
//Rewrite urls
- processHrefAttribute(transformedDocument);
processActionAttribute(transformedDocument);
processSrcAttribute(transformedDocument);
+ processSytleElement(transformedDocument);
+ processScriptElement(transformedDocument);
+ processHrefAttribute(transformedDocument);
}
@@ -276,6 +258,8 @@
body = this.getBodyNode(doc);
if (body != null) {
doc.renameNode(body, "", HTML.Tag.DIV.toString());
+ String classValue = CssUtilities.BODY_CLASS;
+ body.setAttribute(HTML.Attribute.CLASS.toString(),
classValue);
}
else {
logger.error("[Body element not found]");
@@ -340,6 +324,7 @@
att.setNodeValue(relatifToAbsolute(att.getNodeValue()));
}
catch (Exception ex) {
+ ex.printStackTrace();
addParsingErrors("Src rewriting error: " +
ex.toString());
}
}
@@ -348,6 +333,117 @@
/**
+ * Description of the Method
+ *
+ * @param doc Description of Parameter
+ */
+ private void processSytleElement(Document doc) {
+ logger.debug("[ Process StyleTag ]");
+ NodeList eleList = DomUtilities.getNodeListByXPath(doc,
this.SYTLE_ELE_XPATH);
+
+ for (int i = 0; i < eleList.getLength(); i++) {
+ Node ele = eleList.item(i);
+ NodeList children = ele.getChildNodes();
+ Node child = null;
+ //look for text node
+ for (int j = 0; j < children.getLength(); j++) {
+ child = children.item(j);
+
+ // style is set as text
+ if (child instanceof Text) {
+ logger.debug("Sytle Text found");
+ Text tChild = (Text) child;
+ try {
+ String content =
getInlineCssContent(child.getNodeValue());
+ Comment comment =
doc.createComment(content);
+ ele.appendChild(comment);
+ ele.removeChild(tChild);
+ break;
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ }
+ }
+ else {
+ // style is set as Comment
+ if (child instanceof Comment) {
+ logger.debug("Style set as
comment");
+ Comment cChild = (Comment)
child;
+ try {
+ String comment =
getInlineCssContent(cChild.getNodeValue());
+
cChild.setNodeValue(comment);
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ }
+ }
+ else {
+ logger.debug("Child is not a
Text, it's a: " + child.toString());
+ }
+ }
+
+ }
+
+ }
+
+ }
+
+
+ /**
+ * Description of the Method
+ *
+ * @param doc Description of Parameter
+ */
+ private void processScriptElement(Document doc) {
+ logger.debug("[ Process StyleTag ]");
+ NodeList eleList = DomUtilities.getNodeListByXPath(doc,
SCRIPT_ELE_XPATH);
+ for (int i = 0; i < eleList.getLength(); i++) {
+ Element ele = (Element) eleList.item(i);
+ NodeList children = ele.getChildNodes();
+ Node child = null;
+ //Inline script
+ for (int j = 0; j < children.getLength(); j++) {
+ child = children.item(j);
+ // style is set as text
+ if (child instanceof Text) {
+ logger.debug("script Text found");
+ Text tChild = (Text) child;
+ try {
+ Comment comment =
doc.createComment(child.getNodeValue());
+ ele.appendChild(comment);
+ ele.removeChild(tChild);
+ break;
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ }
+ }
+ }
+
+ //imported Script
+ String src =
ele.getAttribute(HTML.Attribute.SRC.toString());
+ if (src != null && !src.equals("")) {
+ // get content
+ try {
+ String content =
getImportJavascriptContent(src);
+ // add it as comment
+ Comment comment =
doc.createComment(content);
+
ele.removeAttribute(HTML.Attribute.SRC.toString());
+ ele.appendChild(comment);
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ addParsingErrors("Extraction
Javascript: " + ex.toString());
+ }
+ }
+
+ }
+
+ }
+
+
+
+ /**
* Extract css from link element and make it icluded css
*
* @param doc Description of Parameter
@@ -361,15 +457,18 @@
Element ele = (Element) elements.item(i);
if (isEnableCSS()) {
// create a style element
- Element style =
doc.createElement(HTML.Tag.STYLE.toString());
+ Element style =
ele.getOwnerDocument().createElement(HTML.Tag.STYLE.toString());
String href =
ele.getAttribute(HTML.Attribute.HREF.toString());
- String media =
ele.getAttribute("media");
- String content =
getImportCssContent(href,media);
- Comment comment =
doc.createComment(content);
-
+ String media =
ele.getAttribute("media");
+ String content =
getImportCssContent(href, media);
+ Comment comment =
ele.getOwnerDocument().createComment(content);
//replace link by a style ehich contain
the css
style.appendChild(comment);
- doc.replaceChild(ele, style);
+ ele.appendChild(style);
+
+
//ele.getOwnerDocument().replaceChild(ele, style);
+ //
ele.getOwnerDocument().appendChild(style);
+
}
else {
// remove link tag
@@ -378,9 +477,11 @@
}
}
catch (Exception ex) {
+ ex.printStackTrace();
addParsingErrors("CSS Parsing error: " +
ex.toString());
}
}
+
}
@@ -396,7 +497,7 @@
Element ele = (Element) inputList.item(i);
String type = ele.getAttribute(
HTML.Attribute.TYPE.toString());
- //logger.debug("[Input element founded whith
type: " + type + " ]");
+ //logger.debug("[Input element found whith
type: " + type + " ]");
// Test that the type is a "valid" one
if (type != null) {
@@ -425,17 +526,18 @@
/**
* Description of the Method
*
- * @param transformedDocument Description of Parameter
- * @exception DOMException Description of Exception
+ * @param doc Description of Parameter
+ * @exception DOMException Description of Exception
*/
- private void processTitleElement(Document transformedDocument) throws
DOMException {
+ private void processTitleElement(DocumentImpl doc) throws DOMException {
//remove title
- Element title = (Element)
transformedDocument.getElementsByTagName(HTML.Tag.TITLE.toString()).item(0);
+ Element title = (Element)
doc.getElementsByTagName(HTML.Tag.TITLE.toString()).item(0);
if (title != null) {
DomUtilities.removeNode(title);
+ //doc.renameNode(doc, "", HTML.Tag.DIV.toString());
}
else {
- addParsingErrors("Title element no found");
+ //addParsingErrors("Title element no found");
}
}
@@ -443,19 +545,21 @@
/**
* Description of the Method
*
- * @param doc Description of Parameter
+ * @param doc Description of Parameter
*/
private void processHtmlElement(DocumentImpl doc) {
//change tag HTML-->div
Element html = (Element)
doc.getElementsByTagName(HTML.Tag.HTML.toString()).item(0);
doc.renameNode(html, "", HTML.Tag.DIV.toString());
+ String classValue = CssUtilities.HTML_CLASS;
+ html.setAttribute(HTML.Attribute.CLASS.toString(), classValue);
}
/**
* Process Form element
*
- * @param doc Description of Parameter
+ * @param doc Description of Parameter
*/
private void processFormElement(Document doc) {
// add action input elements params
@@ -522,15 +626,15 @@
/**
* Description of the Method
*
- * @param transformedDocument Description of Parameter
- * @exception DOMException Description of Exception
+ * @param doc Description of Parameter
*/
- private void processHeadElement(Document transformedDocument) {
+ private void processHeadElement(DocumentImpl doc) {
//remove head
- Element head = (Element) getHeadNode(transformedDocument);
+ Element head = (Element) getHeadNode(doc);
if (head != null) {
- Node parent = head.getParentNode();
- parent.removeChild(head);
+ //Node parent = head.getParentNode();
+ doc.renameNode(head, "", HTML.Tag.DIV.toString());
+ //parent.removeChild(head);
}
}
@@ -570,7 +674,7 @@
//add text to <i>
Text text = originalDoc.createTextNode("[Param=" + name + "]");
if (type != null &&
type.equalsIgnoreCase(WebConstants.TYPE_RADIO)) {
- text = originalDoc.createTextNode("[Param=" + name + ",
value=\'" + value +"\']");
+ text = originalDoc.createTextNode("[Param=" + name + ",
value=\'" + value + "\']");
}
labelEle.appendChild(text);
return labelEle;