Hi Vincent,
On Tue, Oct 28, 2008 at 7:53 PM, Vincent Massol <[EMAIL PROTECTED]> wrote:
> Hi Asiri,
>
> I think I'd really prefer one filter per class. Same as what is done
> in the HTML cleaner. Also please don't use any *Utils class and no
> static please (these are both anti patterns).
>
Ok, reverting now.
Thanks.
- Asiri
>
> Thanks
> -Vincent
>
> On Oct 28, 2008, at 2:54 PM, asiri (SVN) wrote:
>
> > Author: asiri
> > Date: 2008-10-28 14:54:04 +0100 (Tue, 28 Oct 2008)
> > New Revision: 13868
> >
> > Removed:
> > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> > plugin/officeimporter/filter/
> > Modified:
> > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> > plugin/officeimporter/OfficeImporterPlugin.java
> > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> > plugin/officeimporter/utils/HtmlFilterUtils.java
> > sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/xwiki/
> > plugin/officeconverter/CleanHTMLTest.java
> > Log:
> > Moved all html filter code into a single utility class called
> > HtmlFilterUtils. I thought of introducing some sort of a filter
> > chain (maybe the chain of responsibility pattern) but it seemed like
> > overkill for this scenario.
> >
> > Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
> > xwiki/plugin/officeimporter/OfficeImporterPlugin.java
> > ===================================================================
> > --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> > plugin/officeimporter/OfficeImporterPlugin.java 2008-10-28 11:33:41
> > UTC (rev 13867)
> > +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> > plugin/officeimporter/OfficeImporterPlugin.java 2008-10-28 13:54:04
> > UTC (rev 13868)
> > @@ -57,14 +57,9 @@
> > import com.xpn.xwiki.doc.XWikiDocument;
> > import com.xpn.xwiki.plugin.XWikiDefaultPlugin;
> > import com.xpn.xwiki.plugin.XWikiPluginInterface;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.EmptyLinkFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.PinLiFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
> > -import
> > com.xpn.xwiki.plugin.officeimporter.filter.UnderlineLinkFilter;
> > -import
> > com.xpn.xwiki.plugin.officeimporter.filter.XWikiSyntaxEscapeFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.utils.ImporterException;
> > import com.xpn.xwiki.plugin.officeimporter.utils.DocumentType;
> > +import com.xpn.xwiki.plugin.officeimporter.utils.HtmlFilterUtils;
> > +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterException;
> > import com.xpn.xwiki.web.Utils;
> >
> > /**
> > @@ -471,9 +466,7 @@
> > HTMLCleaner.ROLE), e);
> > }
> > Document document = htmlCleaner.clean(new
> > StringReader(inputHTML));
> > -
> > - new UnderlineLinkFilter().filter(document);
> > -
> > + HtmlFilterUtils.filterUnderlinedLinks(document);
> > XMLUtils.stripHTMLEnvelope(document);
> > String cleanedHTML = XMLUtils.toString(document);
> > return cleanedHTML;
> > @@ -499,14 +492,12 @@
> > HTMLCleaner.ROLE), e);
> > }
> > Document document = htmlCleaner.clean(new
> > StringReader(inputHTML));
> > -
> > - new TagRemoveFilter().filter(document);
> > - new UnderlineLinkFilter().filter(document);
> > - new XWikiSyntaxEscapeFilter().filter(document);
> > - new ImageTagFilter().filter(document);
> > - new PinLiFilter().filter(document);
> > - new EmptyLinkFilter().filter(document);
> > -
> > + HtmlFilterUtils.filterTags(document, new String[]{"style",
> > "script"});
> > + HtmlFilterUtils.filterUnderlinedLinks(document);
> > + HtmlFilterUtils.filterSytaxChars(document);
> > + HtmlFilterUtils.filterImageLinks(document);
> > + HtmlFilterUtils.filterParagraphTagsInLineItemTags(document);
> > + HtmlFilterUtils.filterEmptyLinks(document);
> > XMLUtils.stripHTMLEnvelope(document);
> > String cleanedHTML = XMLUtils.toString(document);
> > return cleanedHTML;
> >
> > Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
> > xwiki/plugin/officeimporter/utils/HtmlFilterUtils.java
> > ===================================================================
> > --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> > plugin/officeimporter/utils/HtmlFilterUtils.java 2008-10-28 11:33:41
> > UTC (rev 13867)
> > +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> > plugin/officeimporter/utils/HtmlFilterUtils.java 2008-10-28 13:54:04
> > UTC (rev 13868)
> > @@ -1,12 +1,247 @@
> > package com.xpn.xwiki.plugin.officeimporter.utils;
> >
> > +import java.util.ArrayList;
> > +import java.util.List;
> > +
> > +import org.w3c.dom.Document;
> > +import org.w3c.dom.Element;
> > +import org.w3c.dom.NamedNodeMap;
> > +import org.w3c.dom.Node;
> > +import org.w3c.dom.NodeList;
> > +import org.w3c.dom.Text;
> > +
> > /**
> > * A utility class containing a suite of filter methods used to
> > manipulate Html documents.
> > *
> > * @version $Id$
> > * @since 1.7M1
> > */
> > -public class HtmlFilterUtils
> > +public abstract class HtmlFilterUtils
> > {
> > + /**
> > + * Characters that need to be escaped when jumping from html to
> > xwiki syntax.
> > + */
> > + private static final List<String> escapeChars = new
> > ArrayList<String>();
> >
> > + /**
> > + * Static initializer for escape chars.
> > + */
> > + static {
> > + escapeChars.add("[");
> > + escapeChars.add("]");
> > + escapeChars.add("{");
> > + escapeChars.add("}");
> > + escapeChars.add("*");
> > + escapeChars.add("~");
> > + escapeChars.add("_");
> > + escapeChars.add("-");
> > + escapeChars.add("1");
> > + escapeChars.add("#");
> > + escapeChars.add("$");
> > + }
> > +
> > + /**
> > + * Removes empty links from html documents. If the label of the
> > link is empty, simply remove the
> > + * tag as in {@code <a/>} or {@code <a href=""/>}. If the label
> > is not null but the href
> > + * attribute is missing, replace the tag with its label. Like
> > changing {@code <a>something</a>}
> > + * to {@code something}.
> > + *
> > + * @param document The html document.
> > + */
> > + public static void filterEmptyLinks(Document document)
> > + {
> > + Element root = document.getDocumentElement();
> > + NodeList links = root.getElementsByTagName("a");
> > + for (int i = 0; i < links.getLength(); i++) {
> > + Node link = links.item(i);
> > + if (link.getTextContent() == null ||
> > link.getTextContent().trim().equals("")) {
> > + link.getParentNode().removeChild(link);
> > + i--;
> > + continue;
> > + }
> > +
> > + Node hrefAttr =
> > link.getAttributes().getNamedItem("href");
> > + if (hrefAttr == null ||
> > hrefAttr.getTextContent().trim().equals("")) {
> > + NodeList children = link.getChildNodes();
> > + while (children.getLength() > 0) {
> > +
> > link.getParentNode().insertBefore(children.item(0), link);
> > + }
> > + link.getParentNode().removeChild(link);
> > + i--;
> > + }
> > + }
> > + }
> > +
> > + /**
> > + * Replaces the {@code <img>} tags with corresponding
> > {image} macro elements which are
> > + * recognized by xwiki syntax 1.0. Handles image attributes
> > like src, width, height, alt, align.
> > + *
> > + * @param document The html document.
> > + */
> > + public static void filterImageLinks(Document document)
> > + {
> > + Element root = document.getDocumentElement();
> > + NodeList imgs = root.getElementsByTagName("img");
> > + while (imgs.getLength() > 0) {
> > + Node image = imgs.item(0);
> > + String imageCode = generateImageMacroString(image);
> > + Node parent = image.getParentNode();
> > + Text newImg = document.createTextNode(imageCode);
> > + parent.replaceChild(newImg, image);
> > + }
> > + }
> > +
> > + /**
> > + * Converts a {@code <img>} element into a xwiki syntax 1.0
> > {image} macro element.
> > + *
> > + * @param imageLink Node representing the image link.
> > + * @return Converted {image} macro string.
> > + */
> > + private static String generateImageMacroString(Node imageLink)
> > + {
> > + NamedNodeMap attrs = imageLink.getAttributes();
> > + if (attrs == null) {
> > + return null;
> > + }
> > + StringBuffer sb = new StringBuffer();
> > + sb.append("{image:");
> > + if (attrs.getNamedItem("src") != null) {
> > + String src = attrs.getNamedItem("src").getTextContent();
> > + sb.append(src);
> > + }
> > + if (attrs.getNamedItem("width") != null) {
> > + String width =
> > attrs.getNamedItem("width").getTextContent();
> > + sb.append("|width=" + width);
> > + }
> > + if (attrs.getNamedItem("height") != null) {
> > + String height =
> > attrs.getNamedItem("height").getTextContent();
> > + sb.append("|height=" + height);
> > + }
> > + if (attrs.getNamedItem("alt") != null) {
> > + String alt = attrs.getNamedItem("alt").getTextContent();
> > + sb.append("|alt=" + alt);
> > + }
> > + if (attrs.getNamedItem("align") != null) {
> > + String align =
> > attrs.getNamedItem("align").getTextContent();
> > + sb.append("|align=" + align);
> > + }
> > + sb.append("}");
> > + return sb.toString();
> > + }
> > +
> > + /**
> > + * Removes the starting {@code <p>} tags found within
> > {@code <li>} tags. This is useful since
> > + * such formations are not properly handled in xwiki 1.0 syntax.
> > + *
> > + * @param document The html document.
> > + */
> > + public static void filterParagraphTagsInLineItemTags(Document
> > document)
> > + {
> > + Element root = document.getDocumentElement();
> > + NodeList lists = root.getElementsByTagName("li");
> > + for (int i = 0; i < lists.getLength(); i++) {
> > + Node list = lists.item(i);
> > + Node firstChild = list.getFirstChild();
> > + if (firstChild.getNodeName() != null &&
> > firstChild.getNodeName().equals("p")) {
> > + NodeList childchildren = firstChild.getChildNodes();
> > + while (childchildren.getLength() > 0) {
> > + list.insertBefore(childchildren.item(0),
> > firstChild);
> > + }
> > + list.removeChild(firstChild);
> > + }
> > + }
> > + }
> > +
> > + /**
> > + * Removes all listed tags from the given html document.
> > + *
> > + * @param document The html document.
> > + * @param tags Tags to be removed.
> > + */
> > + public static void filterTags(Document document, String[] tags)
> > + {
> > + Element root = document.getDocumentElement();
> > + for (String tag : tags) {
> > + NodeList toBeRemovedTags =
> > root.getElementsByTagName(tag);
> > + while (toBeRemovedTags.getLength() > 0) {
> > + Node t = toBeRemovedTags.item(0);
> > + t.getParentNode().removeChild(t);
> > + }
> > + }
> > + }
> > +
> > + /**
> > + * Strips off underline tags surrounding links like
> > {@code <u><a href="something">link</a></u>}.
> > + *
> > + * @param document The html document.
> > + */
> > + public static void filterUnderlinedLinks(Document document)
> > + {
> > + Element root = document.getDocumentElement();
> > + NodeList links = root.getElementsByTagName("a");
> > + for (int i = 0; i < links.getLength(); i++) {
> > + Node link = links.item(i);
> > + Node parent = link.getParentNode();
> > + String parentName = parent.getNodeName();
> > + if (parentName != null && (parentName.equals("u") ||
> > parentName.equals("del"))) {
> > + parent.getParentNode().insertBefore(link, parent);
> > + parent.getParentNode().removeChild(parent);
> > + }
> > + }
> > + }
> > +
> > + /**
> > + * Escapes the xwiki syntax characters from the given html
> > document. Example : {@code [} will be
> > + * replaced by {@code \[}.
> > + *
> > + * @param document The html document.
> > + */
> > + public static void filterSytaxChars(Document document)
> > + {
> > + Element root = document.getDocumentElement();
> > + escapeNode(root);
> > + }
> > +
> > + /**
> > + * Escapes xwiki syntax characters within the given node's
> > content.
> > + *
> > + * @param node The node which is to be examined.
> > + */
> > + private static void escapeNode(Node node)
> > + {
> > + NodeList nodes = node.getChildNodes();
> > + for (int i = 0; i < nodes.getLength(); i++) {
> > + Node next = nodes.item(i);
> > + if (next instanceof Text) {
> > + String text = next.getTextContent();
> > + text = escapeText(text);
> > + next.setTextContent(text);
> > + } else {
> > + if (next.hasChildNodes()) {
> > + escapeNode(next);
> > + }
> > + }
> > + }
> > + }
> > +
> > + /**
> > + * Escapes xwiki syntax characters within the given string.
> > + *
> > + * @param text The string to be examined.
> > + * @return The syntax escaped string.
> > + */
> > + private static String escapeText(String text)
> > + {
> > + StringBuffer sb = new StringBuffer();
> > + for (int i = 0; i < text.length(); i++) {
> > + char x = text.charAt(i);
> > + if (escapeChars.contains(String.valueOf(x))) {
> > + sb.append("\\");
> > + sb.append(String.valueOf(x));
> > + } else {
> > + sb.append(x);
> > + }
> > + }
> > + return sb.toString();
> > + }
> > }
> >
> > Modified: sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/
> > xwiki/plugin/officeconverter/CleanHTMLTest.java
> > ===================================================================
> > --- sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/xwiki/
> > plugin/officeconverter/CleanHTMLTest.java 2008-10-28 11:33:41 UTC
> > (rev 13867)
> > +++ sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/xwiki/
> > plugin/officeconverter/CleanHTMLTest.java 2008-10-28 13:54:04 UTC
> > (rev 13868)
> > @@ -27,13 +27,7 @@
> > import org.xwiki.xml.XMLUtils;
> > import org.xwiki.xml.html.HTMLCleaner;
> >
> > -import com.xpn.xwiki.plugin.officeimporter.filter.EmptyLinkFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.HTMLFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.PinLiFilter;
> > -import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
> > -import
> > com.xpn.xwiki.plugin.officeimporter.filter.UnderlineLinkFilter;
> > -import
> > com.xpn.xwiki.plugin.officeimporter.filter.XWikiSyntaxEscapeFilter;
> > +import com.xpn.xwiki.plugin.officeimporter.utils.HtmlFilterUtils;
> > import com.xpn.xwiki.plugin.officeimporter.utils.ImporterException;
> > import com.xpn.xwiki.test.AbstractXWikiComponentTestCase;
> >
> > @@ -121,14 +115,12 @@
> > private void test(String input, String expected) throws
> > ImporterException
> > {
> > Document document = cleaner.clean(new StringReader(input));
> > -
> > - new TagRemoveFilter().filter(document);
> > - new UnderlineLinkFilter().filter(document);
> > - new XWikiSyntaxEscapeFilter().filter(document);
> > - new ImageTagFilter().filter(document);
> > - new PinLiFilter().filter(document);
> > - new EmptyLinkFilter().filter(document);
> > -
> > + HtmlFilterUtils.filterTags(document, new String[]{"style",
> > "script"});
> > + HtmlFilterUtils.filterUnderlinedLinks(document);
> > + HtmlFilterUtils.filterSytaxChars(document);
> > + HtmlFilterUtils.filterImageLinks(document);
> > + HtmlFilterUtils.filterParagraphTagsInLineItemTags(document);
> > + HtmlFilterUtils.filterEmptyLinks(document);
> > XMLUtils.stripHTMLEnvelope(document);
> > String actual = XMLUtils.toString(document);
> > assertEquals(HEAD + expected + FOOT, actual);
> >
> > _______________________________________________
> > notifications mailing list
> > [EMAIL PROTECTED]
> > http://lists.xwiki.org/mailman/listinfo/notifications
>
> _______________________________________________
> devs mailing list
> [email protected]
> http://lists.xwiki.org/mailman/listinfo/devs
>
_______________________________________________
devs mailing list
[email protected]
http://lists.xwiki.org/mailman/listinfo/devs