html

Vincent Massol Fri, 02 Jan 2009 10:55:12 -0800

Hi Asiri,

You absolutely need a JIRA issue for this!


Actually you would also normally need a vote since you're modifying a  
public API but it's probably ok....

See also the comments below.

On Jan 2, 2009, at 5:07 PM, asiri (SVN) wrote:

> Author: asiri
> Date: 2009-01-02 17:07:40 +0100 (Fri, 02 Jan 2009)
> New Revision: 15020
>
> Modified:
>   platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/html/ 
> HTMLCleaner.java
>   platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/internal/ 
> html/DefaultHTMLCleaner.java
> Log:
> * Introduced a clean(Reader html, Map<String, String> params) method  
> to the HTMLCleaner interface.
>
> Modified: platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/ 
> html/HTMLCleaner.java
> ===================================================================
> --- platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/html/ 
> HTMLCleaner.java      2009-01-02 15:42:50 UTC (rev 15019)
> +++ platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/html/ 
> HTMLCleaner.java      2009-01-02 16:07:40 UTC (rev 15020)
> @@ -20,12 +20,13 @@
> package org.xwiki.xml.html;
>
> import java.io.Reader;
> +import java.util.Map;
>
> import org.w3c.dom.Document;
>
> /**
>  * Transforms any HTML content into valid XHTML that can be feed to  
> the XHTML Parser for example.
> - *
> + *
>  * @version $Id: $
>  * @since 1.6M1
>  */
> @@ -43,4 +44,14 @@
>      * @return the cleaned HTML as a w3c DOM (this allows further  
> transformations if needed)
>      */
>     Document clean(Reader originalHtmlContent);
> +
> +    /**
> +     * Transforms any HTML content into valid XHTML. Additional  
> parameters may be passed in to fine tune the cleaning
> +     * process.
> +     *
> +     * @param originalHtmlContent The original html content to be  
> cleaned.
> +     * @param params Additional parameters (implementation  
> dependent) for cleaning.
> +     * @return The cleaned HTML as a w3c DOM

You should use "the" and not "The". See 
http://dev.xwiki.org/xwiki/bin/view/Community/CodeStyle
which points to http://java.sun.com/j2se/javadoc/writingdoccomments/

>
> +     */
> +    Document clean(Reader originalHtmlContent, Map<String, String>  
> params);

Can you rename params to cleaningParameters since this is more  
explicit and this is what we discussed?

>
> }
>
> Modified: platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/ 
> internal/html/DefaultHTMLCleaner.java
> ===================================================================
> --- platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/ 
> internal/html/DefaultHTMLCleaner.java 2009-01-02 15:42:50 UTC (rev  
> 15019)
> +++ platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml/ 
> internal/html/DefaultHTMLCleaner.java 2009-01-02 16:07:40 UTC (rev  
> 15020)
> @@ -24,6 +24,7 @@
> import java.io.StringReader;
> import java.util.ArrayList;
> import java.util.List;
> +import java.util.Map;
>
> import org.htmlcleaner.CleanerProperties;
> import org.htmlcleaner.CleanerTransformations;
> @@ -64,20 +65,20 @@
>         this.filters = new ArrayList<CleaningFilter>();
>         this.filters.add(new ListCleaningFilter());
>         this.filters.add(new DocTypeCleaningFilter());
> -
> +
>         // The clean method below is thread safe. However it seems  
> that DOMOutputter.output() is not
>         // fully thread safe since it causes the following exception  
> on the first time it's called
>         // from different threads:
> -        //   Caused by: org.jdom.JDOMException: Reflection failed  
> while creating new JAXP document:
> -        //       duplicate class definition: org/apache/xerces/jaxp/ 
> DocumentBuilderFactoryImpl
> -        //   at  
> org.jdom.adapters.JAXPDOMAdapter.createDocument(JAXPDOMAdapter.java: 
> 191)
> -        //   at  
> org 
> .jdom 
> .adapters.AbstractDOMAdapter.createDocument(AbstractDOMAdapter.java: 
> 133)
> -        //   at  
> org.jdom.output.DOMOutputter.createDOMDocument(DOMOutputter.java:208)
> -        //   at  
> org.jdom.output.DOMOutputter.output(DOMOutputter.java:127)
> +        // Caused by: org.jdom.JDOMException: Reflection failed  
> while creating new JAXP document:
> +        // duplicate class definition: org/apache/xerces/jaxp/ 
> DocumentBuilderFactoryImpl
> +        // at  
> org.jdom.adapters.JAXPDOMAdapter.createDocument(JAXPDOMAdapter.java: 
> 191)
> +        // at  
> org 
> .jdom 
> .adapters.AbstractDOMAdapter.createDocument(AbstractDOMAdapter.java: 
> 133)
> +        // at  
> org.jdom.output.DOMOutputter.createDOMDocument(DOMOutputter.java:208)
> +        // at org.jdom.output.DOMOutputter.output(DOMOutputter.java: 
> 127)

Why did you remove the white space I had put to indent the stack  
trace? :)

>
>         // Since this only happens once, we call it first here at  
> initialization time (since there's
>         // no thread contention at that time).
>         // Note: This email thread seems to say it's thread safe but  
> that's not what we see here:
> -        //   
> http://osdir.com/ml/text.xml.xforms.chiba.devel/2006-09/msg00025.html
> +        // 
> http://osdir.com/ml/text.xml.xforms.chiba.devel/2006-09/msg00025.html

same here.

>
>         clean(new StringReader(""));
>     }
>
> @@ -90,25 +91,30 @@
>     {
>         org.w3c.dom.Document result;
>
> -        // HtmlCleaner is not threadsafe. Thus we need to recreate  
> an instance at each run since otherwise
> -        // we would need to synchronize this clean() method which  
> would slow down the whole system by
> +        // HtmlCleaner is not threadsafe. Thus we need to recreate  
> an instance at each run since
> +        // otherwise

Why did you break at 100 chars when we are breaking at 120?

>
> +        // we would need to synchronize this clean() method which  
> would slow down the whole system
> +        // by
>         // queuing up cleaning requests.
> -        // See 
> http://sourceforge.net/tracker/index.php?func=detail&aid=2139927&group_id=183053&atid=903699
> +        // See
> +        // 
> http://sourceforge.net/tracker/index.php?func=detail&aid=2139927&group_id=183053&atid=
> +        // 903699

Same here.

>
>         HtmlCleaner cleaner = new HtmlCleaner();
>         cleaner.setTransformations(getCleaningTransformations());
>         CleanerProperties cleanerProperties = cleaner.getProperties();
>         cleanerProperties.setOmitUnknownTags(true);
>
> -        // By default HTMLCleaner treats style and script tags as  
> CDATA. This is causing errors if we use
> +        // By default HTMLCleaner treats style and script tags as  
> CDATA. This is causing errors if
> +        // we use
>         // the best practice of using CDATA inside a script. For  
> example:
> -        //   <script type="text/javascript">
> -        //     //<![CDATA[
> -        //     ...
> -        //     // ]]>
> -        //   </script>
> +        // <script type="text/javascript">
> +        // //<![CDATA[
> +        // ...
> +        // // ]]>
> +        // </script>
>         // Thus we need to turn off this feature.
>         cleanerProperties.setUseCdataForScriptAndStyle(false);
> -
> +
>         TagNode cleanedNode;
>         try {
>             cleanedNode = cleaner.clean(originalHtmlContent);
> @@ -120,7 +126,7 @@
>
>         // Fix cleaned node bug
>         fixCleanedNodeBug(cleanedNode);
> -
> +
>         Document document = new JDomSerializer(cleanerProperties,  
> false).createJDom(cleanedNode);
>
>         // Perform other cleaning operation this time using the W3C  
> Document interface.
> @@ -139,9 +145,20 @@
>     }
>
>     /**
> -     * @return the cleaning transformations to perform on tags, in  
> addition to the base transformations done by
> -     *         HTML Cleaner
> +     * {...@inheritdoc}
> +     * <p>
> +     * {...@link DefaultHTMLCleaner} does not allow fine-tuning of  
> html cleaning via parameters.
> +     * </p>
>      */
> +    public org.w3c.dom.Document clean(Reader originalHtmlContent,  
> Map<String, String> params)
> +    {
> +        return clean(originalHtmlContent);

the params don't seem to be used?

>
> +    }
> +
> +    /**
> +     * @return the cleaning transformations to perform on tags, in  
> addition to the base transformations done by HTML
> +     *         Cleaner
> +     */
>     private CleanerTransformations getCleaningTransformations()
>     {
>         CleanerTransformations transformations = new  
> CleanerTransformations();
> @@ -174,9 +191,9 @@
>     }
>
>     /**
> -     * There's a known limitation (bug?) in HTML Cleaner where if  
> there's a XML declaration specified
> -     * it'll be copied as the first element of the body. Thus  
> remove it if it's there.
> -     * See https://sourceforge.net/forum/message.php?msg_id=4657800
> +     * There's a known limitation (bug?) in HTML Cleaner where if  
> there's a XML declaration specified it'll be copied as
> +     * the first element of the body. Thus remove it if it's there.  
> See
> +     * https://sourceforge.net/forum/message.php?msg_id=4657800
>      *
>      * @param cleanedNode the cleaned node (ie after the HTML  
> cleaning)
>      */


Thanks
-Vincent
http://xwiki.com
http://massol.net
http://xwiki.org





_______________________________________________
devs mailing list
[email protected]
http://lists.xwiki.org/mailman/listinfo/devs

Re: [xwiki-devs] [xwiki-notifications] r15020 - in platform/core/trunk/xwiki-xml/src/main/java/org/xwiki/xml: html internal/html

Reply via email to