>>I've been using FCKeditor

FCKeditor has some MSWord filtering, but it is quite incomplete.
I've made my own editor, and here is its Word filter JavaScript routine.
It may not be complete either, but it cleans up much more garbage than FCK.
You can also add your own filters.

/**
 * Cleans up HTML that was pasted from Microsoft Word.
 *
 * The input is run through a fixed sequence of regular-expression
 * replacements. The order matters: later steps rely on attributes and
 * wrapper tags having already been stripped by earlier ones. To add a
 * filter of your own, insert another html.replace(...) at the
 * appropriate point in the sequence.
 *
 * @param {string} html  Markup to clean.
 * @return {string} The cleaned markup. Every <TABLE> start tag in the
 *     result carries ID="new_table".
 */
function cleanWord (html)
{
    var pass ;

    // Remove Word's <o:p> paragraph markers, empty or not.
    html = html.replace( /<o:p>\s*<\/o:p>/g, "" ) ;
    html = html.replace( /<o:p>.*?<\/o:p>/g, "" ) ;

    // Remove mso-xxx styles.
    html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, "" ) ;

    // Remove zero margin / zero text-indent declarations, whether they are
    // followed by ';' or are the last declaration before the closing quote.
    html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, "" ) ;
    html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ;

    html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, "" ) ;
    html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ;

    // These declarations are only removed when they are the last one
    // before the closing quote of the attribute value.
    html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ;
    html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ;
    html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ;

    html = html.replace( /\s*tab-stops:[^;"]*;?/gi, "" ) ;
    html = html.replace( /\s*tab-stops:[^"]*/gi, "" ) ;

    html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, "" ) ;

    // Remove class attributes. A quoted value is consumed as a whole so a
    // value containing spaces (class="a b") leaves no debris in the tag;
    // the quoted alternatives may not cross '>' so an unterminated quote
    // cannot swallow the following markup.
    html = html.replace( /<(\w[^>]*)\s*class=("[^">]*"|'[^'>]*'|[^ |>]*)([^>]*)/gi, "<$1$3" ) ;

    // Remove style attributes.
    html = html.replace( /<(\w[^>]*)style="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;

    // Remove empty style attributes.
    html = html.replace( /\s*style="\s*"/gi, '' ) ;

    // A span holding only a non-breaking space becomes that space;
    // a span holding nothing is dropped.
    html = html.replace( /<SPAN[^>]*>\s*&nbsp;\s*<\/SPAN>/gi, '&nbsp;' ) ;
    html = html.replace( /<SPAN[^>]*>\s*<\/SPAN>/gi, '' ) ;

    // Remove lang attributes (quoted values handled as for class).
    html = html.replace( /<(\w[^>]*) lang=("[^">]*"|'[^'>]*'|[^ |>]*)([^>]*)/gi, "<$1$3" ) ;

    // Unwrap attribute-less spans. Each pass peels one nesting level,
    // so three passes handle spans nested up to three deep.
    for ( pass = 0 ; pass < 3 ; pass++ )
    {
        html = html.replace( /<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, '$1' ) ;
    }

    // Remove all font tags (repeated in case a removal exposes another).
    for ( pass = 0 ; pass < 3 ; pass++ )
    {
        html = html.replace( /<\/?FONT[^>]*>/gi, '' ) ;
    }

    // Remove XML declarations such as <?xml ...>.
    html = html.replace( /<\\?\?xml[^>]*>/gi, "" ) ;

    // Remove tags with an XML namespace prefix, e.g. <o:p> or </v:shape>.
    html = html.replace( /<\/?\w+:[^>]*>/gi, "" ) ;

    // Remove empty headings.
    html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;

    // Strip all attributes from H1..H6 start tags.
    html = html.replace( /<H([1-6])[^>]*>/gi, '<H$1>' ) ;

    html = html.replace( /<(U|I|STRIKE)>&nbsp;<\/\1>/g, '&nbsp;' ) ;

    // Remove HTML comments.
    html = html.replace( /<!--[\s\S]*?-->/gi, '' ) ;

    // Remove empty tags (three times, so that tags which become empty
    // once their empty children are gone are removed as well).
    for ( pass = 0 ; pass < 3 ; pass++ )
    {
        html = html.replace( /<([^\s>]+)[^>]*>\s*<\/\1>/g, '' ) ;
    }

    // Transform bullet paragraphs into list items. \u00B7 is the middle
    // dot Word uses as bullet; it is written as an escape so the pattern
    // survives re-encoding of the source file.
    html = html.replace( /<P>\u00B7<SPAN>(&nbsp;| )*<\/SPAN>([\s\S]*?)<\/P>/gi, "<LI>$2</LI>" ) ;
    html = html.replace( /<P>\u00B7(&nbsp;| )*([\s\S]*?)<\/P>/gi, "<LI>$2</LI>" ) ;

    // Remove spaces at the beginning.
    html = html.replace( /^(&nbsp;| )*\s*/, '' ) ;

    // Replace <P align=center>...</P>, because the alignment is
    // overridden by higher style declarations like justify, etc.
    html = html.replace( /<P\s*align=center>([\s\S]*?)<\/P>/gi, '<BR><CENTER>$1</CENTER>' ) ;

    // Merge adjacent centered blocks: remove useless </CENTER><CENTER>.
    html = html.replace( /<\/CENTER>(\s*<BR>\s*)<CENTER>/gi, '$1' ) ;

    // Remove a useless leading <BR> in <TD>.
    html = html.replace( /(<TD[^>]*>)\s*<BR>\s*/gi, '$1' ) ;

    // Replace <CENTER>...</CENTER> that fills a TD by an align attribute.
    html = html.replace( /(<TD[^>]*)>\s*<CENTER>([\s\S]*?)<\/CENTER>\s*<\/TD>/gi,
        '$1 align=center>$2</TD>' ) ;

    // Unwrap the first paragraph inside a TD.
    html = html.replace( /(<TD[^>]*>)\s*<P[^>]*>([\s\S]*?)\s*<\/P>\s*([\s\S]*?<\/TD>)/gi,
        '$1$2$3' ) ;

    // Mark tables so they can be initialized afterwards (the code that
    // consumes this ID is not part of this routine).
    html = html.replace( /<TABLE/gi, '<TABLE ID="new_table"' ) ;

    return html ;
}

-- 
_______________________________________
REUSE CODE! Use custom tags;
See http://www.contentbox.com/claude/customtags/tagstore.cfm
(Please send any spam to this address: [EMAIL PROTECTED])
Thanks.



~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
Message: http://www.houseoffusion.com/lists.cfm/link=i:4:244715
Archives: http://www.houseoffusion.com/cf_lists/threads.cfm/4
Subscription: http://www.houseoffusion.com/lists.cfm/link=s:4
Unsubscribe: 
http://www.houseoffusion.com/cf_lists/unsubscribe.cfm?user=11502.10531.4
Donations & Support: http://www.houseoffusion.com/tiny.cfm/54

Reply via email to