>>I've been using FCKeditor
FCKeditor has some MS Word filtering, but it is quite incomplete.
I've made my own editor, and here is its Word-filter JavaScript routine.
It may not be complete either, but it cleans out much more garbage than FCKeditor does.
You can also add your own filters.
/**
 * Cleans HTML pasted from Microsoft Word by running it through a fixed
 * sequence of regular-expression filters: Word-specific styles, class/style/
 * lang attributes, SPAN and FONT wrappers, XML/namespace tags, comments and
 * empty elements are stripped; bullet paragraphs become <LI> items; centered
 * paragraphs and table cells are normalized; every <TABLE> is tagged with
 * ID="new_table".
 *
 * The filters are order-dependent (e.g. attributes are removed before the
 * attribute-less <SPAN> unwrapping), so new filters should be inserted with
 * care.
 *
 * @param {string} html - HTML fragment to clean; null/undefined yields "".
 * @returns {string} The cleaned HTML.
 */
function cleanWord(html) {
  // Re-applies a global replacement until the text stops changing, so nested
  // constructs are handled at any depth (the previous code ran exactly three
  // passes). Every pattern passed in replaces a match with something strictly
  // shorter, so the loop always terminates.
  function replaceUntilStable(text, pattern, replacement) {
    var previous;
    do {
      previous = text;
      text = text.replace(pattern, replacement);
    } while (text !== previous);
    return text;
  }

  if (html == null) {
    return "";
  }

  var out = html;

  // Remove Word's <o:p> paragraph markers (empty ones first, then any content).
  out = out.replace(/<o:p>\s*<\/o:p>/g, "");
  out = out.replace(/<o:p>.*?<\/o:p>/g, "");

  // Remove mso-xxx styles.
  out = out.replace(/\s*mso-[^:]+:[^;"]+;?/gi, "");

  // Remove default margin / indent / alignment declarations. The variants
  // ending in a double quote match the last declaration of a style attribute
  // and put the closing quote back.
  out = out.replace(/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, "");
  out = out.replace(/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"");
  out = out.replace(/\s*TEXT-INDENT: 0cm\s*;/gi, "");
  out = out.replace(/\s*TEXT-INDENT: 0cm\s*"/gi, "\"");
  out = out.replace(/\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"");
  out = out.replace(/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"");
  out = out.replace(/\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"");
  out = out.replace(/\s*tab-stops:[^;"]*;?/gi, "");
  out = out.replace(/\s*tab-stops:[^"]*/gi, "");
  out = out.replace(/\s*FONT-FAMILY:[^;"]*;?/gi, "");

  // Remove class attributes.
  out = out.replace(/<(\w[^>]*)\s*class=([^ |>]*)([^>]*)/gi, "<$1$3");

  // Remove double-quoted style attributes, then any empty ones left over.
  out = out.replace(/<(\w[^>]*)style="([^\"]*)"([^>]*)/gi, "<$1$3");
  out = out.replace(/\s*style="\s*"/gi, "");

  // A SPAN holding only whitespace (with at least one space) collapses to a
  // single space; a completely empty SPAN is dropped.
  // NOTE(review): the literal space here may originally have been "&nbsp;"
  // before the code was posted through a mail archive - confirm.
  out = out.replace(/<SPAN[^>]*>\s* \s*<\/SPAN>/gi, " ");
  out = out.replace(/<SPAN[^>]*>\s*<\/SPAN>/gi, "");

  // Remove lang attributes.
  out = out.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");

  // Unwrap attribute-less SPANs, however deeply they are nested.
  out = replaceUntilStable(out, /<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, "$1");

  // Remove all FONT tags (opening and closing), keeping their content.
  out = replaceUntilStable(out, /<\/?FONT[^>]*>/gi, "");

  // Remove XML declarations.
  out = out.replace(/<\\?\?xml[^>]*>/gi, "");

  // Remove tags with an XML namespace prefix, e.g. <o:p> or </st1:place>.
  out = out.replace(/<\/?\w+:[^>]*>/gi, "");

  // Drop empty headings, then strip all attributes from H1..H6 opening tags.
  out = out.replace(/<H\d>\s*<\/H\d>/gi, "");
  out = out.replace(/<H([1-6])[^>]*>/gi, "<H$1>");

  // An underline/italic/strike element wrapping a single space becomes a space.
  out = out.replace(/<(U|I|STRIKE)> <\/\1>/g, " ");

  // Remove HTML comments.
  out = out.replace(/<!--[\s\S]*?-->/gi, "");

  // Remove empty elements, repeating so that parents emptied by the removal
  // of their children are removed as well.
  out = replaceUntilStable(out, /<([^\s>]+)[^>]*>\s*<\/\1>/g, "");

  // Transform bullet paragraphs (middle dot U+00B7 followed by padding) into
  // list items. The padding may be plain spaces, U+00A0 characters or
  // "&nbsp;" entities. The first form (padding still wrapped in a bare SPAN)
  // is normally already gone after the SPAN unwrapping above; it is kept as
  // a safety net.
  out = out.replace(
    /<P>\u00b7<SPAN>(?:&nbsp;|[ \u00a0])*<\/SPAN>([\s\S]*?)<\/P>/gi,
    "<LI>$1</LI>"
  );
  out = out.replace(
    /<P>\u00b7(?:&nbsp;|[ \u00a0])*([\s\S]*?)<\/P>/gi,
    "<LI>$1</LI>"
  );

  // Remove whitespace and "&nbsp;" entities at the very beginning.
  out = out.replace(/^(?:&nbsp;|\s)*/, "");

  // Replace <P align=center>...</P> with <BR><CENTER>...</CENTER> because the
  // align attribute is overridden by higher style declarations like justify.
  out = out.replace(
    /<P\s*align=center>([\s\S]*?)<\/P>/gi,
    "<BR><CENTER>$1</CENTER>"
  );

  // Merge adjacent centered blocks: drop a useless </CENTER>...<CENTER> pair,
  // keeping the <BR> between them.
  out = out.replace(/<\/CENTER>(\s*<BR>\s*)<CENTER>/gi, "$1");

  // Remove a useless <BR> at the start of a table cell.
  out = out.replace(/(<TD[^>]*>)\s*<BR>\s*/gi, "$1");

  // A cell whose whole content is one <CENTER> block gets align=center instead.
  out = out.replace(
    /(<TD[^>]*)>\s*<CENTER>([\s\S]*?)<\/CENTER>\s*<\/TD>/gi,
    "$1 align=center>$2</TD>"
  );

  // Unwrap the first paragraph inside a table cell.
  out = out.replace(
    /(<TD[^>]*>)\s*<P[^>]*>([\s\S]*?)\s*<\/P>\s*([\s\S]*?<\/TD>)/gi,
    "$1$2$3"
  );

  // Mark every table so that it can be initialized later.
  out = out.replace(/<TABLE/gi, "<TABLE ID=\"new_table\"");

  return out;
}
--
_______________________________________
REUSE CODE! Use custom tags;
See http://www.contentbox.com/claude/customtags/tagstore.cfm
(Please send any spam to this address: [EMAIL PROTECTED])
Thanks.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
Message: http://www.houseoffusion.com/lists.cfm/link=i:4:244715
Archives: http://www.houseoffusion.com/cf_lists/threads.cfm/4
Subscription: http://www.houseoffusion.com/lists.cfm/link=s:4
Unsubscribe:
http://www.houseoffusion.com/cf_lists/unsubscribe.cfm?user=11502.10531.4
Donations & Support: http://www.houseoffusion.com/tiny.cfm/54