Custom MS Word Cleaner
----------------------

                 Key: ROL-1730
                 URL: https://issues.apache.org/roller/browse/ROL-1730
             Project: Roller
          Issue Type: Improvement
          Components: Weblog Editor
    Affects Versions: 4.0
         Environment: Roller 4.0
            Reporter: Terry Smith
            Assignee: Roller Unassigned
            Priority: Minor


Added the following function to htmlarea.js to clean up content copied and pasted from MS Word:

/**
 * Cleans up HTML pasted from MS Word inside an editor instance.
 *
 * Reads the current markup via editor.getInnerHTML(), replaces Word's
 * auto-corrected typographic characters with plain ASCII equivalents,
 * strips Word-specific wrapper tags, drops attributes from common block
 * tags, removes blocks that contain only "&nbsp;", and writes the result
 * back with editor.setHTML().
 *
 * @param {Object} editor - Object exposing getInnerHTML() and setHTML(html).
 */
function customWordCleaner(editor) {
        var html = editor.getInnerHTML();

        // Convert MS Word auto-correct characters. The characters are matched
        // directly by code point, so no escape()/unescape() round-trip is needed.
        html = html.replace(/[\u201C\u201D]/g, '"');   // left/right double quotes
        html = html.replace(/[\u2013\u2014]/g, '-');   // en dash / em dash
        html = html.replace(/[\u2018\u2019]/g, '\'');  // left/right single quotes
        html = html.replace(/\u00BD/g, '1/2');         // one-half symbol
        html = html.replace(/\u2122/g, 'TM');          // trademark symbol
        html = html.replace(/\u2026/g, '...');         // horizontal ellipsis

        // Remove MS Word wrapper tags (opening and closing), declarations and
        // comment openers ("<!..."), and namespaced tags such as <o:p>.
        // \b keeps look-alike tags (<input>, <insert>) from being treated as
        // <ins>; the "i" flag also catches upper-case tags such as <SPAN>.
        html = html.replace(
                /<\/?(?:(?:html|body|div|meta|span|xml|del|ins)\b|!|[ovwxp]:\w+)[^>]*>/gi,
                '');

        // Drop all attributes from common block tags, keeping the tag name as
        // written. \b prevents <pre>, <link>, <track> etc. from being rewritten
        // into <p>, <li>, <tr>.
        html = html.replace(/<(\/?)(p|ul|li|hr|table|tr|td)\b[^>]*>/gi, '<$1$2>');

        // Remove blocks whose only content is a single non-breaking space.
        html = html.replace(
                /<(?:p|ul|li|hr|table|tr|td)\b[^>]*>&nbsp;<\/(?:p|ul|li|hr|table|tr|td)\b[^>]*>/gi,
                '');

        editor.setHTML(html);
}

.
.
.
  parseTree(this._doc.body);
  //  Add cleaner here.
  customWordCleaner(this);

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.

Reply via email to