Update of /var/cvs/contributions/CMSContainer/cmsc/richtext/src/java/org/mmbase/applications/wordfilter
In directory james.mmbase.org:/tmp/cvs-serv32469/cmsc/richtext/src/java/org/mmbase/applications/wordfilter
Modified Files:
Tag: v1_3
WordHtmlCleaner.java
Log Message:
CMSC-421 Wordfilter whitespaces
See also: http://cvs.mmbase.org/viewcvs/contributions/CMSContainer/cmsc/richtext/src/java/org/mmbase/applications/wordfilter
See also: http://www.mmbase.org/jira/browse/CMSC-421
Index: WordHtmlCleaner.java
===================================================================
RCS file: /var/cvs/contributions/CMSContainer/cmsc/richtext/src/java/org/mmbase/applications/wordfilter/WordHtmlCleaner.java,v
retrieving revision 1.9
retrieving revision 1.9.2.1
diff -u -b -r1.9 -r1.9.2.1
--- WordHtmlCleaner.java 7 Aug 2007 08:58:51 -0000 1.9
+++ WordHtmlCleaner.java 5 May 2008 15:26:20 -0000 1.9.2.1
@@ -80,7 +80,8 @@
xmlVersion += data[i]; // nog even het afsluitende haakje
// toevoegen
continue;
- } else if (data[i + 1] == '!') {
+ }
+ else if (data[i + 1] == '!') {
while (data[i] != '>') {
docType += data[i];
i++;
@@ -129,7 +130,8 @@
// xmlStr = shrinkBR(xmlStr);
log.debug("new value : " + xmlStr);
return xmlStr;
- } catch (IllegalStateException e) {
+ }
+ catch (IllegalStateException e) {
log.error("Clean html failed");
log.error(Logging.stackTrace(e));
}
@@ -145,8 +147,7 @@
private static String niceHtml(String xmlStr) {
try {
- xmlbs.XMLBS xmlbs = new xmlbs.XMLBS("<body>" + xmlStr
- + "</body>", xmlbsDTD);
+ xmlbs.XMLBS xmlbs = new xmlbs.XMLBS("<body>" + xmlStr + "</body>",
xmlbsDTD);
xmlbs.setRemoveEmptyTags(false); // Uitgezet omdat de <td/><td/>
onterecht werd gemerged
xmlbs.process();
ByteArrayOutputStream bout = new ByteArrayOutputStream();
@@ -160,14 +161,16 @@
if (i != -1) {
xmlStr = xmlStr.substring(0, i);
}
- } catch (Throwable t) {
+ }
+ catch (Throwable t) {
log.error(Logging.stackTrace(t));
}
return xmlStr;
}
/**
- * CMSC-416: FP: Using the DOTALL pattern matcher parameter, will solve
problems with linebreaks in hidden if blocks
+ * CMSC-416: FP: Using the DOTALL pattern matcher parameter, will solve
+ * problems with linebreaks in hidden if blocks
*/
private static String removeHtmlIfComments(String text) {
Pattern pattern =
Pattern.compile("<!--\\[if.*?endif]-->",Pattern.DOTALL);
@@ -192,17 +195,15 @@
}
private static String replaceParagraph(String text) {
+ // see CMSC-421 when you are going to change this code
+
// remove <p></p> (empty paragraphs)
-// CMSC-421: FP: Commented this out, because this is eating whitespace!
-// text =
text.replaceAll("<\\s{0,1}[pP]{1}\\s{0,1}></\\s{0,1}[pP]{1}\\s{0,1}>", "");
+ text = text.replaceAll("<[pP]{1}>\\s*</[pP]{1}>", "");
- // remove all remaining <p>
+ // remove all remaining <p> start tags
text = text.replaceAll("<\\s*[pP]{1}\\s*.*?>", "");
-
- // replace all remaining </p> with a <br><br>
-// CMSC-421: FP: Changed this to two newlines, because it was eating
whitespace
+ // replace all remaining </p> closing tags with a <br><br>
text = text.replaceAll("<\\s*/[pP]{1}\\s*.*?>", "<br/><br/>");
-
// remove all <br> at the end
text = text.replaceAll("(<\\s*[bB][rR]\\s*/?>|\\s| )+\\z", "");
return text;
@@ -211,7 +212,7 @@
private static String replaceHeaders(String text) {
// remove the starting header tags ( <h1> till <h7>)
text = text.replaceAll("<\\s*[hH]{1}[1-7]{1}\\s*.*?>",
"<strong>");
- // replace all remaining </p> with a <br><br>
+ // replace all remaining ending header tags ( </h1> till </h7>)
text = text.replaceAll("<\\s*/[hH]{1}[1-7]{1}\\s*.*?>",
"</strong><br />");
// remove all <br> at the end
text = text.replaceAll("(<\\s*[bB][rR]\\s*/?>|\\s| )+\\z",
"");
@@ -233,11 +234,11 @@
xml += xmlStr.substring(end, begin);
end = nextResult(xmlStr, "</U></FONT>", begin);
if (end > -1) {
- String link = xmlStr.substring(begin
- + "<U><FONT color=#0000ff>".length(), end);
+ String link = xmlStr.substring(begin + "<U><FONT
color=#0000ff>".length(), end);
xml += "<a href=\"" + stripHtml(link) + "\">" + link + "</a>";
end += "</U></FONT>".length();
- } else {
+ }
+ else {
xml += "<U><FONT color=#0000ff>";
end = begin + "<U><FONT color=#0000ff>".length();
}
@@ -251,8 +252,9 @@
/**
- * CMSC-417: FWP, this method fixes the problem with the 'ugly' lists
sometimes pasted from word,
- * these lists are created by adding spaces and tabs before and behind the
dots of the lists.
+ * CMSC-417: FWP, this method fixes the problem with the 'ugly' lists
+ * sometimes pasted from word, these lists are created by adding spaces and
+ * tabs before and behind the dots of the lists.
*/
private static String fixBadLists(String text) {
text = text.replaceAll("[??]", "");
@@ -285,7 +287,8 @@
if (end > -1) {
end += "</li>".length();
xml += xmlStr.substring(begin, end);
- } else {
+ }
+ else {
end = nextResult(xmlStr, "<li>", begin + "<li>".length());
if (end == -1) {
end = xmlStr.length();
@@ -302,13 +305,13 @@
if (end <= endList) {
xml += xmlStr.substring(begin, end) + "</li>";
end -= 1;
- } else {
+ }
+ else {
if (end > endList) {
xml += xmlStr.substring(begin, endList) + "</li>";
end = endList;
if (endList != xmlStr.length()) {
- xml += xmlStr.substring(endList, (endList + "</ol>"
- .length()));
+ xml += xmlStr.substring(endList, (endList +
"</ol>".length()));
end += "</ol>".length();
}
}
@@ -345,12 +348,14 @@
hrefBegin += "href=\"".length();
int hrefEnd = atag.indexOf("\"", hrefBegin);
xml += atag + atag.substring(hrefBegin, hrefEnd) + "</a>";
- } else if (nameBegin > -1) {
+ }
+ else if (nameBegin > -1) {
xml += atag + "</a>";
}
end += "</a>".length();
- } else {
+ }
+ else {
end += "</a>".length();
xml += xmlStr.substring(begin, end);
}
@@ -375,7 +380,8 @@
end = closinggt;
xml += xmlStr.substring(begin, end) + "></a>";
end += 2;
- } else {
+ }
+ else {
end = gt + 1;
xml += xmlStr.substring(begin, end);
}
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs