Author: toad
Date: 2006-03-28 19:39:13 +0000 (Tue, 28 Mar 2006)
New Revision: 8343
Modified:
trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
trunk/freenet/src/freenet/node/Version.java
Log:
581: HTML content filter improvements:
- i18n: we now correctly detect UTF-8 documents
- we now delete the actual scripts as well as the tags indicating them
- we escape < and > in comments
Modified: trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
2006-03-28 18:11:19 UTC (rev 8342)
+++ trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
2006-03-28 19:39:13 UTC (rev 8343)
@@ -9,7 +9,6 @@
import freenet.support.Bucket;
import freenet.support.BucketFactory;
-import freenet.support.BucketTools;
import freenet.support.Logger;
/**
@@ -167,8 +166,10 @@
if(handler.defaultCharset != null) {
try {
- if((charset =
handler.charsetExtractor.getCharset(data, handler.defaultCharset)) != null)
+ if((charset =
handler.charsetExtractor.getCharset(data, handler.defaultCharset)) != null) {
+
Logger.minor(ContentFilter.class, "Returning charset: "+charset);
return charset;
+ }
} catch (DataFilterException e) {
// Ignore
}
Modified: trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-03-28 18:11:19 UTC (rev 8342)
+++ trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-03-28 19:39:13 UTC (rev 8343)
@@ -328,6 +328,7 @@
boolean inStyle = false; // has to be set on or off explicitly
by tags
boolean inScript = false; // has to be set on or off explicitly
by tags
boolean killText = false; // has to be set on or off explicitly
by tags
+ boolean killStyle = false;
int styleScriptRecurseCount = 0;
String currentStyleScriptChunk = new String();
String writeAfterTag = "";
@@ -340,7 +341,8 @@
}
for(int i=0;i<s.length();i++) {
- if(s.charAt(i) < 32) {
+ char c = s.charAt(i);
+ if(c < 32 && c != '\n' && c != '\r' ) {
// Not a real character
// STRONGLY suggests somebody is using a bogus
charset.
// This could be in order to break the filter.
@@ -349,7 +351,7 @@
}
String style = s.toString();
- if (pc.inStyle) {
+ if (pc.inStyle || pc.inScript) {
pc.currentStyleScriptChunk += style;
return; // is parsed and written elsewhere
}
@@ -359,6 +361,8 @@
void processTag(Vector splitTag, Writer w, HTMLParseContext pc)
throws IOException, DataFilterException {
// First, check that it is a recognized tag
+ for(int i=0;i<splitTag.size();i++)
+ Logger.minor(this, "Tag["+i+"]="+splitTag.get(i));
ParsedTag t = new ParsedTag(splitTag);
if (!pc.killTag) {
t = t.sanitize(pc);
@@ -392,13 +396,25 @@
return; // ignore it
if (pc.inStyle || pc.inScript) {
- pc.currentStyleScriptChunk += "<" + s + ">";
+ pc.currentStyleScriptChunk += s;
return; // </style> handler should write
}
if (pc.killTag) {
pc.killTag = false;
return;
}
+ StringBuffer sb = new StringBuffer();
+ for(int i=0;i<s.length();i++) {
+ char c = s.charAt(i);
+ if(c == '<') {
+ sb.append("&lt;");
+ } else if(c == '>') {
+ sb.append("&gt;");
+ } else {
+ sb.append(c);
+ }
+ }
+ s = sb;
w.write('<');
w.write(s.toString());
w.write('>');
@@ -452,11 +468,13 @@
for (int x = 1; x < len; x++)
unparsedAttrs[x - 1] = (String)
v.elementAt(x);
}
+ Logger.minor(this, "Element = "+element);
}
public ParsedTag sanitize(HTMLParseContext pc) throws
DataFilterException {
TagVerifier tv =
(TagVerifier)
allowedTagsVerifiers.get(element.toLowerCase());
+ Logger.minor(this, "Got verifier: "+tv+" for "+element);
if (tv == null) {
if (deleteWierdStuff) {
return null;
@@ -1208,7 +1226,9 @@
Hashtable h,
Hashtable hn,
HTMLParseContext pc) throws DataFilterException {
+ Logger.minor(this, "Finishing script/style");
// Finishing
+ setStyle(false, pc);
pc.styleScriptRecurseCount--;
if (pc.styleScriptRecurseCount < 0) {
if (deleteErrors)
@@ -1218,15 +1238,20 @@
throwFilterException("Too many nested </style> tags - ambiguous or invalid parsing, can't reliably filter so removing the inner tags - garbage may appear in browser");
return null;
}
- setStyle(false, pc);
- processStyle(pc);
+ if(!pc.killStyle) {
+ processStyle(pc);
+ pc.writeStyleScriptWithTag = true;
+ } else {
+ pc.killStyle = false;
+ pc.currentStyleScriptChunk = "";
+ }
pc.expectingBadComment = false;
- pc.writeStyleScriptWithTag = true;
// Pass it on, no params for </style>
return hn;
}
Hashtable start(Hashtable h, Hashtable hn, HTMLParseContext pc)
throws DataFilterException {
+ Logger.minor(this, "Starting script/style");
pc.styleScriptRecurseCount++;
if (pc.styleScriptRecurseCount > 1) {
if (deleteErrors)
@@ -1241,7 +1266,7 @@
if (type != null) {
if (!type.equalsIgnoreCase("text/css") /* FIXME
*/
) {
- pc.killText = true;
+ pc.killStyle = true;
pc.expectingBadComment = true;
return null; // kill the tag
}
@@ -1299,8 +1324,9 @@
Hashtable sanitizeHash(
Hashtable hn,
ParsedTag p,
- HTMLParseContext pc) {
- //Hashtable h = super.sanitizeHash(hn, p, pc);
+ HTMLParseContext pc) throws DataFilterException {
+ // Call parent so we swallow the scripting
+ Hashtable h = super.sanitizeHash(hn, p, pc);
return null; // Lose the tags
}
@@ -1508,8 +1534,7 @@
Hashtable sanitizeHash(
Hashtable h,
ParsedTag p,
- HTMLParseContext pc,
- int linkHtl) throws DataFilterException {
+ HTMLParseContext pc) throws DataFilterException {
Hashtable hn = super.sanitizeHash(h, p, pc);
/*
* Several possibilities: a) meta http-equiv=X
content=Y b) meta
@@ -1519,6 +1544,7 @@
String name = getHashString(h, "name");
String content = getHashString(h, "content");
String scheme = getHashString(h, "scheme");
+ Logger.minor(this, "meta: name="+name+", content="+content+", http-equiv="+http_equiv+", scheme="+scheme);
if (content != null) {
if (name != null && http_equiv == null) {
if (name.equalsIgnoreCase("Author")) {
@@ -1554,7 +1580,10 @@
}
// FIXME: add some more headers
- Dublin Core?
} else if
(http_equiv.equalsIgnoreCase("Content-Type")) {
+ Logger.minor(this, "Found http-equiv content-type="+content);
String[] typesplit =
splitType(content);
+ for(int
i=0;i<typesplit.length;i++)
+ Logger.minor(this,
"["+i+"] = "+typesplit[i]);
if
(typesplit[0].equalsIgnoreCase("text/html")
&& (typesplit[1] == null
||
typesplit[1].equalsIgnoreCase(pc.charset))) {
@@ -1567,7 +1596,7 @@
: ""));
}
if(typesplit[1] != null)
- pc.detectedCharset =
typesplit[1];
+ pc.detectedCharset =
typesplit[1].trim();
} else if (
http_equiv.equalsIgnoreCase("Content-Language")) {
hn.put("http-equiv",
"Content-Language");
@@ -1660,8 +1689,7 @@
Hashtable sanitizeHash(
Hashtable h,
ParsedTag p,
- HTMLParseContext pc,
- int linkHtl) throws DataFilterException {
+ HTMLParseContext pc) throws DataFilterException {
Hashtable hn = super.sanitizeHash(h, p, pc);
String xmlns = getHashString(h, "xmlns");
if (xmlns != null &&
xmlns.equals("http://www.w3.org/1999/xhtml"))
@@ -1674,6 +1702,7 @@
Logger.debug(
HTMLFilter.class,
"Sanitizing style: " + style);
+ if(style == null) return null;
Reader r = new StringReader(style);
Writer w = new StringWriter();
CSSParser pc = new CSSParser(r, w, false, cb);
Modified: trunk/freenet/src/freenet/node/Version.java
===================================================================
--- trunk/freenet/src/freenet/node/Version.java 2006-03-28 18:11:19 UTC (rev
8342)
+++ trunk/freenet/src/freenet/node/Version.java 2006-03-28 19:39:13 UTC (rev
8343)
@@ -20,7 +20,7 @@
public static final String protocolVersion = "1.0";
/** The build number of the current revision */
- private static final int buildNumber = 580;
+ private static final int buildNumber = 581;
/** Oldest build of Fred we will talk to */
private static final int lastGoodBuild = 555;