Author: toad
Date: 2006-03-18 15:18:54 +0000 (Sat, 18 Mar 2006)
New Revision: 8275
Added:
trunk/freenet/src/freenet/clients/http/filter/CSSParser.java
trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java
trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java
trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex
trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java
trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java
trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java
trunk/freenet/src/freenet/clients/http/filter/Yytoken.java
trunk/freenet/src/freenet/support/io/NullWriter.java
Modified:
trunk/freenet/src/freenet/clients/http/FproxyToadlet.java
trunk/freenet/src/freenet/clients/http/ToadletContext.java
trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java
trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java
trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java
trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
trunk/freenet/src/freenet/clients/http/filter/MIMEType.java
trunk/freenet/src/freenet/node/TextModeClientInterface.java
trunk/freenet/src/freenet/node/Version.java
Log:
544:
Anonymity filter.
Modified: trunk/freenet/src/freenet/clients/http/FproxyToadlet.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/FproxyToadlet.java 2006-03-18
14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/FproxyToadlet.java 2006-03-18
15:18:54 UTC (rev 8275)
@@ -6,10 +6,15 @@
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
import freenet.client.FetchException;
import freenet.client.FetchResult;
import freenet.client.HighLevelSimpleClient;
+import freenet.clients.http.filter.ContentFilter;
+import freenet.clients.http.filter.MIMEType;
+import freenet.clients.http.filter.UnsafeContentTypeException;
import freenet.config.Config;
import freenet.config.InvalidConfigValueException;
import freenet.config.SubConfig;
@@ -19,15 +24,21 @@
import freenet.pluginmanager.HTTPRequest;
import freenet.pluginmanager.PproxyToadlet;
import freenet.support.Bucket;
-import freenet.support.BucketTools;
import freenet.support.HTMLEncoder;
+import freenet.support.HexUtil;
import freenet.support.Logger;
import freenet.support.MultiValueTable;
public class FproxyToadlet extends Toadlet {
- public FproxyToadlet(HighLevelSimpleClient client) {
+ final byte[] random;
+
+ // ?force= links expire between 1 and 2 hours after creation (current and previous grain are both accepted).
+ long FORCE_GRAIN_INTERVAL = 60*60*1000;
+
+ public FproxyToadlet(HighLevelSimpleClient client, byte[] random) {
super(client);
+ this.random = random;
}
public String supportedMethods() {
@@ -54,10 +65,9 @@
//String ks = uri.toString();
String ks = uri.getPath();
- HTTPRequest request = new HTTPRequest(uri);
+ HTTPRequest httprequest = new HTTPRequest(uri);
if (ks.equals("/")) {
- HTTPRequest httprequest = new HTTPRequest(uri);
if (httprequest.isParameterSet("key")) {
MultiValueTable headers = new MultiValueTable();
@@ -88,7 +98,53 @@
try {
Logger.minor(this, "Fproxy fetching "+key);
FetchResult result = fetch(key);
- writeReply(ctx, 200, result.getMimeType(), "OK",
result.asBucket());
+
+ // Now, is it safe?
+
+ Bucket data = result.asBucket();
+
+ String typeName = result.getMimeType();
+
+ String reqParam = httprequest.getParam("type", null);
+
+ if(reqParam != null)
+ typeName = reqParam;
+
+ Logger.minor(this, "Type: "+typeName+"
("+result.getMimeType()+" "+reqParam+")");
+
+ long now = System.currentTimeMillis();
+
+ String forceString = httprequest.getParam("force");
+ boolean force = false;
+ if(forceString != null) {
+ if(forceString.equals(getForceValue(key, now))
||
+
forceString.equals(getForceValue(key, now-FORCE_GRAIN_INTERVAL)))
+ force = true;
+ }
+
+ try {
+ if(!force)
+ data = ContentFilter.filter(data,
ctx.getBucketFactory(), typeName);
+
+ // Send the data, intact
+ writeReply(ctx, 200, typeName, "OK", data);
+ } catch (UnsafeContentTypeException e) {
+ StringBuffer buf = new StringBuffer();
+ ctx.getPageMaker().makeHead(buf, "Potentially
Dangerous Content");
+ buf.append("<h1>");
+ buf.append(e.getHTMLEncodedTitle());
+ buf.append("</h1>\n");
+ buf.append(e.getExplanation());
+ buf.append("<p>Your options are:</p><ul>\n");
+ buf.append("<li><a
href=\"/"+key.toString(false)+"?type=text/plain\">Click here</a> to open the
file as plain text (this should not be dangerous, but it may be
garbled).</li>\n");
+ // FIXME: is this safe? See bug #131
+ buf.append("<li><a
href=\"/"+key.toString(false)+"?type=application/x-msdownload\">Click here</a>
to force your browser to download the file to disk.</li>\n");
+ buf.append("<li><a
href=\"/"+key.toString(false)+"?force="+getForceValue(key, now)+"\">Click
here</a> to open the file as "+HTMLEncoder.encode(typeName)+".</li>\n");
+ buf.append("<li><a href=\"/\">Click here</a> to
go to the FProxy home page.</li>\n");
+ buf.append("</ul>");
+ ctx.getPageMaker().makeTail(buf);
+ writeReply(ctx, 200, "text/html", "OK",
buf.toString());
+ }
} catch (FetchException e) {
String msg = e.getMessage();
String extra = "";
@@ -108,6 +164,18 @@
}
}
+ private String getForceValue(FreenetURI key, long time) {
+ try {
+ MessageDigest md5 =
MessageDigest.getInstance("SHA-256");
+ md5.update(random);
+ md5.update(key.toString(false).getBytes());
+ md5.update(Long.toString(time /
FORCE_GRAIN_INTERVAL).getBytes());
+ return HexUtil.bytesToHex(md5.digest());
+ } catch (NoSuchAlgorithmException e) {
+ throw new Error(e);
+ }
+ }
+
public static void maybeCreateFproxyEtc(Node node, Config config)
throws IOException, InvalidConfigValueException {
SubConfig fproxyConfig = new SubConfig("fproxy", config);
@@ -118,7 +186,9 @@
HighLevelSimpleClient client =
node.makeClient(RequestStarter.INTERACTIVE_PRIORITY_CLASS);
node.setToadletContainer(server);
- FproxyToadlet fproxy = new FproxyToadlet(client);
+ byte[] random = new byte[32];
+ node.random.nextBytes(random);
+ FproxyToadlet fproxy = new FproxyToadlet(client,
random);
node.setFproxy(fproxy);
server.register(fproxy, "/", false);
@@ -141,7 +211,7 @@
server.register(darknetToadlet, "/darknet/", true);
} catch (IOException ioe) {
- Logger.error(node,"Failed to start fproxy on
"+fproxyConfig.getString("bindTo")+":"+fproxyConfig.getInt("port"));
+ Logger.error(node,"Failed to start fproxy: "+ioe, ioe);
}
fproxyConfig.finishedInitialization();
Modified: trunk/freenet/src/freenet/clients/http/ToadletContext.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/ToadletContext.java 2006-03-18
14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/ToadletContext.java 2006-03-18
15:18:54 UTC (rev 8275)
@@ -3,6 +3,7 @@
import java.io.IOException;
import freenet.support.Bucket;
+import freenet.support.BucketFactory;
import freenet.support.MultiValueTable;
/**
@@ -35,5 +36,7 @@
* Get the page maker object.
*/
PageMaker getPageMaker();
+
+ BucketFactory getBucketFactory();
}
Modified: trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -32,18 +32,19 @@
private final MultiValueTable headers;
private final OutputStream sockOutputStream;
private final PageMaker pagemaker;
+ private final BucketFactory bf;
/** Is the context closed? If so, don't allow any more writes. This is
because there
* may be later requests.
*/
private boolean closed;
- public ToadletContextImpl(Socket sock, MultiValueTable headers, String
CSSName) throws IOException {
+ public ToadletContextImpl(Socket sock, MultiValueTable headers, String
CSSName, BucketFactory bf) throws IOException {
this.sock = sock;
this.headers = headers;
this.closed = false;
sockOutputStream = sock.getOutputStream();
-
+ this.bf = bf;
pagemaker = new PageMaker(CSSName);
}
@@ -185,7 +186,7 @@
boolean shouldDisconnect =
shouldDisconnectAfterHandled(split[2].equals("HTTP/1.0"), headers);
- ToadletContextImpl ctx = new
ToadletContextImpl(sock, headers, container.getCSSName());
+ ToadletContextImpl ctx = new
ToadletContextImpl(sock, headers, container.getCSSName(), bf);
/*
* if we're handling a POST, copy the data into
a bucket now,
@@ -315,4 +316,8 @@
if(closed) throw new ToadletContextClosedException();
BucketTools.copyTo(data, sockOutputStream, Long.MAX_VALUE);
}
+
+ public BucketFactory getBucketFactory() {
+ return bf;
+ }
}
Added: trunk/freenet/src/freenet/clients/http/filter/CSSParser.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSParser.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSParser.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,59 @@
+package freenet.clients.http.filter;
+
+import java.io.Reader;
+import java.io.Writer;
+
+import freenet.support.Logger;
+
+/**
+ * WARNING: this is not as thorough as the HTML filter - we do not
+ * enumerate all possible attributes etc. New versions of the spec could
+ * conceivably lead to new risks. This could happen through: a) another way
+ * to include URLs, apart from @import and url() (we are safe from new @
+ * directives though); b) a way to specify the MIME type of includes, IF
+ * those includes could be a risky type (HTML, CSS, etc). This is still FAR
+ * more rigorous than the old filter though.
+ * <p>
+ * If you want extra paranoia, turn on paranoidStringCheck, which will
+ * throw an exception when it encounters strings with colons in; then the
+ * only risk is something that includes, and specifies the type of, HTML,
+ * XML or XSL.
+ * </p>
+ */
+class CSSParser extends CSSTokenizerFilter {
+
+ final FilterCallback cb;
+
+ CSSParser(
+ Reader r,
+ Writer w,
+ boolean paranoidStringCheck,
+ FilterCallback cb) {
+ super(r, w, paranoidStringCheck);
+ this.cb = cb;
+ this.deleteErrors = super.deleteErrors;
+ }
+
+ void throwError(String s) throws DataFilterException {
+ HTMLFilter.throwFilterException(s);
+ }
+
+ String processImportURL(String s) {
+ return "\""
+ + HTMLFilter.sanitizeURI(HTMLFilter.stripQuotes(s),
"text/css", null, cb)
+ + "\"";
+ }
+
+ String processURL(String s) {
+ return HTMLFilter.sanitizeURI(HTMLFilter.stripQuotes(s), null,
null, cb);
+ }
+
+ void log(String s) {
+ if (Logger.shouldLog(Logger.DEBUG, this))
+ Logger.debug(this, s);
+ }
+
+ void logError(String s) {
+ Logger.error(this, s);
+ }
+}
\ No newline at end of file
Added: trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,83 @@
+package freenet.clients.http.filter;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.util.HashMap;
+
+import freenet.support.Bucket;
+import freenet.support.BucketFactory;
+import freenet.support.HTMLEncoder;
+import freenet.support.Logger;
+import freenet.support.io.NullWriter;
+
+public class CSSReadFilter implements ContentDataFilter, CharsetExtractor {
+
+ public Bucket readFilter(Bucket bucket, BucketFactory bf, String
charset,
+ HashMap otherParams, FilterCallback cb) throws
DataFilterException,
+ IOException {
+ if (Logger.shouldLog(Logger.DEBUG, this))
+ Logger.debug(
+ this,
+ "running "
+ + this
+ + " on "
+ + bucket
+ + ","
+ + charset);
+ InputStream strm = bucket.getInputStream();
+ Bucket temp = bf.makeBucket(bucket.size());
+ OutputStream os = temp.getOutputStream();
+ Reader r;
+ Writer w;
+ try {
+ r = new BufferedReader(new InputStreamReader(strm,
charset), 32768);
+ w = new BufferedWriter(new OutputStreamWriter(os,
charset), 32768);
+ } catch (UnsupportedEncodingException e) {
+ os.close();
+ strm.close();
+ throw new DataFilterException("Warning: Unknown
character set ("+charset+")", "Warning: Unknown character set
("+HTMLEncoder.encode(charset)+")",
+ "<p><b>Unknown character set</b> The
page you are about to display has an unknown character set. "+
+ "This means that we are not able to
filter the page, and it may compromize your anonymity.");
+ }
+ CSSParser parser = new CSSParser(r, w, false, cb);
+ parser.parse();
+ r.close();
+ w.close();
+ return temp;
+ }
+
+ public Bucket writeFilter(Bucket data, BucketFactory bf, String charset,
+ HashMap otherParams, FilterCallback cb) throws
DataFilterException,
+ IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ public String getCharset(Bucket bucket, String parseCharset) throws
DataFilterException, IOException {
+ InputStream strm = bucket.getInputStream();
+ Writer w = new NullWriter();
+ Reader r;
+ try {
+ r = new BufferedReader(new InputStreamReader(strm,
parseCharset), 32768);
+ } catch (UnsupportedEncodingException e) {
+ throw new Error(e);
+ }
+ CSSParser parser = new CSSParser(r, w, false, new
NullFilterCallback());
+ try {
+ parser.parse();
+ } catch (Throwable t) {
+ // Ignore ALL errors!
+ Logger.minor(this, "Caught "+t+" trying to detect MIME
type with "+parseCharset);
+ }
+ r.close();
+ return parser.detectedCharset;
+ }
+
+}
Added: trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,1499 @@
+/* The following code was generated by JFlex 1.3.5 on 18/03/06 13:46 */
+
+package freenet.clients.http.filter;
+import java.io.*;
+import java.util.*;
+/* This class tokenizes a CSS2 Reader stream, writes it out to the output
Writer, and filters any URLs found */
+// WARNING: this is not as thorough as the HTML parser - new versions of the
standard could lead to anonymity risks. See comments in SaferFilter.java
+// Mostly from http://www.w3.org/TR/REC-CSS2/grammar.html
+
+
+/**
+ * This class is a scanner generated by
+ * <a href="http://www.jflex.de/">JFlex</a> 1.3.5
+ * on 18/03/06 13:46 from the specification file
+ * <tt>file:/usr/src/cvs/eclipse-workspace/Freenet
0.7/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex</tt>
+ */
+class CSSTokenizerFilter {
+
+ /** This character denotes the end of file */
+ final public static int YYEOF = -1;
+
+ /** initial size of the lookahead buffer */
+ final private static int YY_BUFFERSIZE = 16384;
+
+ /** lexical states */
+ final public static int YYINITIAL = 0;
+
+ /**
+ * Translates characters to character classes
+ */
+ final private static String yycmap_packed =
+ "\11\0\1\3\1\3\1\0\1\3\1\3\22\0\1\5\1\43\1\11"+
+ "\1\25\1\4\1\63\1\4\1\12\1\23\1\14\1\41\1\56\1\26"+
+ "\1\10\1\13\1\40\12\1\1\64\1\35\1\42\1\46\1\44\1\24"+
+ "\1\31\1\50\1\6\1\54\1\52\1\51\1\53\1\22\1\60\1\20"+
+ "\1\7\1\62\1\30\1\32\1\21\1\34\1\33\1\7\1\17\1\15"+
+ "\1\16\1\27\1\7\1\55\1\57\1\7\1\61\1\4\1\2\4\4"+
+ "\1\50\1\6\1\54\1\52\1\51\1\53\1\22\1\60\1\20\1\7"+
+ "\1\62\1\30\1\32\1\21\1\34\1\33\1\7\1\17\1\15\1\16"+
+ "\1\27\1\7\1\55\1\57\1\7\1\61\1\36\1\47\1\37\1\45"+
+ "\uff81\0";
+
+ /**
+ * Translates characters to character classes
+ */
+ final private static char [] yycmap = yy_unpack_cmap(yycmap_packed);
+
+ /**
+ * Translates a state to a row index in the transition table
+ */
+ final private static int yy_rowMap [] = {
+ 0, 53, 106, 159, 212, 265, 318, 371, 424, 477,
+ 53, 530, 53, 583, 636, 53, 689, 742, 795, 848,
+ 901, 954, 1007, 159, 1060, 477, 53, 1113, 53, 53,
+ 1166, 1219, 371, 1272, 53, 424, 1325, 1378, 1431, 1484,
+ 1537, 1590, 1643, 1696, 1749, 1802, 1855, 1908, 1961, 2014,
+ 2067, 2120, 2173, 53, 53, 2226, 2279, 2332, 53, 371,
+ 424, 2385, 2438, 2491, 2544, 2597, 2650, 2703, 2756, 2809,
+ 2862, 2915, 2968, 3021, 3074, 3127, 3180, 3233, 53, 3286,
+ 3339, 3392, 3445, 3498, 3551, 3604, 3657, 3710, 3763, 3816,
+ 3869, 3922, 3975, 4028, 4081, 4134, 4187, 4240, 1696, 4293,
+ 4346, 4399, 4452, 4505, 4558, 53, 4611, 53, 4664, 4717,
+ 4770, 4823, 4876, 4929, 4982, 4081, 5035, 5088, 53, 5141,
+ 5194, 5247, 5300, 5353, 5406, 5459, 1696, 5512, 5565, 2067,
+ 5618, 5671, 5724, 5777, 5830, 4929, 5883, 5936, 5989, 6042,
+ 6095, 6148, 53, 6201, 6254, 6307, 6360, 6413, 1696, 6466,
+ 6519, 6572, 6625, 6678, 1537, 6731, 6784, 5035, 5989, 6837,
+ 6890, 6943, 5088, 6095, 6996, 5141, 7049, 7102, 7155, 7208,
+ 7261, 7314, 7367, 7420, 7473, 7526, 5989, 7579, 6095, 7632,
+ 7685, 53, 7738, 7791, 7844, 7897, 7950, 8003, 8056, 8109,
+ 8162, 8162, 8215, 8268, 8321, 8374, 8427, 8480, 8533, 8586,
+ 7897, 7897, 8639, 8692, 8745, 8798, 8851, 8798, 8162, 8904,
+ 8957, 9010, 9063, 9116, 9169, 53, 9222, 9275, 9328, 9381,
+ 9434, 53, 9487, 9540, 9593, 9646, 9699, 1696, 9752, 9805,
+ 9858, 9911, 9911, 9964, 10017, 10070, 10123, 10176, 8056, 10229,
+ 10282, 10335, 10388, 10441, 8798, 10494, 10547, 10600, 9328, 10653,
+ 53, 10706, 10759, 10812, 10865, 10918, 10971, 11024, 11077, 11130,
+ 11183, 11236, 11289, 11342, 11289, 11395, 11448, 11501, 9063, 11554,
+ 11607, 11660, 11713, 11766, 11819, 11872, 11925, 11978, 12031, 12084,
+ 12137, 12190, 9911, 53, 12243, 12296, 12349, 12402, 12455, 12508,
+ 12561, 12614, 12667, 12720, 12773, 12826, 12879, 12932, 12985, 13038,
+ 13091, 13144, 13197, 13250, 13303
+ };
+
+ /**
+ * The packed transition table of the DFA (part 0)
+ */
+ final private static String yy_packed0 =
+ "\1\2\1\3\1\4\1\5\1\2\1\5\2\6\1\7"+
+ "\1\10\1\11\1\12\1\13\6\6\2\2\1\14\1\15"+
+ "\1\16\1\6\1\17\3\6\1\20\1\21\1\22\1\23"+
+ "\1\15\1\24\1\25\1\15\1\26\1\2\1\27\6\6"+
+ "\1\15\4\6\1\2\1\15\66\0\1\3\1\30\1\31"+
+ "\1\0\1\31\3\6\2\0\1\32\1\0\6\6\1\33"+
+ "\2\0\1\34\2\6\1\0\3\6\1\35\12\0\6\6"+
+ "\1\0\4\6\1\36\2\0\1\37\1\6\1\0\2\6"+
+ "\1\37\41\6\5\37\10\6\3\0\1\5\1\0\1\5"+
+ "\30\0\1\21\1\22\26\0\1\6\1\30\1\31\1\0"+
+ "\1\31\3\6\4\0\6\6\1\33\2\0\1\34\2\6"+
+ "\1\0\3\6\1\35\12\0\6\6\1\0\4\6\12\0"+
+ "\1\40\54\0\2\41\1\42\6\41\1\43\53\41\2\44"+
+ "\1\45\7\44\1\43\52\44\1\0\1\46\64\0\1\47"+
+ "\1\50\3\0\1\47\2\51\4\0\6\51\4\0\2\51"+
+ "\1\0\3\51\13\0\5\47\1\51\1\0\4\51\3\0"+
+ "\1\6\1\30\1\31\1\0\1\31\3\6\4\0\2\6"+
+ "\1\52\3\6\1\33\2\0\1\34\2\6\1\0\3\6"+
+ "\1\35\12\0\6\6\1\53\4\6\3\0\1\54\1\55"+
+ "\3\0\2\54\5\0\3\54\1\56\2\54\4\0\2\54"+
+ "\1\0\1\57\1\60\1\54\13\0\3\54\1\61\1\62"+
+ "\1\54\1\0\4\54\5\0\1\21\1\0\1\21\62\0"+
+ "\1\22\1\0\1\22\120\0\1\63\66\0\1\64\57\0"+
+ "\1\65\74\0\1\66\64\0\1\67\21\0\1\31\1\0"+
+ "\1\31\20\0\1\34\6\0\1\35\30\0\1\70\1\71"+
+ "\1\34\1\0\1\34\2\70\5\0\6\70\4\0\2\70"+
+ "\1\0\3\70\13\0\6\70\1\0\4\70\3\0\1\72"+
+ "\1\30\1\6\1\0\1\6\1\72\2\6\4\0\6\6"+
+ "\1\33\2\0\1\34\2\6\1\0\3\6\1\35\12\0"+
+ "\5\72\1\6\1\0\4\6\46\0\1\73\20\0\2\41"+
+ "\1\42\6\41\1\74\53\41\2\44\1\45\7\44\1\75"+
+ "\52\44\1\0\1\46\13\0\1\36\1\0\1\76\1\77"+
+ "\1\0\1\100\7\0\1\101\1\102\15\0\1\103\1\104"+
+ "\1\0\1\105\3\0\1\106\1\0\1\107\1\36\2\0"+
+ "\1\110\1\50\3\0\1\110\2\51\4\0\6\51\4\0"+
+ "\2\51\1\0\3\51\13\0\5\110\1\51\1\0\4\51"+
+ "\3\0\1\111\1\51\1\0\2\51\1\111\41\51\5\111"+
+ "\10\51\1\0\1\51\1\50\3\0\3\51\4\0\6\51"+
+ "\4\0\2\51\1\0\3\51\13\0\6\51\1\0\4\51"+
+ "\3\0\1\6\1\30\1\31\1\0\1\31\3\6\4\0"+
+ "\6\6\1\33\2\0\1\34\1\6\1\112\1\0\3\6"+
+ "\1\35\12\0\6\6\1\0\4\6\3\0\1\113\4\0"+
+ "\1\113\15\0\1\114\23\0\5\113\10\0\1\115\1\54"+
+ "\1\116\3\115\3\54\1\0\3\115\6\54\4\115\2\54"+
+ "\1\115\3\54\1\117\1\115\1\117\10\115\6\54\1\115"+
+ "\4\54\2\115\1\0\1\120\1\54\1\0\2\54\1\120"+
+ "\41\54\5\120\10\54\1\115\1\54\1\116\3\115\3\54"+
+ "\1\0\3\115\6\54\4\115\2\54\1\115\1\121\2\54"+
+ "\1\117\1\115\1\117\10\115\6\54\1\115\4\54\3\115"+
+ "\1\54\1\116\3\115\3\54\1\0\3\115\6\54\4\115"+
+ "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\1\54"+
+ "\1\122\4\54\1\115\4\54\3\115\1\54\1\116\3\115"+
+ "\3\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+ "\1\117\1\115\1\117\10\115\1\123\5\54\1\115\4\54"+
+ "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\6\54"+
+ "\4\115\2\54\1\115\2\54\1\124\1\117\1\115\1\117"+
+ "\10\115\6\54\1\115\4\54\3\115\1\54\1\116\3\115"+
+ "\3\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+ "\1\117\1\115\1\117\10\115\6\54\1\115\1\54\1\125"+
+ "\2\54\2\115\41\63\1\126\23\63\10\0\1\127\131\0"+
+ "\1\130\10\0\1\70\1\71\1\31\1\0\1\31\3\70"+
+ "\4\0\6\70\3\0\1\34\2\70\1\0\3\70\1\35"+
+ "\12\0\6\70\1\0\4\70\3\0\1\131\1\70\1\0"+
+ "\2\70\1\131\41\70\5\131\10\70\1\0\1\132\1\30"+
+ "\1\6\1\0\1\6\1\132\2\6\4\0\6\6\1\33"+
+ "\2\0\1\34\2\6\1\0\3\6\1\35\12\0\5\132"+
+ "\1\6\1\0\4\6\52\0\1\133\35\0\1\36\62\0"+
+ "\1\76\62\0\1\36\14\0\1\36\106\0\1\36\2\0"+
+ "\1\36\37\0\1\36\24\0\1\36\56\0\1\134\45\0"+
+ "\1\36\113\0\1\36\63\0\1\106\5\0\1\135\1\50"+
+ "\3\0\1\135\2\51\4\0\6\51\4\0\2\51\1\0"+
+ "\3\51\13\0\5\135\1\51\1\0\4\51\3\0\1\136"+
+ "\1\50\1\51\1\0\1\51\1\136\2\51\4\0\6\51"+
+ "\4\0\2\51\1\0\3\51\13\0\5\136\1\51\1\0"+
+ "\4\51\3\0\1\6\1\30\1\31\1\0\1\31\3\6"+
+ "\4\0\6\6\1\137\2\0\1\34\2\6\1\0\3\6"+
+ "\1\35\12\0\6\6\1\0\4\6\3\0\1\140\4\0"+
+ "\1\140\1\0\1\141\13\0\1\142\23\0\5\140\34\0"+
+ "\1\142\40\0\11\115\1\0\23\115\1\117\1\115\1\117"+
+ "\26\115\1\120\1\54\1\115\2\54\1\120\26\54\1\143"+
+ "\1\54\1\143\10\54\5\120\10\54\1\115\1\144\1\116"+
+ "\1\54\1\115\1\54\1\144\2\54\1\0\3\115\6\54"+
+ "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+ "\5\144\1\54\1\115\4\54\3\115\1\54\1\116\3\115"+
+ "\3\54\1\0\3\115\6\54\4\115\2\54\1\115\1\54"+
+ "\1\145\1\54\1\117\1\115\1\117\10\115\6\54\1\115"+
+ "\4\54\3\115\1\54\1\116\3\115\3\54\1\0\3\115"+
+ "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+ "\10\115\2\54\1\146\3\54\1\115\4\54\3\115\1\54"+
+ "\1\116\3\115\3\54\1\0\3\115\5\54\1\147\4\115"+
+ "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\6\54"+
+ "\1\115\4\54\3\115\1\54\1\116\3\115\3\54\1\0"+
+ "\3\115\4\54\1\150\1\54\4\115\2\54\1\115\3\54"+
+ "\1\117\1\115\1\117\10\115\6\54\1\115\4\54\3\115"+
+ "\1\54\1\116\3\115\3\54\1\0\3\115\6\54\4\115"+
+ "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\1\151"+
+ "\5\54\1\115\4\54\2\115\40\63\1\152\1\153\23\63"+
+ "\10\0\1\154\113\0\1\155\26\0\1\156\1\71\1\70"+
+ "\1\0\1\70\1\156\2\70\4\0\6\70\3\0\1\34"+
+ "\2\70\1\0\3\70\1\35\12\0\5\156\1\70\1\0"+
+ "\4\70\3\0\1\157\1\30\1\6\1\0\1\6\1\157"+
+ "\2\6\4\0\6\6\1\33\2\0\1\34\2\6\1\0"+
+ "\3\6\1\35\12\0\5\157\1\6\1\0\4\6\54\0"+
+ "\1\36\34\0\1\36\43\0\1\160\1\50\3\0\1\160"+
+ "\2\51\4\0\6\51\4\0\2\51\1\0\3\51\13\0"+
+ "\5\160\1\51\1\0\4\51\3\0\1\161\1\50\1\51"+
+ "\1\0\1\51\1\161\2\51\4\0\6\51\4\0\2\51"+
+ "\1\0\3\51\13\0\5\161\1\51\1\0\4\51\2\0"+
+ "\2\162\1\163\1\164\1\162\1\164\3\162\1\165\1\166"+
+ "\1\162\1\167\6\162\1\170\41\162\1\0\1\171\4\0"+
+ "\1\171\1\0\1\141\13\0\1\172\23\0\5\171\11\0"+
+ "\1\173\4\0\1\173\41\0\5\173\34\0\1\172\40\0"+
+ "\1\115\1\174\1\116\1\54\1\115\1\54\1\174\2\54"+
+ "\1\0\3\115\6\54\4\115\2\54\1\115\3\54\1\117"+
+ "\1\115\1\117\10\115\5\174\1\54\1\115\4\54\3\115"+
+ "\1\54\1\116\3\115\3\54\1\0\3\115\6\54\4\115"+
+ "\2\54\1\115\2\54\1\175\1\117\1\115\1\117\10\115"+
+ "\6\54\1\115\4\54\3\115\1\54\1\116\3\115\3\54"+
+ "\1\0\3\115\3\54\1\176\2\54\4\115\2\54\1\115"+
+ "\3\54\1\117\1\115\1\117\10\115\6\54\1\115\4\54"+
+ "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\6\54"+
+ "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+ "\1\54\1\177\4\54\1\115\4\54\3\115\1\54\1\116"+
+ "\3\115\3\54\1\0\3\115\1\54\1\200\4\54\4\115"+
+ "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\6\54"+
+ "\1\115\4\54\3\115\1\54\1\116\3\115\3\54\1\0"+
+ "\3\115\2\54\1\201\3\54\4\115\2\54\1\115\3\54"+
+ "\1\117\1\115\1\117\10\115\6\54\1\115\4\54\2\115"+
+ "\40\63\1\202\1\153\23\63\20\0\1\203\45\0\1\204"+
+ "\1\71\1\70\1\0\1\70\1\204\2\70\4\0\6\70"+
+ "\3\0\1\34\2\70\1\0\3\70\1\35\12\0\5\204"+
+ "\1\70\1\0\4\70\3\0\1\205\1\30\1\6\1\0"+
+ "\1\6\1\205\2\6\4\0\6\6\1\33\2\0\1\34"+
+ "\2\6\1\0\3\6\1\35\12\0\5\205\1\6\1\0"+
+ "\4\6\3\0\1\206\1\50\3\0\1\206\2\51\4\0"+
+ "\6\51\4\0\2\51\1\0\3\51\13\0\5\206\1\51"+
+ "\1\0\4\51\3\0\1\207\1\50\1\51\1\0\1\51"+
+ "\1\207\2\51\4\0\6\51\4\0\2\51\1\0\3\51"+
+ "\13\0\5\207\1\51\1\0\4\51\2\0\2\162\1\163"+
+ "\6\162\2\170\1\162\1\167\6\162\1\170\43\162\1\163"+
+ "\11\162\1\210\50\162\2\165\1\211\6\165\1\212\2\165"+
+ "\1\213\50\165\2\166\1\214\7\166\1\212\1\166\1\215"+
+ "\50\166\2\170\1\216\11\170\1\217\50\170\1\0\1\220"+
+ "\4\0\1\220\1\0\1\141\13\0\1\221\23\0\5\220"+
+ "\34\0\1\221\41\0\1\222\4\0\1\222\41\0\5\222"+
+ "\10\0\1\115\1\223\1\116\1\54\1\115\1\54\1\223"+
+ "\2\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+ "\1\117\1\115\1\117\10\115\5\223\1\54\1\115\4\54"+
+ "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\2\54"+
+ "\1\224\3\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+ "\1\117\10\115\6\54\1\115\4\54\3\115\1\54\1\116"+
+ "\3\115\3\54\1\0\3\115\6\54\4\115\2\54\1\115"+
+ "\3\54\1\117\1\115\1\117\10\115\1\225\5\54\1\115"+
+ "\4\54\3\115\1\54\1\116\3\115\2\54\1\226\1\0"+
+ "\3\115\6\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+ "\1\117\10\115\6\54\1\115\4\54\3\115\1\54\1\116"+
+ "\3\115\3\54\1\0\3\115\1\227\5\54\4\115\2\54"+
+ "\1\115\3\54\1\117\1\115\1\117\10\115\6\54\1\115"+
+ "\4\54\2\115\32\0\1\230\33\0\1\231\1\71\1\70"+
+ "\1\0\1\70\1\231\2\70\4\0\6\70\3\0\1\34"+
+ "\2\70\1\0\3\70\1\35\12\0\5\231\1\70\1\0"+
+ "\4\70\3\0\1\232\1\30\1\6\1\0\1\6\1\232"+
+ "\2\6\4\0\6\6\1\33\2\0\1\34\2\6\1\0"+
+ "\3\6\1\35\12\0\5\232\1\6\1\0\4\6\3\0"+
+ "\1\233\1\50\3\0\1\233\2\51\4\0\6\51\4\0"+
+ "\2\51\1\0\3\51\13\0\5\233\1\51\1\0\4\51"+
+ "\3\0\1\234\1\50\1\51\1\0\1\51\1\234\2\51"+
+ "\4\0\6\51\4\0\2\51\1\0\3\51\13\0\5\234"+
+ "\1\51\1\0\4\51\2\0\2\165\1\211\6\165\1\235"+
+ "\2\165\1\236\50\165\2\170\1\216\1\212\1\170\1\212"+
+ "\6\170\1\167\50\170\2\237\1\240\6\237\1\241\53\237"+
+ "\2\166\1\214\7\166\1\242\1\166\1\243\50\166\2\244"+
+ "\1\245\7\244\1\241\52\244\2\170\1\216\11\170\1\246"+
+ "\50\170\1\0\1\247\4\0\1\247\1\0\1\141\13\0"+
+ "\1\250\23\0\5\247\34\0\1\250\41\0\1\251\4\0"+
+ "\1\251\41\0\5\251\10\0\1\115\1\252\1\116\1\54"+
+ "\1\115\1\54\1\252\2\54\1\0\3\115\6\54\4\115"+
+ "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\5\252"+
+ "\1\54\1\115\4\54\3\115\1\54\1\116\3\115\3\54"+
+ "\1\0\3\115\1\54\1\253\4\54\4\115\2\54\1\115"+
+ "\3\54\1\117\1\115\1\117\10\115\6\54\1\115\4\54"+
+ "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\6\54"+
+ "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+ "\3\54\1\254\2\54\1\115\4\54\3\115\1\54\1\116"+
+ "\3\115\3\54\1\0\3\115\6\54\4\115\2\54\1\115"+
+ "\3\54\1\117\1\115\1\117\10\115\1\54\1\255\4\54"+
+ "\1\115\4\54\2\115\33\0\1\256\32\0\1\257\1\71"+
+ "\1\70\1\0\1\70\1\257\2\70\4\0\6\70\3\0"+
+ "\1\34\2\70\1\0\3\70\1\35\12\0\5\257\1\70"+
+ "\1\0\4\70\3\0\1\6\1\30\1\6\1\0\4\6"+
+ "\4\0\6\6\1\33\2\0\1\34\2\6\1\0\3\6"+
+ "\1\35\12\0\6\6\1\0\4\6\3\0\1\260\1\50"+
+ "\1\51\1\0\1\51\1\260\2\51\4\0\6\51\4\0"+
+ "\2\51\1\0\3\51\13\0\5\260\1\51\1\0\4\51"+
+ "\2\0\2\165\1\211\1\235\1\165\1\235\3\165\1\212"+
+ "\2\165\1\261\50\165\2\237\1\240\6\237\1\262\53\237"+
+ "\3\0\1\241\1\0\1\241\6\0\1\167\50\0\2\166"+
+ "\1\214\1\242\1\166\1\242\4\166\1\212\1\166\1\263"+
+ "\50\166\2\244\1\245\7\244\1\264\52\244\1\0\1\265"+
+ "\4\0\1\265\1\0\1\141\13\0\1\266\23\0\5\265"+
+ "\34\0\1\266\41\0\1\267\4\0\1\267\41\0\5\267"+
+ "\10\0\1\115\1\270\1\116\1\54\1\115\1\54\1\270"+
+ "\2\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+ "\1\117\1\115\1\117\10\115\5\270\1\54\1\115\4\54"+
+ "\2\115\1\271\1\272\1\273\1\274\1\271\1\274\3\272"+
+ "\1\275\1\276\1\271\1\115\6\272\1\115\3\271\2\272"+
+ "\1\271\3\272\1\277\1\271\1\300\10\271\6\272\1\271"+
+ "\4\272\2\271\1\115\1\54\1\116\3\115\3\54\1\0"+
+ "\3\115\6\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+ "\1\117\10\115\1\301\5\54\1\115\4\54\3\115\1\54"+
+ "\1\116\3\115\3\54\1\0\3\115\1\54\1\302\4\54"+
+ "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+ "\6\54\1\115\4\54\2\115\34\0\1\303\31\0\1\304"+
+ "\1\71\1\70\1\0\1\70\1\304\2\70\4\0\6\70"+
+ "\3\0\1\34\2\70\1\0\3\70\1\35\12\0\5\304"+
+ "\1\70\1\0\4\70\3\0\1\51\1\50\1\51\1\0"+
+ "\4\51\4\0\6\51\4\0\2\51\1\0\3\51\13\0"+
+ "\6\51\1\0\4\51\2\0\2\237\1\240\1\262\1\237"+
+ "\1\262\3\237\1\241\2\237\1\261\50\237\2\244\1\245"+
+ "\1\264\1\244\1\264\4\244\1\241\1\244\1\263\50\244"+
+ "\10\0\1\141\55\0\1\305\4\0\1\305\41\0\5\305"+
+ "\10\0\1\115\1\54\1\116\1\54\1\115\4\54\1\0"+
+ "\3\115\6\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+ "\1\117\10\115\6\54\1\115\4\54\2\115\2\271\1\306"+
+ "\6\271\1\0\1\115\1\271\1\115\6\271\1\115\11\271"+
+ "\1\277\1\271\1\300\26\271\1\272\1\273\3\271\3\272"+
+ "\1\0\1\115\1\271\1\115\6\272\1\115\3\271\2\272"+
+ "\1\271\3\272\1\277\1\271\1\300\10\271\6\272\1\271"+
+ "\4\272\3\271\1\307\1\310\1\271\2\272\1\307\26\272"+
+ "\1\311\1\272\1\312\10\272\5\307\10\272\2\271\1\306"+
+ "\1\274\1\271\1\274\3\271\1\275\1\276\1\271\1\115"+
+ "\6\271\1\115\11\271\1\277\1\271\1\300\25\271\2\275"+
+ "\1\313\6\275\1\314\53\275\2\276\1\315\6\276\1\316"+
+ "\1\317\22\276\1\320\1\276\1\320\25\276\2\321\1\322"+
+ "\6\321\2\0\1\321\1\0\6\321\1\0\11\321\1\277"+
+ "\27\321\1\115\1\54\1\116\3\115\3\54\1\0\3\115"+
+ "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+ "\10\115\4\54\1\323\1\54\1\115\4\54\3\115\1\54"+
+ "\1\116\1\324\1\115\1\324\3\54\1\325\1\326\2\115"+
+ "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+ "\10\115\6\54\1\115\4\54\2\115\17\0\1\327\46\0"+
+ "\1\70\1\71\1\70\1\0\4\70\4\0\6\70\3\0"+
+ "\1\34\2\70\1\0\3\70\1\35\12\0\6\70\1\0"+
+ "\4\70\3\0\1\330\4\0\1\330\41\0\5\330\10\0"+
+ "\2\271\1\306\6\271\1\321\23\271\1\277\1\271\1\300"+
+ "\26\271\1\331\1\273\1\272\1\271\1\272\1\331\2\272"+
+ "\1\0\1\115\1\271\1\115\6\272\1\115\3\271\2\272"+
+ "\1\271\3\272\1\277\1\271\1\300\10\271\5\331\1\272"+
+ "\1\271\4\272\3\271\1\272\1\273\3\271\3\272\1\321"+
+ "\3\271\6\272\4\271\2\272\1\271\3\272\1\277\1\271"+
+ "\1\300\10\271\6\272\1\271\4\272\2\271\2\275\1\313"+
+ "\6\275\1\332\53\275\1\0\1\333\1\334\1\335\1\0"+
+ "\1\335\2\333\5\0\6\333\4\0\2\333\1\0\3\333"+
+ "\1\336\12\0\6\333\1\0\4\333\2\0\2\276\1\315"+
+ "\6\276\1\316\1\337\22\276\1\320\1\276\1\320\25\276"+
+ "\2\316\1\340\7\316\1\314\52\316\1\115\1\341\1\342"+
+ "\1\343\1\115\1\343\2\341\1\115\1\0\3\115\6\341"+
+ "\4\115\2\341\1\115\3\341\1\336\1\115\1\117\10\115"+
+ "\6\341\1\115\4\341\2\115\2\321\1\322\32\321\1\277"+
+ "\27\321\1\115\1\54\1\116\3\115\3\54\1\0\3\115"+
+ "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+ "\10\115\1\54\1\344\4\54\1\115\4\54\5\115\1\324"+
+ "\1\115\1\324\3\115\1\325\1\326\22\115\1\117\1\115"+
+ "\1\117\25\115\2\325\1\345\6\325\1\346\53\325\2\326"+
+ "\1\347\6\326\1\350\1\115\22\326\1\351\1\326\1\351"+
+ "\25\326\16\0\1\352\46\0\1\271\1\353\1\273\1\272"+
+ "\1\271\1\272\1\353\2\272\1\0\1\115\1\271\1\115"+
+ "\6\272\1\115\3\271\2\272\1\271\3\272\1\277\1\271"+
+ "\1\300\10\271\5\353\1\272\1\271\4\272\2\271\1\275"+
+ "\1\354\1\355\1\356\1\275\1\356\2\354\1\275\1\314"+
+ "\3\275\6\354\4\275\2\354\1\275\3\354\1\357\12\275"+
+ "\6\354\1\275\4\354\2\275\1\0\1\333\1\334\1\360"+
+ "\1\0\1\360\3\333\4\0\6\333\3\0\1\335\2\333"+
+ "\1\0\3\333\1\336\12\0\6\333\1\0\4\333\3\0"+
+ "\1\361\1\333\1\0\2\333\1\361\41\333\5\361\10\333"+
+ "\1\0\1\333\1\334\1\335\1\0\1\335\2\333\5\0"+
+ "\6\333\4\0\2\333\1\0\3\333\13\0\6\333\1\0"+
+ "\4\333\2\0\1\276\1\362\1\363\1\364\1\276\1\364"+
+ "\2\362\1\276\1\316\1\317\2\276\6\362\4\276\2\362"+
+ "\1\276\3\362\1\365\1\276\1\320\10\276\6\362\1\276"+
+ "\4\362\2\276\2\316\1\340\7\316\1\366\52\316\1\115"+
+ "\1\341\1\342\1\367\1\115\1\367\3\341\1\0\3\115"+
+ "\6\341\3\115\1\343\2\341\1\115\3\341\1\336\1\115"+
+ "\1\117\10\115\6\341\1\115\4\341\3\115\1\370\1\341"+
+ "\1\115\2\341\1\370\2\341\1\333\23\341\1\371\1\341"+
+ "\1\371\10\341\5\370\10\341\1\115\1\341\1\342\1\343"+
+ "\1\115\1\343\2\341\1\115\1\0\3\115\6\341\4\115"+
+ "\2\341\1\115\3\341\1\117\1\115\1\117\10\115\6\341"+
+ "\1\115\4\341\2\115\2\325\1\345\6\325\1\372\53\325"+
+ "\3\0\1\346\1\0\1\346\27\0\1\373\27\0\2\326"+
+ "\1\347\6\326\1\350\23\326\1\351\1\326\1\351\25\326"+
+ "\2\350\1\374\7\350\1\346\52\350\50\0\1\375\14\0"+
+ "\1\271\1\376\1\273\1\272\1\271\1\272\1\376\2\272"+
+ "\1\0\1\115\1\271\1\115\6\272\1\115\3\271\2\272"+
+ "\1\271\3\272\1\277\1\271\1\300\10\271\5\376\1\272"+
+ "\1\271\4\272\2\271\1\275\1\354\1\355\1\377\1\275"+
+ "\1\377\3\354\1\314\3\275\6\354\3\275\1\356\2\354"+
+ "\1\275\3\354\1\357\12\275\6\354\1\275\4\354\3\275"+
+ "\1\u0100\1\u0101\1\275\2\354\1\u0100\2\354\1\u0102\36\354"+
+ "\5\u0100\10\354\1\275\1\354\1\355\1\356\1\275\1\356"+
+ "\2\354\1\275\1\314\3\275\6\354\4\275\2\354\1\275"+
+ "\3\354\13\275\6\354\1\275\4\354\2\275\3\0\1\360"+
+ "\1\0\1\360\20\0\1\335\6\0\1\336\30\0\1\u0103"+
+ "\1\334\1\333\1\0\1\333\1\u0103\2\333\4\0\6\333"+
+ "\3\0\1\335\2\333\1\0\3\333\1\336\12\0\5\u0103"+
+ "\1\333\1\0\4\333\2\0\1\276\1\362\1\363\1\u0104"+
+ "\1\276\1\u0104\3\362\1\316\1\317\2\276\6\362\3\276"+
+ "\1\364\2\362\1\276\3\362\1\365\1\276\1\320\10\276"+
+ "\6\362\1\276\4\362\3\276\1\u0105\1\u0106\1\276\2\362"+
+ "\1\u0105\2\362\1\u0107\1\u0108\22\362\1\u0109\1\362\1\u0109"+
+ "\10\362\5\u0105\10\362\1\276\1\362\1\363\1\364\1\276"+
+ "\1\364\2\362\1\276\1\316\1\317\2\276\6\362\4\276"+
+ "\2\362\1\276\3\362\1\320\1\276\1\320\10\276\6\362"+
+ "\1\276\4\362\2\276\1\316\1\u0107\1\u010a\1\u010b\1\316"+
+ "\1\u010b\2\u0107\2\316\1\314\2\316\6\u0107\4\316\2\u0107"+
+ "\1\316\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107\2\316"+
+ "\3\115\1\367\1\115\1\367\3\115\1\0\14\115\1\343"+
+ "\6\115\1\336\1\115\1\117\26\115\1\u010c\1\342\1\341"+
+ "\1\115\1\341\1\u010c\2\341\1\0\3\115\6\341\3\115"+
+ "\1\343\2\341\1\115\3\341\1\336\1\115\1\117\10\115"+
+ "\5\u010c\1\341\1\115\4\341\2\115\2\325\1\345\1\372"+
+ "\1\325\1\372\3\325\1\346\23\325\1\u010d\27\325\2\350"+
+ "\1\374\7\350\1\u010e\52\350\21\0\1\u010f\43\0\1\271"+
+ "\1\u0110\1\273\1\272\1\271\1\272\1\u0110\2\272\1\0"+
+ "\1\115\1\271\1\115\6\272\1\115\3\271\2\272\1\271"+
+ "\3\272\1\277\1\271\1\300\10\271\5\u0110\1\272\1\271"+
+ "\4\272\2\271\2\275\1\313\1\377\1\275\1\377\3\275"+
+ "\1\314\14\275\1\356\6\275\1\357\30\275\1\u0111\1\355"+
+ "\1\354\1\275\1\354\1\u0111\2\354\1\314\3\275\6\354"+
+ "\3\275\1\356\2\354\1\275\3\354\1\357\12\275\5\u0111"+
+ "\1\354\1\275\4\354\3\275\1\354\1\355\1\377\1\275"+
+ "\1\377\3\354\1\332\3\275\6\354\3\275\1\356\2\354"+
+ "\1\275\3\354\1\357\12\275\6\354\1\275\4\354\3\275"+
+ "\1\354\1\355\1\u0112\1\275\1\u0112\3\354\1\314\3\275"+
+ "\6\354\3\275\1\356\2\354\1\275\3\354\1\357\12\275"+
+ "\6\354\1\275\4\354\2\275\1\0\1\u0113\1\334\1\333"+
+ "\1\0\1\333\1\u0113\2\333\4\0\6\333\3\0\1\335"+
+ "\2\333\1\0\3\333\1\336\12\0\5\u0113\1\333\1\0"+
+ "\4\333\2\0\2\276\1\315\1\u0104\1\276\1\u0104\3\276"+
+ "\1\316\1\317\13\276\1\364\6\276\1\365\1\276\1\320"+
+ "\26\276\1\u0114\1\363\1\362\1\276\1\362\1\u0114\2\362"+
+ "\1\316\1\317\2\276\6\362\3\276\1\364\2\362\1\276"+
+ "\3\362\1\365\1\276\1\320\10\276\5\u0114\1\362\1\276"+
+ "\4\362\3\276\1\362\1\363\1\u0104\1\276\1\u0104\3\362"+
+ "\1\316\1\337\2\276\6\362\3\276\1\364\2\362\1\276"+
+ "\3\362\1\365\1\276\1\320\10\276\6\362\1\276\4\362"+
+ "\2\276\1\316\1\u0107\1\u010a\1\u0115\1\316\1\u0115\3\u0107"+
+ "\1\316\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316"+
+ "\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107\2\316\1\276"+
+ "\1\362\1\363\1\u0116\1\276\1\u0116\3\362\1\316\1\317"+
+ "\2\276\6\362\3\276\1\364\2\362\1\276\3\362\1\365"+
+ "\1\276\1\320\10\276\6\362\1\276\4\362\2\276\1\316"+
+ "\1\u0117\1\u0118\1\316\2\u0107\1\u0117\3\u0107\1\u0119\35\u0107"+
+ "\5\u0117\10\u0107\1\316\1\u0107\1\u010a\1\u010b\1\316\1\u010b"+
+ "\2\u0107\2\316\1\314\2\316\6\u0107\4\316\2\u0107\1\316"+
+ "\3\u0107\13\316\6\u0107\1\316\4\u0107\2\316\1\115\1\u011a"+
+ "\1\342\1\341\1\115\1\341\1\u011a\2\341\1\0\3\115"+
+ "\6\341\3\115\1\343\2\341\1\115\3\341\1\336\1\115"+
+ "\1\117\10\115\5\u011a\1\341\1\115\4\341\2\115\2\350"+
+ "\1\374\1\u010e\1\350\1\u010e\4\350\1\346\22\350\1\u011b"+
+ "\27\350\16\0\1\u011c\46\0\1\271\1\u011d\1\273\1\272"+
+ "\1\271\1\272\1\u011d\2\272\1\0\1\115\1\271\1\115"+
+ "\6\272\1\115\3\271\2\272\1\271\3\272\1\277\1\271"+
+ "\1\300\10\271\5\u011d\1\272\1\271\4\272\2\271\1\275"+
+ "\1\u011e\1\355\1\354\1\275\1\354\1\u011e\2\354\1\314"+
+ "\3\275\6\354\3\275\1\356\2\354\1\275\3\354\1\357"+
+ "\12\275\5\u011e\1\354\1\275\4\354\3\275\1\354\1\355"+
+ "\1\u0112\1\275\1\u0112\2\354\1\275\1\314\3\275\6\354"+
+ "\3\275\1\356\2\354\1\275\3\354\1\357\12\275\6\354"+
+ "\1\275\4\354\2\275\1\0\1\u011f\1\334\1\333\1\0"+
+ "\1\333\1\u011f\2\333\4\0\6\333\3\0\1\335\2\333"+
+ "\1\0\3\333\1\336\12\0\5\u011f\1\333\1\0\4\333"+
+ "\2\0\1\276\1\u0120\1\363\1\362\1\276\1\362\1\u0120"+
+ "\2\362\1\316\1\317\2\276\6\362\3\276\1\364\2\362"+
+ "\1\276\3\362\1\365\1\276\1\320\10\276\5\u0120\1\362"+
+ "\1\276\4\362\2\276\2\316\1\340\1\u0115\1\316\1\u0115"+
+ "\4\316\1\314\13\316\1\u010b\6\316\1\365\27\316\1\276"+
+ "\1\362\1\363\1\u0116\1\276\1\u0116\2\362\1\276\1\316"+
+ "\1\317\2\276\6\362\3\276\1\364\2\362\1\276\3\362"+
+ "\1\365\1\276\1\320\10\276\6\362\1\276\4\362\2\276"+
+ "\1\316\1\u0121\1\u010a\1\u0107\1\316\1\u0107\1\u0121\2\u0107"+
+ "\1\316\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316"+
+ "\3\u0107\1\365\12\316\5\u0121\1\u0107\1\316\4\u0107\3\316"+
+ "\1\u0107\1\u010a\1\u0115\1\316\1\u0115\3\u0107\1\316\1\366"+
+ "\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316\3\u0107\1\365"+
+ "\12\316\6\u0107\1\316\4\u0107\3\316\1\u0107\1\u010a\1\u0122"+
+ "\1\316\1\u0122\3\u0107\1\316\1\314\2\316\6\u0107\3\316"+
+ "\1\u010b\2\u0107\1\316\3\u0107\1\365\12\316\6\u0107\1\316"+
+ "\4\u0107\2\316\1\115\1\u0123\1\342\1\341\1\115\1\341"+
+ "\1\u0123\2\341\1\0\3\115\6\341\3\115\1\343\2\341"+
+ "\1\115\3\341\1\336\1\115\1\117\10\115\5\u0123\1\341"+
+ "\1\115\4\341\2\115\1\271\1\272\1\273\1\272\1\271"+
+ "\4\272\1\0\1\115\1\271\1\115\6\272\1\115\3\271"+
+ "\2\272\1\271\3\272\1\277\1\271\1\300\10\271\6\272"+
+ "\1\271\4\272\2\271\1\275\1\u0124\1\355\1\354\1\275"+
+ "\1\354\1\u0124\2\354\1\314\3\275\6\354\3\275\1\356"+
+ "\2\354\1\275\3\354\1\357\12\275\5\u0124\1\354\1\275"+
+ "\4\354\2\275\1\0\1\u0125\1\334\1\333\1\0\1\333"+
+ "\1\u0125\2\333\4\0\6\333\3\0\1\335\2\333\1\0"+
+ "\3\333\1\336\12\0\5\u0125\1\333\1\0\4\333\2\0"+
+ "\1\276\1\u0126\1\363\1\362\1\276\1\362\1\u0126\2\362"+
+ "\1\316\1\317\2\276\6\362\3\276\1\364\2\362\1\276"+
+ "\3\362\1\365\1\276\1\320\10\276\5\u0126\1\362\1\276"+
+ "\4\362\2\276\1\316\1\u0127\1\u010a\1\u0107\1\316\1\u0107"+
+ "\1\u0127\2\u0107\1\316\1\314\2\316\6\u0107\3\316\1\u010b"+
+ "\2\u0107\1\316\3\u0107\1\365\12\316\5\u0127\1\u0107\1\316"+
+ "\4\u0107\3\316\1\u0107\1\u010a\1\u0122\1\316\1\u0122\2\u0107"+
+ "\2\316\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316"+
+ "\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107\2\316\1\115"+
+ "\1\u0128\1\342\1\341\1\115\1\341\1\u0128\2\341\1\0"+
+ "\3\115\6\341\3\115\1\343\2\341\1\115\3\341\1\336"+
+ "\1\115\1\117\10\115\5\u0128\1\341\1\115\4\341\2\115"+
+ "\1\275\1\u0129\1\355\1\354\1\275\1\354\1\u0129\2\354"+
+ "\1\314\3\275\6\354\3\275\1\356\2\354\1\275\3\354"+
+ "\1\357\12\275\5\u0129\1\354\1\275\4\354\2\275\1\0"+
+ "\1\u012a\1\334\1\333\1\0\1\333\1\u012a\2\333\4\0"+
+ "\6\333\3\0\1\335\2\333\1\0\3\333\1\336\12\0"+
+ "\5\u012a\1\333\1\0\4\333\2\0\1\276\1\u012b\1\363"+
+ "\1\362\1\276\1\362\1\u012b\2\362\1\316\1\317\2\276"+
+ "\6\362\3\276\1\364\2\362\1\276\3\362\1\365\1\276"+
+ "\1\320\10\276\5\u012b\1\362\1\276\4\362\2\276\1\316"+
+ "\1\u012c\1\u010a\1\u0107\1\316\1\u0107\1\u012c\2\u0107\1\316"+
+ "\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316\3\u0107"+
+ "\1\365\12\316\5\u012c\1\u0107\1\316\4\u0107\2\316\1\115"+
+ "\1\u012d\1\342\1\341\1\115\1\341\1\u012d\2\341\1\0"+
+ "\3\115\6\341\3\115\1\343\2\341\1\115\3\341\1\336"+
+ "\1\115\1\117\10\115\5\u012d\1\341\1\115\4\341\2\115"+
+ "\1\275\1\u012e\1\355\1\354\1\275\1\354\1\u012e\2\354"+
+ "\1\314\3\275\6\354\3\275\1\356\2\354\1\275\3\354"+
+ "\1\357\12\275\5\u012e\1\354\1\275\4\354\2\275\1\0"+
+ "\1\333\1\334\1\333\1\0\4\333\4\0\6\333\3\0"+
+ "\1\335\2\333\1\0\3\333\1\336\12\0\6\333\1\0"+
+ "\4\333\2\0\1\276\1\u012f\1\363\1\362\1\276\1\362"+
+ "\1\u012f\2\362\1\316\1\317\2\276\6\362\3\276\1\364"+
+ "\2\362\1\276\3\362\1\365\1\276\1\320\10\276\5\u012f"+
+ "\1\362\1\276\4\362\2\276\1\316\1\u0130\1\u010a\1\u0107"+
+ "\1\316\1\u0107\1\u0130\2\u0107\1\316\1\314\2\316\6\u0107"+
+ "\3\316\1\u010b\2\u0107\1\316\3\u0107\1\365\12\316\5\u0130"+
+ "\1\u0107\1\316\4\u0107\2\316\1\115\1\341\1\342\1\341"+
+ "\1\115\4\341\1\0\3\115\6\341\3\115\1\343\2\341"+
+ "\1\115\3\341\1\336\1\115\1\117\10\115\6\341\1\115"+
+ "\4\341\2\115\1\275\1\354\1\355\1\354\1\275\4\354"+
+ "\1\314\3\275\6\354\3\275\1\356\2\354\1\275\3\354"+
+ "\1\357\12\275\6\354\1\275\4\354\2\275\1\276\1\362"+
+ "\1\363\1\362\1\276\4\362\1\316\1\317\2\276\6\362"+
+ "\3\276\1\364\2\362\1\276\3\362\1\365\1\276\1\320"+
+ "\10\276\6\362\1\276\4\362\2\276\1\316\1\u0131\1\u010a"+
+ "\1\u0107\1\316\1\u0107\1\u0131\2\u0107\1\316\1\314\2\316"+
+ "\6\u0107\3\316\1\u010b\2\u0107\1\316\3\u0107\1\365\12\316"+
+ "\5\u0131\1\u0107\1\316\4\u0107\3\316\1\u0107\1\u010a\1\u0107"+
+ "\1\316\4\u0107\1\316\1\314\2\316\6\u0107\3\316\1\u010b"+
+ "\2\u0107\1\316\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107"+
+ "\2\316";
+
+ /**
+ * The transition table of the DFA
+ */
+ final private static int yytrans [] = yy_unpack();
+
+
+ /* error codes */
+ final private static int YY_UNKNOWN_ERROR = 0;
+ final private static int YY_ILLEGAL_STATE = 1;
+ final private static int YY_NO_MATCH = 2;
+ final private static int YY_PUSHBACK_2BIG = 3;
+
+ /* error messages for the codes above */
+ final private static String YY_ERROR_MSG[] = {
+ "Unkown internal scanner error",
+ "Internal error: unknown state",
+ "Error: could not match input",
+ "Error: pushback value was too large"
+ };
+
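+ /**
+ * YY_ATTRIBUTE[aState] contains the attribute bits of state <code>aState</code>.
+ * As used by yylex(): bit 1 marks an accepting state, and bit 8 marks an
+ * accepting state at which scanning of the current token stops immediately.
+ */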
+ private final static byte YY_ATTRIBUTE[] = {
+ 0, 9, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 9, 1, 1, 9,
+ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 9, 0, 9, 9, 1, 0,
+ 0, 0, 9, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 9, 9, 0, 0, 1, 9, 1, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 9, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
+ 0, 1, 1, 0, 0, 0, 0, 0, 0, 9, 0, 9, 0, 0, 1, 1,
+ 1, 0, 0, 0, 0, 0, 9, 0, 1, 1, 1, 0, 0, 0, 1, 0,
+ 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 9, 1,
+ 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0,
+ 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
+ 1, 0, 1, 0, 1, 9, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+ 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 9, 0, 0,
+ 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 9, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+
+ /** the input device */
+ private java.io.Reader yy_reader;
+
+ /** the current state of the DFA */
+ private int yy_state;
+
+ /** the current lexical state */
+ private int yy_lexical_state = YYINITIAL;
+
+ /** this buffer contains the current text to be matched and is
+ the source of the yytext() string */
+ private char yy_buffer[] = new char[YY_BUFFERSIZE];
+
+ /** the textposition at the last accepting state */
+ private int yy_markedPos;
+
+ /** the textposition at the last state to be included in yytext */
+ private int yy_pushbackPos;
+
+ /** the current text position in the buffer */
+ private int yy_currentPos;
+
+ /** startRead marks the beginning of the yytext() string in the buffer */
+ private int yy_startRead;
+
+ /** endRead marks the last character in the buffer, that has been read
+ from input */
+ private int yy_endRead;
+
+ /** number of newlines encountered up to the start of the matched text */
+ private int yyline;
+
+ /** the number of characters up to the start of the matched text */
+ private int yychar;
+
+ /**
+ * the number of characters from the last newline up to the start of the
+ * matched text
+ */
+ private int yycolumn;
+
+ /**
+ * yy_atBOL == true <=> the scanner is currently at the beginning of a line
+ */
+ private boolean yy_atBOL = true;
+
+ /** yy_atEOF == true <=> the scanner is at the EOF */
+ private boolean yy_atEOF;
+
+ /* user code: */
+ // Put stuff to include in the class here
+ // Raw text of the last @charset rule matched by yylex(); the rule itself is
+ // not written to the output.
+ String detectedCharset;
+
+ // External flag
+ // When true, yylex() replaces any string token containing ':' with a comment.
+ boolean paranoidStringCheck = false;
+ // When true, unknown @identifiers are silently dropped; when false,
+ // throwError() is called for them.
+ boolean deleteErrors = true;
+ // When true, yylex() reports every matched token through log().
+ boolean debug = true;
+
+ // Internal flags
+ // When set, the next mediums list is swallowed and the flag is cleared.
+ // NOTE(review): nothing in the visible scanner code ever sets this to true.
+ boolean postBadImportFlag = false; // both URLs and @import's
+ // NOTE(review): importFlag and urlFlag are not referenced by the visible
+ // scanner code - confirm whether they are still needed.
+ boolean importFlag = false;
+ boolean urlFlag = false;
+
+ // Writer
+ // Destination of the filtered stylesheet; only the three-argument
+ // constructor assigns it.
+ Writer w = null; // Will NPE if not initialized properly
+
+ public void parse () throws IOException, DataFilterException {
+ while (yylex() != null);
+ }
+
+ /**
+  * Creates a filter that scans r and writes its output to w.
+  *
+  * @param r source of the stylesheet text
+  * @param w destination of the filtered text
+  * @param paranoidStringCheck initial value of the flag of the same name
+  */
+ CSSTokenizerFilter(Reader r, Writer w, boolean paranoidStringCheck) {
+     this(r);
+     this.paranoidStringCheck = paranoidStringCheck;
+     this.w = w;
+ }
+
+ /**
+  * Error hook called by the scanner; this base implementation always fails
+  * and is meant to be overridden.
+  *
+  * @param s description of the problem found in the stylesheet
+  * @throws IllegalStateException always, in this base implementation
+  */
+ void throwError(String s) throws IOException, DataFilterException {
+     // Keep the message on one line: a Java string literal cannot span lines.
+     throw new IllegalStateException("You MUST override throwError!");
+ }
+
+ /**
+  * Hook for filtering the target of an @import rule; this base
+  * implementation always fails and is meant to be overridden.
+  *
+  * @param s the decoded URL
+  * @return never returns normally in this base implementation
+  * @throws IllegalStateException always, in this base implementation
+  */
+ String processImportURL(String s) {
+     // Keep the message on one line: a Java string literal cannot span lines.
+     throw new IllegalStateException("You MUST override processImportURL!");
+ }
+
+ /**
+  * Hook for filtering a URL found in the stylesheet; this base
+  * implementation always fails and is meant to be overridden. yylex()
+  * treats a null or empty result as "URL rejected".
+  *
+  * @param s the decoded URL
+  * @return never returns normally in this base implementation
+  * @throws IllegalStateException always, in this base implementation
+  */
+ String processURL(String s) {
+     // Keep the message on one line: a Java string literal cannot span lines.
+     throw new IllegalStateException("You MUST override processURL!");
+ }
+
+ void log(String s) {
+ System.err.println("CSSTokenizerFilter: "+s);
+ }
+
+ void logError(String s) {
+ System.err.println("CSSTokenizerFilter ERROR: "+s);
+ }
+
+ static String unquote(String s, char c) {
+ if(s.length() > 1) {
+ if(s.charAt(s.length()-1) == c) {
+ s = s.substring(1, s.length()-1);
+ return s;
+ } else return "";
+ } else return "";
+ }
+
+ // This is not very efficient. The parser below knows the quoting rules
+ // too.
+
+ /** Returns true for the ASCII characters 0-9, a-f and A-F only. */
+ static boolean isHexDigit(char c) {
+     if (c >= '0' && c <= '9') {
+         return true;
+     }
+     if (c >= 'a' && c <= 'f') {
+         return true;
+     }
+     return c >= 'A' && c <= 'F';
+ }
+
+ /**
+  * Decoded form of a CSS string or url(...) token.
+  *
+  * The constructor strips an optional "url(" prefix and an optional opening
+  * quote, resolves backslash escapes into {@link #data}, and keeps whatever
+  * follows the closing delimiter in {@link #suffix}. toString() re-encodes
+  * the (possibly replaced) data in the same quoting style.
+  *
+  * This is user code copied by JFlex from CSSTokenizerFilter.jflex; the same
+  * class in that file needs the same changes or they are lost on regeneration.
+  */
+ class DecodedStringThingy {
+     char quote; // ' ' (a space) means not quoted
+     boolean url; // in a url() ?
+     String data;
+     public String suffix; // includes any whitespace
+
+     /**
+      * Decodes s.
+      *
+      * @param s token text: optional "url(", optional quote, body, closing
+      *          delimiter, optional trailing text. The last character of the
+      *          body is never decoded; it is assumed to be the closing
+      *          delimiter.
+      */
+     public DecodedStringThingy(String s) {
+         if(s.startsWith("url(")) {
+             s = s.substring("url(".length());
+             url = true;
+         }
+         quote = ' ';
+         // Guard against an empty body (e.g. a bare "url("): charAt(0)
+         // used to throw StringIndexOutOfBoundsException here.
+         if(s.length() > 0) {
+             char q = s.charAt(0);
+             if(q == '\'' || q == '\"') {
+                 quote = q;
+                 s = s.substring(1);
+             }
+         }
+         StringBuffer buffer = new StringBuffer();
+         StringBuffer hexEscape = new StringBuffer();
+         boolean justEscaping = false;  // previous char was the backslash
+         boolean stillEscaping = false; // collecting the digits of a hex escape
+         int x = 0;
+         while(x < s.length()-1) {
+             char c = s.charAt(x);
+             x++;
+             if(justEscaping) {
+                 justEscaping = false;
+                 if(isHexDigit(c)) {
+                     hexEscape.append(c);
+                     stillEscaping = true;
+                 } else {
+                     // Escaped literal, including an escaped newline.
+                     // Will need to be reencoded if quote or \n
+                     buffer.append(c);
+                 }
+                 continue;
+             }
+             if(stillEscaping) {
+                 if(isHexDigit(c) && hexEscape.length() < 6) {
+                     hexEscape.append(c);
+                     continue;
+                 }
+                 appendHexEscape(buffer, hexEscape);
+                 stillEscaping = false;
+                 hexEscape = new StringBuffer();
+                 if(Character.isWhitespace(c)) {
+                     // Ignore one whitespace char after an escape
+                     continue;
+                 }
+                 // Otherwise c ends the escape and is handled as an ordinary
+                 // character below, so a closing quote or a new backslash
+                 // right after the escape keeps its meaning.
+             }
+             if(quote != ' ' && c == quote) {
+                 break;
+             } else if (c == '\\') {
+                 justEscaping = true;
+             } else {
+                 buffer.append(c);
+             }
+         }
+         if(stillEscaping) {
+             // The escape ran up to the closing delimiter: flush it instead
+             // of dropping it.
+             appendHexEscape(buffer, hexEscape);
+         }
+         data = buffer.toString();
+         // NOTE(review): x+1 skips one character after the decoded body
+         // (presumably the ')' of url(...)); yylex() relies on the resulting
+         // suffix length for yypushback(), so this is left unchanged.
+         if(s.length() > (x+1))
+             suffix = s.substring(x+1);
+         else suffix = "";
+     }
+
+     /**
+      * Appends the character named by a completed hex escape. Code points
+      * above 0xFFFF are not representable as a single char; for those a
+      * warning is logged and written to the output as a comment instead.
+      */
+     private void appendHexEscape(StringBuffer buffer, StringBuffer hexEscape) {
+         // hexEscape holds 1 to 6 hex digits here, so parseInt cannot fail.
+         int d = Integer.parseInt(hexEscape.toString(), 16);
+         if(d > 0xFFFF) {
+             String error = "UCS-4 CHARACTERS OVER 0xFFFF NOT SUPPORTED!";
+             logError(error);
+             try {
+                 w.write("/* "+error+"*/");
+             } catch (IOException e) {
+                 // Best effort only: the constructor cannot propagate the
+                 // exception, and the warning has already been logged.
+                 logError("Could not write warning comment: "+e);
+             }
+         } else {
+             buffer.append((char)d);
+         }
+     }
+
+     /** Re-encodes the token: url( + quote + escaped data + quote + ) + suffix. */
+     public String toString() {
+         StringBuffer out = new StringBuffer();
+         if(url) out.append("url(");
+         if(quote != ' ') out.append(quote);
+         out.append(unescapeData());
+         if(quote != ' ') out.append(quote);
+         if(url) out.append(")");
+         out.append(suffix);
+         return out.toString();
+     }
+
+     /**
+      * Returns data with backslash escapes re-applied (despite the name, this
+      * method escapes). The backslash itself is escaped as well, otherwise a
+      * trailing backslash in data would escape the closing delimiter. For an
+      * unquoted value the characters that would end or corrupt it
+      * (parentheses, quotes, space, tab) are escaped too.
+      */
+     public String unescapeData() {
+         StringBuffer sb = new StringBuffer();
+         boolean unquoted = (quote == ' ');
+         for(int i=0;i<data.length();i++) {
+             char c = data.charAt(i);
+             boolean escape = (c == quote || c == '\n' || c == '\\');
+             if(unquoted && (c == '(' || c == ')' || c == '\'' || c == '\"' || c == '\t')) {
+                 escape = true;
+             }
+             if(escape) {
+                 sb.append('\\');
+             }
+             sb.append(c);
+         }
+         return sb.toString();
+     }
+ }
+
+
+ /**
+ * Creates a new scanner
+ * There is also a java.io.InputStream version of this constructor.
+ *
+ * Only the input is set here: the output Writer <code>w</code> keeps its
+ * initial null value, and the scanner actions that write output will fail
+ * until it is assigned.
+ *
+ * @param in the java.io.Reader to read input from.
+ */
+ CSSTokenizerFilter(java.io.Reader in) {
+ this.yy_reader = in;
+ }
+
+ /**
+ * Creates a new scanner.
+ * There is also java.io.Reader version of this constructor.
+ *
+ * The stream is wrapped in an InputStreamReader created without an explicit
+ * charset, so the bytes are decoded with the platform default encoding.
+ *
+ * @param in the java.io.InputStream to read input from.
+ */
+ CSSTokenizerFilter(java.io.InputStream in) {
+ this(new java.io.InputStreamReader(in));
+ }
+
+ /**
+ * Unpacks the split, compressed DFA transition table.
+ *
+ * @return the unpacked transition table
+ */
+ private static int [] yy_unpack() {
+ int [] trans = new int[13356];
+ int offset = 0;
+ offset = yy_unpack(yy_packed0, offset, trans);
+ return trans;
+ }
+
+ /**
+ * Unpacks the compressed DFA transition table.
+ *
+ * @param packed the packed transition table
+ * @return the index of the last entry
+ */
+ private static int yy_unpack(String packed, int offset, int [] trans) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ value--;
+ do trans[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+ /**
+ * Unpacks the compressed character translation table.
+ *
+ * @param packed the packed character translation table
+ * @return the unpacked character translation table
+ */
+ private static char [] yy_unpack_cmap(String packed) {
+ char [] map = new char[0x10000];
+ int i = 0; /* index in packed string */
+ int j = 0; /* index in unpacked array */
+ while (i < 182) {
+ int count = packed.charAt(i++);
+ char value = packed.charAt(i++);
+ do map[j++] = value; while (--count > 0);
+ }
+ return map;
+ }
+
+
+ /**
+ * Refills the input buffer.
+ *
+ * The text from yy_startRead on is first moved to the front of the buffer,
+ * the buffer is enlarged if the current position has reached its end, and
+ * then one read() call appends new input after yy_endRead.
+ *
+ * @return <code>false</code>, iff there was new input;
+ * <code>true</code> when the reader reported end of stream.
+ *
+ * @exception IOException if any I/O-Error occurs
+ */
+ private boolean yy_refill() throws java.io.IOException {
+
+ /* first: make room (if you can) */
+ /* everything before yy_startRead belongs to earlier tokens and is dropped */
+ if (yy_startRead > 0) {
+ System.arraycopy(yy_buffer, yy_startRead,
+ yy_buffer, 0,
+ yy_endRead-yy_startRead);
+
+ /* translate stored positions */
+ yy_endRead-= yy_startRead;
+ yy_currentPos-= yy_startRead;
+ yy_markedPos-= yy_startRead;
+ yy_pushbackPos-= yy_startRead;
+ yy_startRead = 0;
+ }
+
+ /* is the buffer big enough? */
+ if (yy_currentPos >= yy_buffer.length) {
+ /* if not: blow it up */
+ /* the new size is twice the current position; old content is kept */
+ char newBuffer[] = new char[yy_currentPos*2];
+ System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
+ yy_buffer = newBuffer;
+ }
+
+ /* finally: fill the buffer with new input */
+ int numRead = yy_reader.read(yy_buffer, yy_endRead,
+ yy_buffer.length-yy_endRead);
+
+ /* NOTE(review): a read() that returns 0 is reported as "new input" - */
+ /* confirm the readers used here never return 0 */
+ if (numRead < 0) {
+ return true;
+ }
+ else {
+ yy_endRead+= numRead;
+ return false;
+ }
+ }
+
+
+ /**
+ * Closes the input stream.
+ */
+ final public void yyclose() throws java.io.IOException {
+     /* flag end of input, then discard whatever is still buffered */
+     yy_atEOF = true;
+     yy_endRead = yy_startRead;
+
+     if (yy_reader == null) {
+         return;
+     }
+     yy_reader.close();
+ }
+
+
+ /**
+  * Closes the stream currently being read and switches the scanner to a
+  * new one.
+  *
+  * Every position, counter and flag is set back to its starting value, so
+  * text still buffered from the old stream is lost and the old stream
+  * <b>cannot</b> be reused. The lexical state becomes <tt>YYINITIAL</tt>.
+  *
+  * @param reader the new input stream
+  */
+ final public void yyreset(java.io.Reader reader) throws java.io.IOException {
+     yyclose();
+     yy_reader = reader;
+
+     /* flags */
+     yy_atEOF = false;
+     yy_atBOL = true;
+
+     /* buffer positions */
+     yy_startRead = 0;
+     yy_endRead = 0;
+     yy_pushbackPos = 0;
+     yy_markedPos = 0;
+     yy_currentPos = 0;
+
+     /* position counters */
+     yycolumn = 0;
+     yychar = 0;
+     yyline = 0;
+
+     yy_lexical_state = YYINITIAL;
+ }
+
+
+ /**
+ * Returns the current lexical state.
+ */
+ final public int yystate() {
+     /* plain accessor for the lexical state set through yybegin() */
+     final int current = this.yy_lexical_state;
+     return current;
+ }
+
+
+ /**
+ * Enters a new lexical state
+ *
+ * @param newState the new lexical state
+ */
+ final public void yybegin(int newState) {
+     /* takes effect when yylex() starts matching its next token */
+     this.yy_lexical_state = newState;
+ }
+
+
+ /**
+ * Returns the text matched by the current regular expression.
+ */
+ final public String yytext() {
+     /* the match spans yy_startRead (inclusive) to yy_markedPos (exclusive) */
+     final int length = yy_markedPos - yy_startRead;
+     return new String(yy_buffer, yy_startRead, length);
+ }
+
+
+ /**
+ * Returns the character at position <tt>pos</tt> from the
+ * matched text.
+ *
+ * It is equivalent to yytext().charAt(pos), but faster
+ *
+ * @param pos the position of the character to fetch.
+ * A value from 0 to yylength()-1.
+ *
+ * @return the character at position pos
+ */
+ final public char yycharat(int pos) {
+     /* pos is relative to the start of the matched text */
+     final int index = yy_startRead + pos;
+     return yy_buffer[index];
+ }
+
+
+ /**
+ * Returns the length of the matched text region.
+ */
+ final public int yylength() {
+     /* distance between the start of the match and the marked end */
+     final int length = yy_markedPos - yy_startRead;
+     return length;
+ }
+
+
+ /**
+ * Reports an error that occurred while scanning.
+ *
+ * In a wellformed scanner (no or only correct usage of
+ * yypushback(int) and a match-all fallback rule) this method
+ * will only be called with things that "Can't Possibly Happen".
+ * If this method is called, something is seriously wrong
+ * (e.g. a JFlex bug producing a faulty scanner etc.).
+ *
+ * Usual syntax/scanner level error handling should be done
+ * in error fallback rules.
+ *
+ * @param errorCode the code of the errormessage to display
+ */
+ private void yy_ScanError(int errorCode) {
+ String message;
+ try {
+ message = YY_ERROR_MSG[errorCode];
+ }
+ catch (ArrayIndexOutOfBoundsException e) {
+ message = YY_ERROR_MSG[YY_UNKNOWN_ERROR];
+ }
+
+ throw new Error(message);
+ }
+
+
+ /**
+ * Pushes the specified amount of characters back into the input stream.
+ *
+ * They will be read again by the next call of the scanning method
+ *
+ * @param number the number of characters to be read again.
+ * This number must not be greater than yylength()!
+ */
+ private void yypushback(int number) {
+ if ( number > yylength() )
+ yy_ScanError(YY_PUSHBACK_2BIG);
+
+ yy_markedPos -= number;
+ }
+
+
+ /**
+  * Resumes scanning until the next regular expression is matched,
+  * the end of input is encountered or an I/O-Error occurs.
+  *
+  * Each pass of the outer loop runs the DFA to find the longest match and
+  * then executes the action for the accepting state. Every action writes
+  * the filtered form of the token to <code>w</code> (or drops it) and falls
+  * through to a <code>break</code>, so the loop continues with the next
+  * token; the method only returns at end of input.
+  *
+  * The actions are generated from CSSTokenizerFilter.jflex; changes made
+  * here must be mirrored there or they are lost on regeneration.
+  *
+  * @return null once the end of input has been reached
+  * @exception IOException if any I/O-Error occurs
+  * @throws DataFilterException declared for throwError(), which the actions call
+  */
+ public Yytoken yylex() throws java.io.IOException, DataFilterException {
+   int yy_input;
+   int yy_action;
+
+   // cached fields:
+   int yy_currentPos_l;
+   int yy_startRead_l;
+   int yy_markedPos_l;
+   int yy_endRead_l = yy_endRead;
+   char [] yy_buffer_l = yy_buffer;
+   char [] yycmap_l = yycmap;
+
+   int [] yytrans_l = yytrans;
+   int [] yy_rowMap_l = yy_rowMap;
+   byte [] yy_attr_l = YY_ATTRIBUTE;
+
+   while (true) {
+     yy_markedPos_l = yy_markedPos;
+
+     yy_action = -1;
+
+     yy_startRead_l = yy_currentPos_l = yy_currentPos =
+                      yy_startRead = yy_markedPos_l;
+
+     yy_state = yy_lexical_state;
+
+
+     yy_forAction: {
+       while (true) {
+
+         if (yy_currentPos_l < yy_endRead_l)
+           yy_input = yy_buffer_l[yy_currentPos_l++];
+         else if (yy_atEOF) {
+           yy_input = YYEOF;
+           break yy_forAction;
+         }
+         else {
+           // store back cached positions
+           yy_currentPos = yy_currentPos_l;
+           yy_markedPos = yy_markedPos_l;
+           boolean eof = yy_refill();
+           // get translated positions and possibly new buffer
+           yy_currentPos_l = yy_currentPos;
+           yy_markedPos_l = yy_markedPos;
+           yy_buffer_l = yy_buffer;
+           yy_endRead_l = yy_endRead;
+           if (eof) {
+             yy_input = YYEOF;
+             break yy_forAction;
+           }
+           else {
+             yy_input = yy_buffer_l[yy_currentPos_l++];
+           }
+         }
+         int yy_next = yytrans_l[ yy_rowMap_l[yy_state] + yycmap_l[yy_input] ];
+         if (yy_next == -1) break yy_forAction;
+         yy_state = yy_next;
+
+         // bit 1: accepting state; bit 8: stop scanning right here
+         int yy_attributes = yy_attr_l[yy_state];
+         if ( (yy_attributes & 1) == 1 ) {
+           yy_action = yy_state;
+           yy_markedPos_l = yy_currentPos_l;
+           if ( (yy_attributes & 8) == 8 ) break yy_forAction;
+         }
+
+       }
+     }
+
+     // store back cached position
+     yy_markedPos = yy_markedPos_l;
+
+     switch (yy_action) {
+
+       case 78:
+       case 98:
+       case 191:
+       case 201:
+       case 207:
+       case 232:
+       case 248:
+       case 264:
+         {
+           if(!deleteErrors) {
+             throwError("Unknown @identifier "+yytext());
+           } else {
+             String s = yytext();
+             if(debug) log("Discarded identifier: "+s);
+             // Ignore
+           }
+         }
+       case 306: break;
+       case 118:
+       case 135:
+       case 176:
+       case 178:
+         {
+           // This is horrible. However it seems that there is no other way to do
+           // it with either jflex or CUP, as {URL} cannot be an unambiguous token :(
+           String s = yytext();
+           if(debug) log("Recognized URL: "+s);
+
+           DecodedStringThingy dst = new DecodedStringThingy(s);
+
+           if(!dst.url) {
+             throw new IllegalStateException("parsing url().. isn't a url()");
+           }
+           // Anything matched after the closing ')' is handed back to the scanner.
+           if(dst.suffix.length() > 0) {
+             yypushback(dst.suffix.length());
+             dst.suffix = "";
+           }
+
+           s = dst.data;
+           if(debug) log("URL now: "+s);
+           s = processURL(s);
+           dst.data = s;
+           if(s == null || s.equals("")) {
+             if(debug) log("URL invalid");
+             w.write("url()");
+           } else {
+             s = dst.toString();
+             if(debug) log("Writing: "+s);
+             w.write(s);
+           }
+         }
+       case 307: break;
+       case 26:
+       case 94:
+         {
+           String s = yytext();
+           if(s.startsWith("url")) throwError("Invalid contents of url()");
+           w.write(s);
+           if(debug) log("Matched function start: "+s);
+         }
+       case 308: break;
+       case 28:
+         {
+           if(postBadImportFlag) {
+             // Ignore
+             postBadImportFlag = false;
+             if(debug) log("Ignoring mediums list because after bad import: "+
+                           yytext());
+           } else {
+             String s = yytext();
+             w.write(s);
+             if(debug) log("Matched and passing on mediums list: "+s);
+           }
+         }
+       case 309: break;
+       case 190:
+       case 200:
+       case 221:
+       case 238:
+       case 244:
+         {
+           String s = yytext();
+           if(debug) log("Found @import: "+s);
+           s = s.substring("@import".length());
+           s = s.trim();
+           DecodedStringThingy dst = new DecodedStringThingy(s);
+           s = dst.data;
+           if(debug) log("URL: "+s);
+           s = processURL(s);
+           if (!(s == null || s.equals(""))) {
+             if(debug) log("URL now: "+s);
+             // Store the filtered URL before re-encoding: without this the
+             // original, unfiltered URL was written to the output.
+             dst.data = s;
+             s = "@import "+dst.toString();
+             if(debug) log("Writing: "+s);
+             w.write(s);
+           } else
+             if(debug) log("Dropped @import");
+         }
+       case 310: break;
+       case 105:
+       case 129:
+         {
+           // Comment
+           // CSS comments are harmless? - FIXME check
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched comment: "+s);
+         }
+       case 311: break;
+       case 107:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched HTML comment: "+s);
+         }
+       case 312: break;
+       case 148:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched @media: "+s);
+         }
+       case 313: break;
+       case 74:
+       case 75:
+       case 95:
+       case 97:
+       case 120:
+       case 121:
+       case 143:
+       case 144:
+       case 166:
+       case 167:
+       case 180:
+       case 181:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched unicode: "+s);
+         }
+       case 314: break;
+       case 37:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched number: "+s);
+         }
+       case 315: break;
+       case 54:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched |=: "+s);
+         }
+       case 316: break;
+       case 126:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched @page: "+s);
+         }
+       case 317: break;
+       case 92:
+       case 154:
+         {
+           String s = yytext();
+           if(debug) log("Got hexcolor: "+s);
+           w.write(s);
+         }
+       case 318: break;
+       case 2:
+       case 5:
+       case 13:
+       case 30:
+       case 41:
+       case 57:
+       case 73:
+       case 89:
+       case 110:
+       case 132:
+       case 153:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched ident: "+s);
+         }
+       case 319: break;
+       case 34:
+       case 59:
+       case 60:
+         {
+           String s = yytext();
+           if(debug) log("Matched string: "+s);
+           if(paranoidStringCheck && s.indexOf(':') != -1) {
+             w.write("/* Deleted disallowed string */");
+             log("Deleted disallowed string: "+s);
+           } else {
+             w.write(s);
+           }
+         }
+       case 320: break;
+       case 38:
+       case 40:
+       case 71:
+       case 72:
+       case 93:
+       case 111:
+       case 112:
+       case 133:
+       case 134:
+       case 155:
+       case 175:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched #name: "+s);
+         }
+       case 321: break;
+       case 283:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched important: "+s);
+         }
+       case 322: break;
+       case 15:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched semicolon: "+s);
+         }
+       case 323: break;
+       case 138:
+       case 140:
+       case 142:
+       case 157:
+       case 162:
+       case 165:
+         {
+           String s = yytext();
+           if(debug) log("Ignoring unrecognizable url: "+s);
+           w.write("/* Ignoring unmatchable URL */url()");
+         }
+       case 324: break;
+       case 53:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched ~=: "+s);
+         }
+       case 325: break;
+       case 122:
+       case 145:
+       case 168:
+       case 182:
+       case 196:
+       case 215:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched unicode range: "+s);
+         }
+       case 326: break;
+       case 17:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched close braces: "+s);
+         }
+       case 327: break;
+       case 10:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched function end: "+s);
+         }
+       case 328: break;
+       case 250:
+       case 268:
+       case 282:
+         {
+           String s = yytext();
+           detectedCharset = s;
+           if(debug) log("Matched and ignoring charset: "+s);
+           // Ignore
+         }
+       case 329: break;
+       case 1:
+       case 3:
+       case 7:
+       case 8:
+       case 11:
+       case 14:
+       case 19:
+       case 20:
+       case 21:
+       case 22:
+         {
+           String s = yytext();
+           char c = s.charAt(0);
+           log("Matched anything: "+yytext()+" - ignoring");
+           // single char cannot break out of comment
+           w.write("/* ignored unmatched char: "+c+" */");
+         }
+       case 330: break;
+       case 227:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched @font-face: "+s);
+         }
+       case 331: break;
+       case 16:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched open braces: "+s);
+         }
+       case 332: break;
+       case 6:
+       case 9:
+       case 12:
+       case 18:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched single char: "+s);
+         }
+       case 333: break;
+       case 4:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched whitespace: "+s);
+         }
+       case 334: break;
+       case 29:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched measurement: "+s);
+         }
+       case 335: break;
+       case 58:
+         {
+           String s = yytext();
+           w.write(s);
+           if(debug) log("Matched HTML comment: "+s);
+         }
+       case 336: break;
+       default:
+         // No accepting state was reached: either clean end of input, or
+         // text that no rule matches.
+         if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
+           yy_atEOF = true;
+           return null;
+         }
+         else {
+           yy_ScanError(YY_NO_MATCH);
+         }
+     }
+   }
+ }
+
+
+}
Added: trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,443 @@
+package freenet.clients.http.filter;
+import java.io.*;
+import java.util.*;
+/* This class tokenizes a CSS2 Reader stream, writes it out to the output
Writer, and filters any URLs found */
+// WARNING: this is not as thorough as the HTML parser - new versions of the
standard could lead to anonymity risks. See comments in SaferFilter.java
+// Mostly from http://www.w3.org/TR/REC-CSS2/grammar.html
+
+%%
+
+%{
+ // Put stuff to include in the class here
+ String detectedCharset;
+
+ // External flag
+ boolean paranoidStringCheck = false;
+ boolean deleteErrors = true;
+ boolean debug = true;
+
+ // Internal flags
+ boolean postBadImportFlag = false; // both URLs and @import's
+ boolean importFlag = false;
+ boolean urlFlag = false;
+
+ // Writer
+ Writer w = null; // Will NPE if not initialized properly
+
+ /**
+  * Drives the scanner over the whole input by calling yylex() repeatedly
+  * until it returns null.
+  *
+  * @throws IOException if yylex() throws one
+  */
+ public void parse () throws IOException {
+     for (;;) {
+         if (yylex() == null) {
+             return;
+         }
+     }
+ }
+
+ /**
+  * Creates a tokenizer that reads from r and writes its output to w.
+  *
+  * @param r reader handed to the single-argument constructor
+  * @param w writer stored in the w field
+  * @param paranoidStringCheck initial value of the paranoidStringCheck flag
+  */
+ CSSTokenizerFilter(Reader r, Writer w, boolean paranoidStringCheck) {
+     this(r);
+     this.paranoidStringCheck = paranoidStringCheck;
+     this.w = w;
+ }
+
+ /**
+  * Error hook called by lexer rules that hit input they refuse to pass on.
+  * This base version always throws IllegalStateException; as its message
+  * says, it is meant to be overridden.
+  *
+  * @param s description of the problem (unused by this base version)
+  * @throws IOException declared for overriding implementations
+  */
+ void throwError(String s) throws IOException {
+     throw new IllegalStateException("You MUST override throwError!");
+ }
+
+ /**
+  * Hook for filtering the URL of an @import statement.
+  * This base version always throws IllegalStateException; as its message
+  * says, it is meant to be overridden.
+  *
+  * @param s the decoded URL (unused by this base version)
+  * @return never returns normally in this base version
+  */
+ String processImportURL(String s) {
+     throw new IllegalStateException("You MUST override processImportURL!");
+ }
+
+ /**
+  * Hook for filtering a URL found in the style sheet. The lexer rules treat
+  * a null or empty return value as "URL rejected".
+  * This base version always throws IllegalStateException; as its message
+  * says, it is meant to be overridden.
+  *
+  * @param s the decoded URL (unused by this base version)
+  * @return never returns normally in this base version
+  */
+ String processURL(String s) {
+     throw new IllegalStateException("You MUST override processURL!");
+ }
+
+ /**
+  * Prints a diagnostic line to System.err, prefixed with the class name.
+  *
+  * @param s text to print
+  */
+ void log(String s) {
+     String line = "CSSTokenizerFilter: "+s;
+     System.err.println(line);
+ }
+
+ /**
+  * Prints an error line to System.err, prefixed with the class name and
+  * the word ERROR.
+  *
+  * @param s text to print
+  */
+ void logError(String s) {
+     String line = "CSSTokenizerFilter ERROR: "+s;
+     System.err.println(line);
+ }
+
+ /**
+  * Strips the first and last character of s when s is longer than one
+  * character and ends with c. Only the last character is compared with c;
+  * the first one is dropped without being checked.
+  *
+  * @param s candidate quoted text
+  * @param c expected closing quote character
+  * @return s without its first and last character, or "" when s is too
+  *         short or does not end with c
+  */
+ static String unquote(String s, char c) {
+     int len = s.length();
+     boolean closed = len > 1 && s.charAt(len - 1) == c;
+     return closed ? s.substring(1, len - 1) : "";
+ }
+
+ // This is not very efficient. The parser below knows the quoting rules
too.
+
+ /**
+  * Tells whether c is one of the ASCII characters 0-9, a-f or A-F.
+  *
+  * @param c character to test
+  * @return true for an ASCII hexadecimal digit, false otherwise
+  */
+ static boolean isHexDigit(char c) {
+     if (c >= '0' && c <= '9') {
+         return true;
+     }
+     if (c >= 'a' && c <= 'f') {
+         return true;
+     }
+     return c >= 'A' && c <= 'F';
+ }
+
+ /**
+  * Decoded form of a CSS string or url(...) argument. The constructor strips
+  * an optional leading "url(" and an optional opening quote, decodes
+  * backslash escapes into data, and keeps trailing input in suffix;
+  * toString() puts the pieces back together.
+  */
+ class DecodedStringThingy {
+ char quote; // " " means not quoted
+ boolean url; // in a url() ?
+ String data;
+ public String suffix; // includes any whitespace
+ /**
+  * Parses s.
+  * @param s the matched text; must not be empty once a leading "url(" has
+  * been removed, because s.charAt(0) is read unconditionally
+  */
+ public DecodedStringThingy(String s) {
+ // startsWith() is case-sensitive, so only a lower-case "url(" is stripped.
+ // NOTE(review): the grammar is %ignorecase - confirm callers normalise.
+ if(s.startsWith("url(")) {
+ s = s.substring("url(".length());
+ url = true;
+ }
+ char q = s.charAt(0);
+ if(q == '\'' || q == '\"') {
+ quote = q;
+ s = s.substring(1);
+ } else quote = ' ';
+ StringBuffer buffer = new StringBuffer();
+ int x = 0;
+ // justEscaping: the previous char was a backslash.
+ // stillEscaping: hex digits of a \hhhhhh escape are being collected.
+ boolean justEscaping = false;
+ boolean stillEscaping = false;
+ StringBuffer hexEscape = new StringBuffer();
+ // The last character of s is never examined by this loop.
+ while(x < s.length()-1) {
+ char c = s.charAt(x);
+ x++;
+ if(justEscaping) {
+ if(c == '\n') {
+ buffer.append(c);
+ justEscaping = false;
+ } else if(isHexDigit(c)) {
+ hexEscape.append(c);
+ justEscaping = false;
+ stillEscaping = true;
+ } else {
+ buffer.append(c);
+ // Will need to be reencoded if
quote or \n
+ justEscaping = false;
+ }
+ } else if(stillEscaping) {
+ if(isHexDigit(c) && hexEscape.length()
< 6) {
+ hexEscape.append(c);
+ } else if(Character.isWhitespace(c)) {
+ // Ignore one whitespace char
after an escape
+ int d =
Integer.parseInt(hexEscape.toString(),
+ 16);
+ if(d > 0xFFFF) {
+ // Too large for a char: report it and append nothing.
+ String error =
+ "UCS-4 CHARACTERS
OVER 0xFFFF NOT SUPPORTED!";
+ logError(error);
+ try {
+ w.write("/*
"+error+"*/");
+ } catch (IOException e)
{};
+ } else {
+ c = (char)d;
+ buffer.append(c);
+ }
+ stillEscaping = false;
+ hexEscape = new StringBuffer();
+ } else {
+ // Any other char ends the escape and is itself appended
+ // unconditionally, without the quote/backslash checks below.
+ int d =
Integer.parseInt(hexEscape.toString(),
+ 16);
+ if(d > 0xFFFF) {
+ String error =
+ "UCS-4 CHARACTERS
OVER 0xFFFF NOT SUPPORTED!";
+ logError(error);
+ try {
+ w.write("/*
"+error+"*/");
+ } catch (IOException e)
{};
+ } else {
+ char o = (char)d;
+ buffer.append(o);
+ }
+ buffer.append(c);
+ stillEscaping = false;
+ hexEscape = new StringBuffer();
+ }
+ } else {
+ if(quote != ' ' && c == quote) {
+ break;
+ } else if (c == '\\') {
+ justEscaping = true;
+ } else {
+ buffer.append(c);
+ }
+ }
+ }
+ // Hex digits still pending in hexEscape when the loop ends are not
+ // converted; nothing after the loop looks at hexEscape.
+ data = buffer.toString();
+ // The character at index x is skipped; suffix starts at x+1.
+ // NOTE(review): after a break on the closing quote, x already points past
+ // the quote, so one further character is dropped - confirm this is intended.
+ if(s.length() > (x+1))
+ suffix = s.substring(x+1);
+ else suffix = "";
+ }
+
+ /**
+  * Rebuilds the textual form: optional "url(", optional quote, the
+  * re-escaped data, optional quote, optional ")", then suffix.
+  */
+ public String toString() {
+ StringBuffer out = new StringBuffer();
+ if(url) out.append("url(");
+ if(quote != ' ') out.append(quote);
+ out.append(unescapeData());
+ if(quote != ' ') out.append(quote);
+ if(url) out.append(")");
+ out.append(suffix);
+ return out.toString();
+ }
+
+ /**
+  * Returns data with a backslash inserted before every character equal to
+  * quote and before every '\n'. As quote is ' ' for unquoted values, spaces
+  * are backslash-escaped in that case. A backslash in data is copied through
+  * unchanged.
+  * NOTE(review): despite the name this method escapes rather than unescapes,
+  * and the unescaped backslash looks like it could swallow the closing quote.
+  */
+ public String unescapeData() {
+ StringBuffer sb = new StringBuffer();
+ for(int i=0;i<data.length();i++) {
+ char c = data.charAt(i);
+ if(c == quote || c == '\n') {
+ sb.append('\\');
+ }
+ sb.append(c);
+ }
+ return sb.toString();
+ }
+ }
+%}
+
+%class CSSTokenizerFilter
+%unicode
+%ignorecase
+
+// Case sensitivity DOES NOT AFFECT CHARACTER CLASSES!
+H=[0-9a-fA-F]
+NONASCII=[\200-\4177777]
+UNICODE=\\{H}{1,6}[ \t\r\n\f]?
+ESCAPE={UNICODE}|\\[ -~\200-\4177777]
+NMSTART=[a-zA-Z]|{NONASCII}|{ESCAPE}
+NMCHAR=[a-zA-Z0-9-]|{NONASCII}|{ESCAPE}
+
+// The spec (http://www.w3.org/TR/REC-CSS2/grammar.html, mostly D.2 for this
bit)
+// is on crack wrt string/url, so this is guesswork
+STRING1=\"(\\{NL}|\'|\\\"|{NONASCII}|{ESCAPE}|[^\"])*\"
+STRING2=\'(\\{NL}|\"|\\\'|{NONASCII}|{ESCAPE}|[^\'])*\'
+
+IDENT={NMSTART}{NMCHAR}*
+NAME={NMCHAR}+
+NUM=[0-9]+|[0-9]*"."[0-9]+
+STRING={STRING1}|{STRING2}
+// Contents of a bracket: any char but ")", an escaped "\)", or a whole quoted string.
+// The macro must be referenced as {STRING}; a bare STRING is matched as literal text.
+INBRACKET=([^\)]|"\\)"|{STRING})*
+
+// See comments for STRING1/STRING2 :)
+URL=([^\(\)\"\']|{NONASCII}|{ESCAPE})*
+
+W=[ \t\r\n\f]*
+NL=\n|\r\n|\r|\f
+RANGE=\?{1,6}|{H}(\?{0,5}|{H}(\?{0,4}|{H}(\?{0,3}|{H}(\?{0,2}|{H}(\??|{H})))))
+HEXCOLOR="#"(({H}{H}{H})|({H}{H}{H}{H}{H}{H}))
+
+// From grammar
+MEDIUM={IDENT}{W}*
+// As distinct from MEDIA, which allows rulesets
+MEDIUMS={MEDIUM}(","{W}*{MEDIUM})*
+
+// This is rather incomprehensible, so I am adding log messages for every
token. They will not actually call log() unless debug is true.
+
+// Loosely based on http://www.w3.org/TR/REC-CSS2/grammar.html
+%%
+
+{HEXCOLOR} {
+ String s = yytext();
+ if(debug) log("Got hexcolor: "+s);
+ w.write(s);
+}
+"url("{W}*({STRING}|{URL}){W}")" {
+    // This is horrible. However it seems that there is no other way to do it
+    // with either jflex or CUP, as {URL} cannot be an unambiguous token :(
+    String s = yytext();
+    if(debug) log("Recognized URL: "+s);
+
+    // %ignorecase lets this rule match "URL(" or "Url(", but
+    // DecodedStringThingy only recognises a lower-case "url(" prefix.
+    // Normalise the prefix (same length, so suffix offsets are unaffected)
+    // instead of failing with the IllegalStateException below.
+    s = "url(" + s.substring("url(".length());
+    DecodedStringThingy dst = new DecodedStringThingy(s);
+
+    if(!dst.url) {
+        throw new IllegalStateException("parsing url().. isn't a url()");
+    }
+    // Hand back whatever the decoder left over so it is scanned again
+    if(dst.suffix.length() > 0) {
+        yypushback(dst.suffix.length());
+        dst.suffix = "";
+    }
+
+    s = dst.data;
+    if(debug) log("URL now: "+s);
+    s = processURL(s);
+    dst.data = s;
+    if(s == null || s.equals("")) {
+        // Rejected by the callback: emit an empty url()
+        if(debug) log("URL invalid");
+        w.write("url()");
+    } else {
+        s = dst.toString();
+        if(debug) log("Writing: "+s);
+        w.write(s);
+    }
+}
+"@import"{W}{W}*({STRING}|{URL})({W}*{W}{MEDIUMS})?";" {
+    String s = yytext();
+    if(debug) log("Found @import: "+s);
+    s = s.substring("@import".length());
+    s = s.trim();
+    DecodedStringThingy dst = new DecodedStringThingy(s);
+    s = dst.data;
+    if(debug) log("URL: "+s);
+    // Imports have their own hook, separate from ordinary url() references
+    s = processImportURL(s);
+    if (!(s == null || s.equals(""))) {
+        if(debug) log("URL now: "+s);
+        // Store the filtered URL: dst.toString() re-emits dst.data, so without
+        // this the original, unfiltered URL would be written out.
+        dst.data = s;
+        s = "@import "+dst.toString();
+        // With no media list the decoder leaves an empty suffix and the
+        // terminating ";" matched by this rule is lost; put it back.
+        if(dst.suffix.length() == 0) s = s + ";";
+        if(debug) log("Writing: "+s);
+        w.write(s);
+    } else
+        if(debug) log("Dropped @import");
+}
+{W}"{"{W} {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched open braces: "+s);
+}
+{W}"}"{W} {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched close braces: "+s);
+}
+[ \t\r\n\f]+ {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched whitespace: "+s);
+}
+\/\*[^*]*\*+([^/][^*]*\*+)*\/ {
+ // Comment
+ // CSS comments are harmless? - FIXME check
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched comment: "+s);
+}
+"<!--" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched HTML comment: "+s);
+}
+"-->" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched HTML comment: "+s);
+}
+"~=" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched ~=: "+s);
+}
+"|=" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched |=: "+s);
+}
+{IDENT} {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched ident: "+s);
+}
+"@page" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched @page: "+s);
+}
+"@media" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched @media: "+s);
+}
+"@font-face" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched @font-face: "+s);
+}
+"@"{IDENT}[^;\}\"]*[;\}] {
+ if(!deleteErrors) {
+ throwError("Unknown @identifier "+yytext());
+ } else {
+ String s = yytext();
+ if(debug) log("Discarded identifier: "+s);
+ // Ignore
+ }
+}
+"#"{NAME} {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched #name: "+s);
+}
+// {W} must stand outside the quotes: inside a string literal it is matched
+// as the literal text "{W}", so the rule could never match "!important".
+"!"{W}"important" {
+    String s = yytext();
+    w.write(s);
+    if(debug) log("Matched important: "+s);
+}
+U\+{RANGE} {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched unicode: "+s);
+}
+U\+{H}{1,6}-{H}{1,6} {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched unicode range: "+s);
+}
+{NUM}("em"|"ex"|"px"|"cm"|"mm"|"in"|"pc"|"deg"|"rad"|"grad"|"ms"|"s"|"Hz"|"kHz"|"%")
{
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched measurement: "+s);
+}
+{NUM} {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched number: "+s);
+}
+
+{MEDIUMS}{W}*";" {
+ if(postBadImportFlag) {
+ // Ignore
+ postBadImportFlag = false;
+ if(debug) log("Ignoring mediums list because after bad import:
"+
+ yytext());
+ } else {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched and passing on mediums list: "+s);
+ }
+}
+
+"@charset"{W}*{STRING}{W}*";" {
+ String s = yytext();
+ detectedCharset = s;
+ if(debug) log("Matched and ignoring charset: "+s);
+ // Ignore
+}
+"url("{INBRACKET}")" {
+ String s = yytext();
+ if(debug) log("Ignoring unrecognizable url: "+s);
+ w.write("/* Ignoring unmatchable URL */url()");
+}
+{IDENT}"(" {
+    String s = yytext();
+    // A url( that reaches this rule was not matched by either url() rule
+    // above. The grammar is %ignorecase, so compare the prefix ignoring
+    // case too, or "URL(" would be written through unchecked.
+    if(s.regionMatches(true, 0, "url", 0, "url".length())) throwError("Invalid contents of url()");
+    w.write(s);
+    if(debug) log("Matched function start: "+s);
+}
+")" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched function end: "+s);
+}
+";" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched semicolon: "+s);
+}
+{STRING} {
+ String s = yytext();
+ if(debug) log("Matched string: "+s);
+ if(paranoidStringCheck && s.indexOf(':') != -1) {
+ w.write("/* Deleted disallowed string */");
+ log("Deleted disallowed string: "+s);
+ } else {
+ w.write(s);
+ }
+}
+// These are plain chars, which would be passed through as tokens somehow by
the spec'd tokenizer
+","|":"|"/"|">"|"-"|"+"|"."|"*" {
+ String s = yytext();
+ w.write(s);
+ if(debug) log("Matched single char: "+s);
+}
+. {
+ String s = yytext();
+ char c = s.charAt(0);
+ log("Matched anything: "+yytext()+" - ignoring");
+ w.write("/* ignored unmatched char: "+c+" */"); // single char cannot
break out of comment
+}
Modified: trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,5 +1,7 @@
package freenet.clients.http.filter;
+import java.io.IOException;
+
import freenet.support.Bucket;
/**
@@ -8,6 +10,6 @@
*/
public interface CharsetExtractor {
- String getCharset(Bucket data);
+ String getCharset(Bucket data, String parseCharset) throws
DataFilterException, IOException;
}
Modified: trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,12 +1,17 @@
package freenet.clients.http.filter;
+import java.io.IOException;
+import java.util.HashMap;
+
import freenet.support.Bucket;
+import freenet.support.BucketFactory;
/**
* Data filter for a specific MIME type.
*/
public interface ContentDataFilter {
- public Bucket filter(Bucket data, String charset, FilterCallback cb);
+ public Bucket readFilter(Bucket data, BucketFactory bf, String charset,
HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException;
+ public Bucket writeFilter(Bucket data, BucketFactory bf, String
charset, HashMap otherParams, FilterCallback cb) throws DataFilterException,
IOException;
}
Modified: trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,7 +1,16 @@
package freenet.clients.http.filter;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
import java.util.Hashtable;
+import freenet.support.Bucket;
+import freenet.support.BucketFactory;
+import freenet.support.BucketTools;
+import freenet.support.Logger;
+
/**
* Freenet content filter. This doesn't actually do any filtering,
* it organizes everything and maintains the database.
@@ -22,7 +31,7 @@
true, true, null, null, false, false, false,
false, false, false,
"Plain text - not dangerous unless your browser
is stupid (e.g. Internet Explorer)",
"Plain text - not dangerous unless you include
compromizing information",
- true, "iso-8859-1", null));
+ true, "US-ASCII", null));
// GIF - probably safe - FIXME check this out, write filters
register(new MIMEType("image/gif", "gif", new String[0], new
String[0],
@@ -46,18 +55,17 @@
// PDF - very dangerous - FIXME ideally we would have a filter,
this is such a common format...
register(new MIMEType("application/pdf", "pdf", new String[] {
"application/x-pdf" }, new String[0],
- false, false, null, null, true, true, true,
true, true, true,
+ false, false, null, null, true, true, true,
false, true, true,
"Adobe(R) PDF document - VERY DANGEROUS!",
"Adobe(R) PDF document - VERY DANGEROUS!",
false, null, null));
// HTML - dangerous if not filtered
register(new MIMEType("text/html", "html", new String[] {
"text/xhtml", "text/xml+xhtml" }, new String[] { "htm" },
- false, false /* maybe? */, new
HTMLReadFilter(), new HTMLWriteFilter(),
- true, true, true, true, true, true, false,
- "HTML - not dangerous if filtered",
+ false, false /* maybe? */, new HTMLFilter(),
null /* FIXME */,
+ true, true, true, true, true, true, "HTML - not
dangerous if filtered",
"HTML - may contain dangerous metadata etc;
suggest you check it by hand",
- true, "iso-8859-1", new
HTMLCharsetExtractor()));
+ true, "iso-8859-1", new HTMLFilter()));
// CSS - danagerous if not filtered, not sure about the filter
register(new MIMEType("text/css", "css", new String[0], new
String[0],
@@ -65,7 +73,7 @@
true, true, true, true, true, false,
"CSS (cascading style sheet, usually used with
HTML) - probably not dangerous if filtered, but the filter is not a whitelist
filter so take care",
"CSS (cascading style sheet, usually used with
HTML) - this can probably contain metadata, check it by hand",
- true, "iso-8859-1", new CSSCharsetExtractor()));
+ true, "utf-8", new CSSReadFilter()));
}
@@ -83,7 +91,173 @@
public static MIMEType getMIMEType(String mimeType) {
return (MIMEType) mimeTypesByName.get(mimeType);
}
+
+ /**
+ * Filter some data.
+ * @throws IOException If an internal error involving buckets occurred.
+ */
+ public static Bucket filter(Bucket data, BucketFactory bf, String
typeName) throws UnsafeContentTypeException, IOException {
+ String type = typeName;
+ String options = "";
+ String charset = null;
+ HashMap otherParams = null;
+
+ // First parse the MIME type
+
+ int idx = type.indexOf(';');
+ if(idx != -1) {
+ options = type.substring(idx+1);
+ type = type.substring(0, idx);
+ // Parse options
+ // Format: <type>/<subtype>[ optional white space ];[
optional white space ]<param>=<value>; <param2>=<value2>; ...
+ String[] rawOpts = options.split(";");
+ for(int i=0;i<rawOpts.length;i++) {
+ String raw = rawOpts[i];
+ idx = raw.indexOf('=');
+ if(idx == -1) {
+ Logger.error(ContentFilter.class, "idx
= -1 for '=' on option: "+raw+" from "+typeName);
+ continue;
+ }
+ String before = raw.substring(0, idx).trim();
+ String after = raw.substring(idx+1).trim();
+ if(before.equals("charset")) {
+ charset = after;
+ } else {
+ if(otherParams == null) otherParams =
new HashMap();
+ otherParams.put(before, after);
+ }
+ }
+ }
+
+ // Now look for a MIMEType handler
+
+ MIMEType handler = getMIMEType(type);
+
+ if(handler == null)
+ throw new UnknownContentTypeException(typeName);
+ else {
+
+ if(handler.safeToRead) {
+ return data;
+ }
+
+ if(handler.readFilter != null) {
+ if(handler.takesACharset && (charset == null ||
charset.length() == 0)) {
+ charset = detectCharset(data, handler);
+ }
+
+ return handler.readFilter.readFilter(data, bf,
charset, otherParams, new GenericReadFilterCallback());
+ }
+ handler.throwUnsafeContentTypeException();
+ return null;
+ }
+ }
+
+ /**
+  * Works out which charset to use for data when none was supplied.
+  * A byte order mark wins; otherwise the handler's charset extractor is
+  * run with a series of candidate charsets; otherwise the handler's default
+  * charset is used.
+  * @param data the data to inspect
+  * @param handler MIME type descriptor supplying extractor and default
+  * @return the detected charset, or handler.defaultCharset
+  * @throws IOException if reading the data fails
+  */
+ private static String detectCharset(Bucket data, MIMEType handler) throws IOException {
+
+     // Detect charset
+
+     String charset = detectBOM(data);
+     // A BOM identifies the encoding. Return it rather than falling through
+     // to the default, which would parse the data with the wrong charset.
+     if(charset != null)
+         return charset;
+
+     if(handler.charsetExtractor != null) {
+
+         // Obviously, this is slow!
+         // This is why we need to detect on insert.
+
+         String[] candidates = new String[] {
+             handler.defaultCharset, "ISO-8859-1", "UTF-8", "UTF-16", "UTF-32"
+         };
+         for(int i=0;i<candidates.length;i++) {
+             if(candidates[i] == null) continue; // no default charset configured
+             try {
+                 charset = handler.charsetExtractor.getCharset(data, candidates[i]);
+                 if(charset != null)
+                     return charset;
+             } catch (DataFilterException e) {
+                 // Ignore, try the next candidate
+             }
+         }
+
+     }
+
+     // If it doesn't have a BOM, then it's *probably* safe to use as default.
+
+     return handler.defaultCharset;
+ }
+
+ /**
+  * Detect a Byte Order Mark, a sequence of bytes which identifies a document
+  * as encoded with a specific charset. Only the first five bytes of the
+  * bucket are read, and only bytes actually read take part in the comparison.
+  * @param bucket the data to inspect
+  * @return the charset name for the BOM found, or null if there is none
+  * @throws IOException if the bucket cannot be read
+  */
+ private static String detectBOM(Bucket bucket) throws IOException {
+     byte[] data = new byte[5];
+     int read = 0;
+     InputStream is = bucket.getInputStream();
+     try {
+         while(read < data.length) {
+             int x;
+             try {
+                 x = is.read(data, read, data.length - read);
+             } catch (EOFException e) {
+                 x = -1;
+             }
+             if(x <= 0) break;
+             // Advance, otherwise each read overwrites the start of the
+             // buffer and the loop only ends at end of stream.
+             read += x;
+         }
+     } finally {
+         is.close();
+     }
+     if(hasBOM(data, read, bom_utf8)) return "UTF-8";
+     // UTF-32 before UTF-16: the UTF-32LE mark starts with the UTF-16LE mark
+     if(hasBOM(data, read, bom_utf32_be) || hasBOM(data, read, bom_utf32_le)) return "UTF-32";
+     if(hasBOM(data, read, bom_utf16_be) || hasBOM(data, read, bom_utf16_le)) return "UTF-16";
+     if(hasBOM(data, read, bom_scsu)) return "SCSU";
+     if(hasBOM(data, read, bom_utf7_1) || hasBOM(data, read, bom_utf7_2)
+             || hasBOM(data, read, bom_utf7_3) || hasBOM(data, read, bom_utf7_4)
+             || hasBOM(data, read, bom_utf7_5)) return "UTF-7";
+     if(hasBOM(data, read, bom_utf_ebcdic)) return "UTF-EBCDIC";
+     if(hasBOM(data, read, bom_bocu_1)) return "BOCU-1";
+     return null;
+ }
+
+ /** True if at least bom.length bytes were read and data begins with bom. */
+ private static boolean hasBOM(byte[] data, int read, byte[] bom) {
+     return read >= bom.length && startsWith(data, bom);
+ }
- public static
+ // Byte Order Mark's - from Wikipedia. We keep all of them because a
rare encoding might
+ // be deliberately used by an attacker to confuse the filter, because
at present a charset
+ // is not mandatory, and because some browsers may pick these up anyway
even if one is present.
+ static byte[] bom_utf8 = new byte[] { (byte)0xEF, (byte)0xBB,
(byte)0xBF };
+ static byte[] bom_utf16_be = new byte[] { (byte)0xFE, (byte)0xFF };
+ static byte[] bom_utf16_le = new byte[] { (byte)0xFF, (byte)0xFE };
+ static byte[] bom_utf32_be = new byte[] { (byte)0, (byte)0, (byte)0xFE,
(byte)0xFF };
+ static byte[] bom_utf32_le = new byte[] { (byte)0xFF, (byte)0xFE,
(byte)0, (byte)0 };
+ static byte[] bom_scsu = new byte[] { (byte)0x0E, (byte)0xFE,
(byte)0xFF };
+ static byte[] bom_utf7_1 = new byte[] { (byte)0x2B, (byte)0x2F,
(byte)0x76, (byte) 0x38 };
+ static byte[] bom_utf7_2 = new byte[] { (byte)0x2B, (byte)0x2F,
(byte)0x76, (byte) 0x39 };
+ static byte[] bom_utf7_3 = new byte[] { (byte)0x2B, (byte)0x2F,
(byte)0x76, (byte) 0x2B };
+ static byte[] bom_utf7_4 = new byte[] { (byte)0x2B, (byte)0x2F,
(byte)0x76, (byte) 0x2F };
+ static byte[] bom_utf7_5 = new byte[] { (byte)0x2B, (byte)0x2F,
(byte)0x76, (byte) 0x38, (byte) 0x2D };
+ static byte[] bom_utf_ebcdic = new byte[] { (byte)0xDD, (byte)0x73,
(byte)0x66, (byte)0x73 };
+ static byte[] bom_bocu_1 = new byte[] { (byte)0xFB, (byte)0xEE,
(byte)0x28 };
+
+ /**
+  * Tells whether data begins with the bytes of cmp.
+  * @param data bytes to inspect
+  * @param cmp expected prefix
+  * @return true if data is at least as long as cmp and its first
+  * cmp.length bytes equal cmp
+  */
+ private static boolean startsWith(byte[] data, byte[] cmp) {
+     // A shorter array cannot contain the prefix; without this check the
+     // loop would run off the end of data.
+     if(data.length < cmp.length) return false;
+     for(int i=0;i<cmp.length;i++) {
+         if(data[i] != cmp[i]) return false;
+     }
+     return true;
+ }
+
}
Added: trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,30 @@
+package freenet.clients.http.filter;
+
+/**
+ * Exception thrown when the data cannot be filtered.
+ * It carries a title in raw and in HTML-encoded form, plus an explanation,
+ * all supplied by the code that throws it.
+ */
+public class DataFilterException extends UnsafeContentTypeException {
+
+    /** Title as plain text. */
+    final String rawTitle;
+    /** Title already encoded for inclusion in HTML. */
+    final String encodedTitle;
+    /** Explanation of the problem. */
+    final String explanation;
+
+    /**
+     * @param raw title as plain text
+     * @param encoded the same title, HTML-encoded
+     * @param explanation explanation of the problem
+     */
+    DataFilterException(String raw, String encoded, String explanation) {
+        rawTitle = raw;
+        encodedTitle = encoded;
+        this.explanation = explanation;
+    }
+
+    /** @return the title as plain text */
+    public String getRawTitle() {
+        return this.rawTitle;
+    }
+
+    /** @return the HTML-encoded title */
+    public String getHTMLEncodedTitle() {
+        return this.encodedTitle;
+    }
+
+    /** @return the explanation */
+    public String getExplanation() {
+        return this.explanation;
+    }
+
+}
Modified: trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,6 +1,6 @@
package freenet.clients.http.filter;
-import freenet.keys.FreenetURI;
+import java.net.URI;
/**
* Callback to be provided to a content filter.
@@ -10,8 +10,9 @@
/**
* Process a URI.
* If it cannot be turned into something sufficiently safe, then return
null.
+ * @param overrideType Force the return type.
*/
- public FreenetURI processURI(FreenetURI uri);
+ public String processURI(String uri, String overrideType);
/**
* Should we allow GET forms?
Added:
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
===================================================================
---
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
2006-03-18 14:52:17 UTC (rev 8274)
+++
trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,69 @@
+package freenet.clients.http.filter;
+
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import freenet.keys.FreenetURI;
+import freenet.pluginmanager.HTTPRequest;
+import freenet.support.Logger;
+
+public class GenericReadFilterCallback implements FilterCallback {
+
+ public boolean allowGetForms() {
+ return false;
+ }
+
+ public boolean allowPostForms() {
+ return false;
+ }
+
+ public String processURI(String u, String overrideType) {
+ URI uri;
+ try {
+ uri = new URI(u);
+ } catch (URISyntaxException e1) {
+ return null;
+ }
+ String path = uri.getPath();
+ if(path.startsWith("/")) {
+ // Try to make it into a FreenetURI
+ try {
+ FreenetURI furi = new
FreenetURI(path.substring(1));
+ return processURI(furi, uri, overrideType);
+ } catch (MalformedURLException e) {
+ // Obviously not a Freenet URI!
+ }
+ } else {
+ // Relative URI
+ // FIXME resolve it
+ // FIXME Note that we allow links to / inlines from
fproxy services.
+ // This is okay because we don't allow forms.
+ HTTPRequest req = new HTTPRequest(uri);
+ return finishProcess(req, overrideType, path);
+ }
+ Logger.normal(this, "Unrecognized URI, dropped: "+uri);
+ return null;
+ }
+
+ private String finishProcess(HTTPRequest req, String overrideType,
String path) {
+ String typeOverride = req.getParam("type", null);
+ if(overrideType != null)
+ typeOverride = overrideType;
+ // REDFLAG any other options we should support?
+ // Obviously we don't want to support ?force= !!
+ // At the moment, ?type= and ?force= are the only options
supported by Fproxy anyway.
+ String ret = path;
+ if(typeOverride != null)
+ ret = ret + "?type=" + typeOverride;
+ return ret;
+ }
+
+ private String processURI(FreenetURI furi, URI uri, String
overrideType) {
+ // Valid freenet URI, allow it
+ // Now what about the queries?
+ HTTPRequest req = new HTTPRequest(uri);
+ return finishProcess(req, overrideType, "/" +
furi.toString(false));
+ }
+
+}
Added: trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,1803 @@
+/* -*- Mode: java; c-basic-indent: 4; tab-width: 4 -*- */
+
+package freenet.clients.http.filter;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+import java.util.Vector;
+
+import freenet.support.Bucket;
+import freenet.support.BucketFactory;
+import freenet.support.HTMLDecoder;
+import freenet.support.HTMLEncoder;
+import freenet.support.Logger;
+import freenet.support.io.NullBucket;
+import freenet.support.io.NullWriter;
+
+public class HTMLFilter implements ContentDataFilter, CharsetExtractor {
+
+ private boolean debug = false;
+ private static boolean deleteWierdStuff = true;
+ private static boolean deleteErrors = true;
+ private boolean allowSecurityErrors = false;
+ private boolean allowSecurityWarnings = false;
+ private boolean cssParanoidStringCheck = false;
+
+ private final static String possibleAnonCompromiseMsg =
+ "You have retrieved some content which is not recognised by
FProxy, and so we "
+ + "don't know what your web browser might do with it.
It could be harmless, "
+ + "but it could make your web browser do something
which would compromise your "
+ + "anonymity.";
+ /**
+  * Sets the cssParanoidStringCheck flag.
+  * @param b new value of the flag
+  */
+ public void setParanoidStringCheck(boolean b) {
+     this.cssParanoidStringCheck = b;
+ }
+
+ /**
+  * Sets the allowSecurityWarnings flag.
+  * @param value new value of the flag
+  */
+ public void setAllowSecurityWarnings(boolean value) {
+     this.allowSecurityWarnings = value;
+ }
+
+ /**
+  * Sets the allowSecurityErrors flag.
+  * @param value new value of the flag
+  */
+ public void setAllowSecurityErrors(boolean value) {
+     this.allowSecurityErrors = value;
+ }
+
+ public void setDebug(boolean debug) {
+ this.debug = debug;
+ }
+
+ /**
+  * Filters an HTML document for reading.
+  * @param bucket the unfiltered document
+  * @param bf factory used to create the bucket receiving the output
+  * @param charset character set used both to decode the input and to
+  * encode the output
+  * @param otherParams other MIME type parameters (not used here)
+  * @param cb callback passed to the parse context
+  * @return a new bucket holding the filtered document
+  * @throws DataFilterException if charset is not a supported encoding, or
+  * if the parser rejects the document
+  * @throws IOException if reading or writing a bucket fails
+  */
+ public Bucket readFilter(Bucket bucket, BucketFactory bf, String charset, HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException {
+     Logger.minor(this, "readFilter(): charset="+charset);
+     InputStream strm = bucket.getInputStream();
+     OutputStream os = null;
+     try {
+         Bucket temp = bf.makeBucket(bucket.size());
+         os = temp.getOutputStream();
+         Reader r;
+         Writer w;
+         try {
+             r = new BufferedReader(new InputStreamReader(strm, charset), 32768);
+             w = new BufferedWriter(new OutputStreamWriter(os, charset), 32768);
+         } catch (UnsupportedEncodingException e) {
+             throw new DataFilterException("Warning: Unknown character set ("+charset+")", "Warning: Unknown character set ("+HTMLEncoder.encode(charset)+")",
+                     "<p><b>Unknown character set</b> The page you are about to display has an unknown character set. "+
+                     "This means that we are not able to filter the page, and it may compromize your anonymity.");
+         }
+         HTMLParseContext pc = new HTMLParseContext(r, w, charset, cb);
+         pc.run(temp);
+         // Push buffered output down to os here, so that a write failure is
+         // reported on the normal path rather than from the cleanup below.
+         w.flush();
+         return temp;
+     } finally {
+         // Release both streams on every exit path, including failures
+         // while creating the bucket or while parsing.
+         try {
+             strm.close();
+         } finally {
+             if(os != null) os.close();
+         }
+     }
+ }
+
+ /**
+  * Write filtering is not implemented for HTML.
+  * @throws UnsupportedOperationException always
+  */
+ public Bucket writeFilter(Bucket bucket, BucketFactory bf, String
charset, HashMap otherParams, FilterCallback cb) throws DataFilterException,
IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+  * Tries to find the charset a document declares, by parsing it with a
+  * trial charset and discarding the output.
+  * @param bucket the document
+  * @param parseCharset charset used to decode the document for this attempt
+  * @return the charset found by the parse context, or null if none was
+  * found or parseCharset is not supported by this JVM
+  * @throws IOException if the bucket cannot be opened or closed
+  */
+ public String getCharset(Bucket bucket, String parseCharset) throws DataFilterException, IOException {
+     Logger.minor(this, "getCharset(): default="+parseCharset);
+     InputStream strm = bucket.getInputStream();
+     try {
+         Reader r;
+         try {
+             r = new BufferedReader(new InputStreamReader(strm, parseCharset), 32768);
+         } catch (UnsupportedEncodingException e) {
+             // This JVM cannot decode with the trial charset, so nothing
+             // can be detected with it. Report "not found" so that the
+             // caller can go on to its next candidate.
+             Logger.minor(this, "Unsupported charset "+parseCharset+", cannot use it for detection");
+             return null;
+         }
+         Writer w = new NullWriter();
+         HTMLParseContext pc = new HTMLParseContext(r, w, null, new NullFilterCallback());
+         try {
+             pc.run(null);
+         } catch (Throwable t) {
+             // Ignore ALL errors
+             Logger.minor(this, "Caught "+t+" trying to detect MIME type with "+parseCharset);
+         }
+         return pc.detectedCharset;
+     } finally {
+         // The stream was previously never closed
+         strm.close();
+     }
+ }
+
+ class HTMLParseContext {
+ Reader r;
+ Writer w;
+ String charset;
+ String detectedCharset;
+ final FilterCallback cb;
+
+ HTMLParseContext(Reader r, Writer w, String charset,
FilterCallback cb) {
+ this.r = r;
+ this.w = w;
+ this.charset = charset;
+ this.cb = cb;
+ }
+
+ Bucket run(Bucket temp) throws IOException, DataFilterException
{
+
+ /**
+ * TOKENIZE Modes:
+ * <p>0) in text transitions: '<' ->(1) 1) in tag, not
in
+ * quotes/comment/whitespace transitions: whitespace ->
(4) (save
+ * current element) '"' -> (2) '--' at beginning of tag
-> (3) '>' ->
+ * process whole tag 2) in tag, in quotes transitions:
'"' -> (1)
+ * '>' -> grumble about markup in quotes in tag might
confuse older
+ * user-agents (stay in current state) 3) in tag, in
comment
+ * transitions: '-->' -> save/ignore comment, go to (0)
'<' or '>' ->
+ * grumble about markup in comments 4) in tag, in
whitespace
+ * transitions: '"' -> (2) '>' -> save tag, (0)
anything else not
+ * whitespace -> (1)
+ * </p>
+ */
+ StringBuffer b = new StringBuffer(100);
+ Vector splitTag = new Vector();
+ char pprevC = 0;
+ char prevC = 0;
+ char c = 0;
+ mode = INTEXT;
+ while (true) {
+ int x = r.read();
+ if (x == -1) {
+ switch (mode) {
+ case INTEXT :
+ saveText(b, w, this);
+ break;
+ default :
+ // Dump unfinished tag
+ break;
+ }
+ break;
+ } else {
+ pprevC = prevC;
+ prevC = c;
+ c = (char) x;
+ switch (mode) {
+ case INTEXT :
+ if (c == '<') {
+ saveText(b, w,
this);
+ b.setLength(0);
+ mode = INTAG;
+ } else {
+ b.append(c);
+ }
+ break;
+ case INTAG :
+ if
(HTMLDecoder.isWhitespace(c)) {
+
splitTag.add(b.toString());
+ mode =
INTAGWHITESPACE;
+ b.setLength(0);
+ } else if (c == '>') {
+
splitTag.add(b.toString());
+ b.setLength(0);
+
processTag(splitTag, w, this);
+
splitTag.clear();
+ mode = INTEXT;
+ } else if (
+ b.length() == 2
+ && c ==
'-'
+ &&
prevC == '-'
+ &&
pprevC == '!') {
+ mode =
INTAGCOMMENT;
+ b.append(c);
+ } else if (c == '"') {
+ mode =
INTAGQUOTES;
+ b.append(c);
+ } else if (c == '\'') {
+ mode =
INTAGSQUOTES;
+ b.append(c);
+ } else {
+ b.append(c);
+ }
+ break;
+ case INTAGQUOTES :
+ if (c == '"') {
+ mode = INTAG;
+ b.append(c); //
Part of the element
+ } else if (c == '>' ||
c == '<') {
+ if
(!deleteErrors) {
+
throwFilterException("Tags in markup");
+
b.append(c);
+ return
new NullBucket();
+ } else {
+ if (c
== '>') {
+
w.write(
+
"<!-- Tags in string attribute -->");
+
splitTag.clear();
+
b.setLength(0);
+
mode = INTEXT;
+
// End tag now
+ } else {
+
killTag = true;
+
writeAfterTag
+
+= "<!-- Tags in string attribute -->";
+
// Wait for end of tag then zap it
+ }
+ }
+ } else {
+ b.append(c);
+ }
+ break;
+ case INTAGSQUOTES :
+ if (c == '\'') {
+ mode = INTAG;
+ b.append(c); //
Part of the element
+ } else if (c == '>' ||
c == '<') {
+ if
(!deleteErrors) {
+
throwFilterException("Tags in markup");
+
b.append(c);
+ return
new NullBucket();
+ } else {
+ if (c
== '>') {
+
w.write(
+
"<!-- Tags in string attribute -->");
+
splitTag.clear();
+
b.setLength(0);
+
mode = INTEXT;
+
// End tag now
+ } else {
+
killTag = true;
+
writeAfterTag
+
+= "<!-- Tags in string attribute -->";
+
// Wait for end of tag then zap it
+ }
+
writeAfterTag
+
+= "<!-- Tags in string attribute -->";
+ killTag
= true;
+ }
+ } else {
+ b.append(c);
+ }
+ break;
+ /*
+ * Comments are often
used to temporarily disable
+ * markup; I shall
allow it. (avian) White space is
+ * not permitted
between the markup declaration
+ * open delimiter ("
+ * <!") and the comment
open delimiter ("--"), but
+ * is permitted between
the comment close delimiter
+ * ("--") and the
markup declaration close
+ * delimiter (">"). A
common error is to include a
+ * string of hyphens
("---") within a comment.
+ * Authors should avoid
putting two or more
+ * adjacent hyphens
inside comments. However, the
+ * only browser that
actually gets it right is IE
+ * (others either don't
allow it or allow other
+ * chars as well). The
only safe course of action
+ * is to allow any and
all chars, but eat them.
+ * (avian)
+ */
+ case INTAGCOMMENT :
+ if (b.length() >= 4 &&
c == '-' && prevC == '-') {
+ b.append(c);
+ mode =
INTAGCOMMENTCLOSING;
+ } else
+ b.append(c);
+ break;
+ case INTAGCOMMENTCLOSING :
+ if (c == '>') {
+ saveComment(b,
w, this);
+ b.setLength(0);
+ mode = INTEXT;
+ }
+ break;
+ case INTAGWHITESPACE :
+ if (c == '"') {
+ mode =
INTAGQUOTES;
+ b.append(c);
+ } else if (c == '\'') {
+ // e.g. <div
align = 'center'> (avian)
+ mode =
INTAGSQUOTES;
+ b.append(c);
+ } else if (c == '>') {
+ if (!killTag)
+
processTag(splitTag, w, this);
+ killTag = false;
+
splitTag.clear();
+ mode = INTEXT;
+ } else if
(HTMLDecoder.isWhitespace(c)) {
+ // More
whitespace, what fun
+ } else {
+ mode = INTAG;
+ b.append(c);
+ }
+ }
+ }
+ }
+ return temp;
+ }
+
+ int mode;
+ static final int INTEXT = 0;
+ static final int INTAG = 1;
+ static final int INTAGQUOTES = 2;
+ static final int INTAGSQUOTES = 3;
+ static final int INTAGCOMMENT = 4;
+ static final int INTAGCOMMENTCLOSING = 5;
+ static final int INTAGWHITESPACE = 6;
+ boolean killTag = false; // just this one
+ boolean writeStyleScriptWithTag = false; // just this one
+ boolean expectingBadComment = false;
+ // has to be set on or off explicitly by tags
+ boolean inStyle = false; // has to be set on or off explicitly
by tags
+ boolean inScript = false; // has to be set on or off explicitly
by tags
+ boolean killText = false; // has to be set on or off explicitly
by tags
+ int styleScriptRecurseCount = 0;
+ String currentStyleScriptChunk = new String();
+ String writeAfterTag = "";
+ }
+
+	/**
+	 * Handles a run of character data collected between two tags.
+	 * The text is discarded while <code>pc.killText</code> is set, appended to
+	 * <code>pc.currentStyleScriptChunk</code> while <code>pc.inStyle</code> is
+	 * set, and otherwise written straight to <code>w</code>.
+	 *
+	 * @param s  buffer holding the text (not cleared here)
+	 * @param w  destination for filtered output
+	 * @param pc current parser state
+	 * @throws IOException if writing to <code>w</code> fails
+	 */
+	void saveText(StringBuffer s, Writer w, HTMLParseContext pc)
+		throws IOException {
+		if (!pc.killText) {
+			final String text = s.toString();
+			if (pc.inStyle) {
+				// Buffered only; it is parsed and written elsewhere.
+				pc.currentStyleScriptChunk = pc.currentStyleScriptChunk + text;
+			} else {
+				w.write(text);
+			}
+		}
+	}
+
+	/**
+	 * Parses, sanitizes and (if it survives) writes one complete tag.
+	 * A tag flagged through <code>pc.killTag</code>, or rejected by its
+	 * verifier, is dropped; in both cases any pending request to emit the
+	 * buffered style/script chunk is cancelled.
+	 *
+	 * @param splitTag tokens of the tag (element name first); modified by the
+	 *                 ParsedTag constructor
+	 * @param w        destination for filtered output
+	 * @param pc       current parser state
+	 * @throws IOException         if writing to <code>w</code> fails
+	 * @throws DataFilterException if a verifier rejects the tag by throwing
+	 */
+	void processTag(Vector splitTag, Writer w, HTMLParseContext pc)
+		throws IOException, DataFilterException {
+		// The tag is always parsed first, even if it is about to be killed.
+		ParsedTag parsed = new ParsedTag(splitTag);
+		if (pc.killTag) {
+			pc.killTag = false;
+			pc.writeStyleScriptWithTag = false;
+			return;
+		}
+		parsed = parsed.sanitize(pc);
+		if (parsed == null) {
+			pc.writeStyleScriptWithTag = false;
+			return;
+		}
+		if (pc.writeStyleScriptWithTag) {
+			pc.writeStyleScriptWithTag = false;
+			final String chunk = pc.currentStyleScriptChunk;
+			if (chunk != null && chunk.length() != 0)
+				w.write(chunk);
+			else
+				pc.writeAfterTag += "<!-- deleted unknown style -->";
+			pc.currentStyleScriptChunk = "";
+		}
+		parsed.write(w);
+		if (pc.writeAfterTag.length() > 0) {
+			w.write(pc.writeAfterTag);
+			pc.writeAfterTag = "";
+		}
+	}
+
+	/**
+	 * Handles a finished comment whose text (without the outer angle
+	 * brackets) is in <code>s</code>.
+	 * The comment is ignored while <code>pc.expectingBadComment</code> is set,
+	 * appended to the style/script chunk while inside style or script, dropped
+	 * (clearing the flag) when <code>pc.killTag</code> is set, and otherwise
+	 * written to <code>w</code> wrapped in '&lt;' and '&gt;'.
+	 *
+	 * @throws IOException if writing to <code>w</code> fails
+	 */
+	void saveComment(StringBuffer s, Writer w, HTMLParseContext pc)
+		throws IOException {
+		if (pc.expectingBadComment) {
+			// ignore it
+		} else if (pc.inStyle || pc.inScript) {
+			// The </style> handler is expected to write the chunk.
+			pc.currentStyleScriptChunk =
+				pc.currentStyleScriptChunk + "<" + s + ">";
+		} else if (pc.killTag) {
+			pc.killTag = false;
+		} else {
+			final String body = s.toString();
+			w.write('<');
+			w.write(body);
+			w.write('>');
+		}
+	}
+
+	/**
+	 * Always throws a DataFilterException built from <code>s</code>; the same
+	 * text is passed as the first two constructor arguments and embedded in
+	 * the third.
+	 *
+	 * @param s short description of the parse failure
+	 * @throws DataFilterException always
+	 */
+	static void throwFilterException(String s) throws DataFilterException {
+		// FIXME
+		final String explanation =
+			"The HTML filter failed to parse the page: " + s;
+		throw new DataFilterException(s, s, explanation);
+	}
+
+	/**
+	 * A single tag as produced by the tokenizer: the element name, the raw
+	 * (unparsed) attribute tokens, and whether the tag had a leading slash
+	 * (closing tag) or a trailing slash (XML-style empty tag).
+	 */
+	static class ParsedTag {
+		/** Element name without slashes; null if built from an empty Vector. */
+		String element = null;
+		/** Attribute tokens after the element name, or null if there are none. */
+		String[] unparsedAttrs = null;
+		boolean startSlash = false;
+		boolean endSlash = false;
+
+		/**
+		 * Copies element name and slash flags from <code>t</code> but uses
+		 * <code>outAttrs</code> as the attribute list.
+		 */
+		public ParsedTag(ParsedTag t, String[] outAttrs) {
+			this.element = t.element;
+			this.unparsedAttrs = outAttrs;
+			this.startSlash = t.startSlash;
+			this.endSlash = t.endSlash;
+		}
+
+		/**
+		 * Builds a tag from tokenizer output.
+		 *
+		 * @param v tokens of the tag, element name first. The Vector is
+		 *          modified in place: a trailing "/" on the last token and a
+		 *          leading "/" on the first token are stripped.
+		 */
+		public ParsedTag(Vector v) {
+			int len = v.size();
+			if (len == 0)
+				return;
+			String s = (String) v.elementAt(len - 1);
+			// A lone "/" as the only token is left alone (it is the element).
+			if ((len - 1 != 0 || s.length() > 1) && s.endsWith("/")) {
+				s = s.substring(0, s.length() - 1);
+				v.setElementAt(s, len - 1);
+				if (s.length() == 0)
+					len--; // the token was just "/": do not count it
+				endSlash = true;
+			}
+			s = (String) v.elementAt(0);
+			if (s.length() > 1 && s.startsWith("/")) {
+				s = s.substring(1);
+				v.setElementAt(s, 0);
+				startSlash = true;
+			}
+			element = (String) v.elementAt(0);
+			if (len > 1) {
+				unparsedAttrs = new String[len - 1];
+				for (int x = 1; x < len; x++)
+					unparsedAttrs[x - 1] = (String) v.elementAt(x);
+			}
+		}
+
+		/**
+		 * Runs this tag through the verifier registered for its element name
+		 * (looked up lower-cased in <code>allowedTagsVerifiers</code>).
+		 *
+		 * @return the sanitized tag, or null if the tag must be dropped
+		 * @throws DataFilterException if the element is unknown and neither
+		 *         <code>deleteWierdStuff</code> nor <code>deleteErrors</code>
+		 *         is set, or if the verifier itself throws
+		 */
+		public ParsedTag sanitize(HTMLParseContext pc) throws DataFilterException {
+			// An empty token list leaves element null; there is nothing to
+			// look up, so drop the tag instead of failing on toLowerCase().
+			if (element == null)
+				return null;
+			TagVerifier tv =
+				(TagVerifier) allowedTagsVerifiers.get(element.toLowerCase());
+			if (tv == null) {
+				if (!deleteWierdStuff && !deleteErrors)
+					throwFilterException(
+						"Unknown tag: " + HTMLEncoder.encode(element));
+				return null;
+			}
+			return tv.sanitize(this, pc);
+		}
+
+		/**
+		 * Renders the tag as markup.
+		 *
+		 * @return the tag text, or null when there is no element name
+		 *         (write() relies on the null to emit nothing)
+		 */
+		public String toString() {
+			if (element == null)
+				return null;
+			StringBuffer sb = new StringBuffer("<");
+			if (startSlash)
+				sb.append('/');
+			sb.append(element);
+			if (unparsedAttrs != null) {
+				int n = unparsedAttrs.length;
+				for (int i = 0; i < n; i++) {
+					sb.append(' ').append(unparsedAttrs[i]);
+				}
+			}
+			if (endSlash)
+				sb.append(" /");
+			sb.append('>');
+			return sb.toString();
+		}
+
+		/** Writes the rendered tag to <code>w</code>; writes nothing if there is no element. */
+		public void write(Writer w) throws IOException {
+			String s = toString();
+			if (s != null)
+				w.write(s);
+		}
+	}
+
+ // Whitelist of elements the filter lets through: lower-case element name
+ // -> the TagVerifier that sanitizes that element's attributes.
+ // ParsedTag.sanitize() looks tags up here; anything not registered is
+ // treated as an unknown tag.
+ static final Hashtable allowedTagsVerifiers = new Hashtable();
+ // Shared empty array for verifiers that need no attributes of some kind.
+ static final String[] emptyStringArray = new String[0];
+
+ // Populates the whitelist. Constructor argument order, as declared by the
+ // verifier classes in this file:
+ //   TagVerifier / BaseCoreTagVerifier (tag, allowedAttrs, uriAttrs)
+ //   CoreTagVerifier / LinkTagVerifier (tag, allowedAttrs, uriAttrs,
+ //                                      eventAttrs [, addStdEvents])
+ static {
+ allowedTagsVerifiers.put("?xml", new XmlTagVerifier());
+ allowedTagsVerifiers.put(
+ "!doctype",
+ new DocTypeTagVerifier("!doctype"));
+ allowedTagsVerifiers.put("html", new HtmlTagVerifier());
+ allowedTagsVerifiers.put(
+ "head",
+ new TagVerifier(
+ "head",
+ new String[] { "id" },
+ new String[] { "profile" }));
+ allowedTagsVerifiers.put(
+ "title",
+ new TagVerifier("title", new String[] { "id" }));
+ allowedTagsVerifiers.put("meta", new MetaTagVerifier());
+ allowedTagsVerifiers.put(
+ "body",
+ new CoreTagVerifier(
+ "body",
+ new String[] { "bgcolor", "text", "link",
"vlink", "alink" },
+ new String[] { "background" },
+ new String[] { "onload", "onunload" }));
+ // Block elements that only add "align" to the core attributes.
+ String[] group =
+ { "div", "h1", "h2", "h3", "h4", "h5", "h6", "p",
"caption" };
+ for (int x = 0; x < group.length; x++)
+ allowedTagsVerifiers.put(
+ group[x],
+ new CoreTagVerifier(
+ group[x],
+ new String[] { "align" },
+ emptyStringArray,
+ emptyStringArray));
+ // Elements registered with no element-specific attributes at all.
+ String[] group2 =
+ {
+ "span",
+ "address",
+ "em",
+ "strong",
+ "dfn",
+ "code",
+ "samp",
+ "kbd",
+ "var",
+ "cite",
+ "abbr",
+ "acronym",
+ "sub",
+ "sup",
+ "dt",
+ "dd",
+ "tt",
+ "i",
+ "b",
+ "big",
+ "small",
+ "strike",
+ "s",
+ "u",
+ "noframes",
+ "fieldset",
+ "noscript",
+ "xmp",
+ "listing",
+ "plaintext",
+ "center",
+ "bdo" };
+ for (int x = 0; x < group2.length; x++)
+ allowedTagsVerifiers.put(
+ group2[x],
+ new CoreTagVerifier(
+ group2[x],
+ emptyStringArray,
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "blockquote",
+ new CoreTagVerifier(
+ "blockquote",
+ emptyStringArray,
+ new String[] { "cite" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "q",
+ new CoreTagVerifier(
+ "q",
+ emptyStringArray,
+ new String[] { "cite" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "br",
+ new BaseCoreTagVerifier(
+ "br",
+ new String[] { "clear" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "pre",
+ new CoreTagVerifier(
+ "pre",
+ new String[] { "width", "xml:space" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "ins",
+ new CoreTagVerifier(
+ "ins",
+ new String[] { "datetime" },
+ new String[] { "cite" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "del",
+ new CoreTagVerifier(
+ "del",
+ new String[] { "datetime" },
+ new String[] { "cite" },
+ emptyStringArray));
+ // Lists
+ allowedTagsVerifiers.put(
+ "ul",
+ new CoreTagVerifier(
+ "ul",
+ new String[] { "type", "compact" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "ol",
+ new CoreTagVerifier(
+ "ol",
+ new String[] { "type", "compact", "start" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "li",
+ new CoreTagVerifier(
+ "li",
+ new String[] { "type", "value" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "dl",
+ new CoreTagVerifier(
+ "dl",
+ new String[] { "compact" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "dir",
+ new CoreTagVerifier(
+ "dir",
+ new String[] { "compact" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "menu",
+ new CoreTagVerifier(
+ "menu",
+ new String[] { "compact" },
+ emptyStringArray,
+ emptyStringArray));
+ // Tables
+ allowedTagsVerifiers.put(
+ "table",
+ new CoreTagVerifier(
+ "table",
+ new String[] {
+ "summary",
+ "width",
+ "border",
+ "frame",
+ "rules",
+ "cellspacing",
+ "cellpadding",
+ "align",
+ "bgcolor" },
+ new String[] { "background" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "thead",
+ new CoreTagVerifier(
+ "thead",
+ new String[] { "align", "char", "charoff",
"valign" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "tfoot",
+ new CoreTagVerifier(
+ "tfoot",
+ new String[] { "align", "char", "charoff",
"valign" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "tbody",
+ new CoreTagVerifier(
+ "tbody",
+ new String[] { "align", "char", "charoff",
"valign" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "colgroup",
+ new CoreTagVerifier(
+ "colgroup",
+ new String[] {
+ "span",
+ "width",
+ "align",
+ "char",
+ "charoff",
+ "valign" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "col",
+ new CoreTagVerifier(
+ "col",
+ new String[] {
+ "span",
+ "width",
+ "align",
+ "char",
+ "charoff",
+ "valign" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "tr",
+ new CoreTagVerifier(
+ "tr",
+ new String[] {
+ "align",
+ "char",
+ "charoff",
+ "valign",
+ "bgcolor" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "th",
+ new CoreTagVerifier(
+ "th",
+ new String[] {
+ "abbr",
+ "axis",
+ "headers",
+ "scope",
+ "rowspan",
+ "colspan",
+ "align",
+ "char",
+ "charoff",
+ "valign",
+ "nowrap",
+ "bgcolor",
+ "width",
+ "height" },
+ new String[] { "background" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "td",
+ new CoreTagVerifier(
+ "td",
+ new String[] {
+ "abbr",
+ "axis",
+ "headers",
+ "scope",
+ "rowspan",
+ "colspan",
+ "align",
+ "char",
+ "charoff",
+ "valign",
+ "nowrap",
+ "bgcolor",
+ "width",
+ "height" },
+ new String[] { "background" },
+ emptyStringArray));
+ // Links; href/rel/rev/type/charset are handled inside LinkTagVerifier.
+ allowedTagsVerifiers.put(
+ "a",
+ new LinkTagVerifier(
+ "a",
+ new String[] {
+ "accesskey",
+ "tabindex",
+ "name",
+ "shape",
+ "coords",
+ "target" },
+ emptyStringArray,
+ new String[] { "onfocus", "onblur" }));
+ allowedTagsVerifiers.put(
+ "link",
+ new LinkTagVerifier(
+ "link",
+ new String[] { "media", "target" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "base",
+ new TagVerifier(
+ "base",
+ new String[] { "id", "target" },
+ new String[] { "href" }));
+ allowedTagsVerifiers.put(
+ "img",
+ new CoreTagVerifier(
+ "img",
+ new String[] {
+ "alt",
+ "name",
+ "height",
+ "width",
+ "ismap",
+ "align",
+ "border",
+ "hspace",
+ "vspace" },
+ new String[] { "src", "longdesc", "usemap" },
+ emptyStringArray));
+ // FIXME: object tag -
+ // http://www.w3.org/TR/html4/struct/objects.html#h-13.3
+ // FIXME: param tag -
+ // http://www.w3.org/TR/html4/struct/objects.html#h-13.3.2
+ // applet tag PROHIBITED - we do not support applets (FIXME?)
+ allowedTagsVerifiers.put(
+ "map",
+ new CoreTagVerifier(
+ "map",
+ new String[] { "name" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "area",
+ new CoreTagVerifier(
+ "area",
+ new String[] {
+ "accesskey",
+ "tabindex",
+ "shape",
+ "coords",
+ "nohref",
+ "alt",
+ "target" },
+ new String[] { "href" },
+ new String[] { "onfocus", "onblur" }));
+ allowedTagsVerifiers.put("style", new StyleTagVerifier());
+ allowedTagsVerifiers.put(
+ "font",
+ new BaseCoreTagVerifier(
+ "font",
+ new String[] { "size", "color", "face" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "basefont",
+ new BaseCoreTagVerifier(
+ "basefont",
+ new String[] { "size", "color", "face" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "hr",
+ new CoreTagVerifier(
+ "hr",
+ new String[] { "align", "noshade", "size",
"width" },
+ emptyStringArray,
+ emptyStringArray));
+ // Frames. frameset passes addStdEvents=false, so only onload/onunload
+ // are registered as event attributes for it.
+ allowedTagsVerifiers.put(
+ "frameset",
+ new CoreTagVerifier(
+ "frameset",
+ new String[] { "rows", "cols" },
+ emptyStringArray,
+ new String[] { "onload", "onunload" },
+ false));
+ allowedTagsVerifiers.put(
+ "frame",
+ new BaseCoreTagVerifier(
+ "frame",
+ new String[] {
+ "name",
+ "frameborder",
+ "marginwidth",
+ "marginheight",
+ "noresize",
+ "scrolling" },
+ new String[] { "longdesc", "src" }));
+ allowedTagsVerifiers.put(
+ "iframe",
+ new BaseCoreTagVerifier(
+ "iframe",
+ new String[] {
+ "name",
+ "frameborder",
+ "marginwidth",
+ "marginheight",
+ "scrolling",
+ "align",
+ "height",
+ "width" },
+ new String[] { "longdesc", "src" }));
+
+ // FIXME no form support for now; when we have NIM posting
+ // support, reinstate, and
+ // LIMIT TO METHOD=GET !!!
+
+// allowedTagsVerifiers.put(
+// "form",
+// new CoreTagVerifier(
+// "form",
+// new String[] {
+// "method",
+// "name",
+// "enctype",
+// "accept",
+// "accept-charset",
+// "target" },
+// new String[] { "action" },
+// new String[] { "onsubmit", "onreset" }));
+ // NOTE(review): the form controls below are still registered although
+ // "form" itself is disabled above - confirm this is intended.
+ allowedTagsVerifiers.put(
+ "input",
+ new CoreTagVerifier(
+ "input",
+ new String[] {
+ "accesskey",
+ "tabindex",
+ "type",
+ "name",
+ "value",
+ "checked",
+ "disabled",
+ "readonly",
+ "size",
+ "maxlength",
+ "alt",
+ "ismap",
+ "accept",
+ "align" },
+ new String[] { "src", "usemap" },
+ new String[] { "onfocus", "onblur", "onselect",
"onchange" }));
+ allowedTagsVerifiers.put(
+ "button",
+ new CoreTagVerifier(
+ "button",
+ new String[] {
+ "accesskey",
+ "tabindex",
+ "name",
+ "value",
+ "type",
+ "disabled" },
+ emptyStringArray,
+ new String[] { "onfocus", "onblur" }));
+ allowedTagsVerifiers.put(
+ "select",
+ new CoreTagVerifier(
+ "select",
+ new String[] {
+ "name",
+ "size",
+ "multiple",
+ "disabled",
+ "tabindex" },
+ emptyStringArray,
+ new String[] { "onfocus", "onblur", "onchange"
}));
+ allowedTagsVerifiers.put(
+ "optgroup",
+ new CoreTagVerifier(
+ "optgroup",
+ new String[] { "disabled", "label" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "option",
+ new CoreTagVerifier(
+ "option",
+ new String[] { "selected", "disabled", "label",
"value" },
+ emptyStringArray,
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "textarea",
+ new CoreTagVerifier(
+ "textarea",
+ new String[] {
+ "accesskey",
+ "tabindex",
+ "name",
+ "rows",
+ "cols",
+ "disabled",
+ "readonly" },
+ emptyStringArray,
+ new String[] { "onfocus", "onblur", "onselect",
"onchange" }));
+ allowedTagsVerifiers.put(
+ "isindex",
+ new BaseCoreTagVerifier(
+ "isindex",
+ new String[] { "prompt" },
+ emptyStringArray));
+ allowedTagsVerifiers.put(
+ "label",
+ new CoreTagVerifier(
+ "label",
+ new String[] { "for", "accesskey" },
+ emptyStringArray,
+ new String[] { "onfocus", "onblur" }));
+ allowedTagsVerifiers.put(
+ "legend",
+ new CoreTagVerifier(
+ "legend",
+ new String[] { "accesskey", "align" },
+ emptyStringArray,
+ emptyStringArray));
+ // ScriptTagVerifier.sanitizeHash always returns null, i.e. script tags
+ // are registered here only so that they are recognised and dropped.
+ allowedTagsVerifiers.put("script", new ScriptTagVerifier());
+ }
+
+	/**
+	 * Base attribute whitelist for one element. Parses the raw attribute
+	 * tokens of a tag into name/value pairs, keeps only the attributes that
+	 * {@link #sanitizeHash} lets through, and rebuilds the tag from them.
+	 * Subclasses extend sanitizeHash() to admit further attributes.
+	 */
+	static class TagVerifier {
+		final String tag;
+		/** Attribute names copied through unchanged. */
+		final HashSet allowedAttrs;
+		/** Attribute names whose values are URIs and go through sanitizeURI(). */
+		final HashSet uriAttrs;
+
+		TagVerifier(String tag, String[] allowedAttrs) {
+			this(tag, allowedAttrs, null);
+		}
+
+		/**
+		 * @param tag          element name this verifier is registered for
+		 * @param allowedAttrs plain attributes to allow; may be null
+		 * @param uriAttrs     URI-valued attributes to allow; may be null
+		 */
+		TagVerifier(String tag, String[] allowedAttrs, String[] uriAttrs) {
+			this.tag = tag;
+			this.allowedAttrs = new HashSet();
+			if (allowedAttrs != null) {
+				for (int x = 0; x < allowedAttrs.length; x++)
+					this.allowedAttrs.add(allowedAttrs[x]);
+			}
+			this.uriAttrs = new HashSet();
+			if (uriAttrs != null) {
+				for (int x = 0; x < uriAttrs.length; x++)
+					this.uriAttrs.add(uriAttrs[x]);
+			}
+		}
+
+		/**
+		 * Sanitizes one tag.
+		 *
+		 * @return a new tag carrying only the surviving attributes (none at
+		 *         all for a closing tag), or null if the tag must be dropped
+		 * @throws DataFilterException if attribute sanitizing fails
+		 */
+		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) throws DataFilterException {
+			// name (String, lower case) -> value (String), or a placeholder
+			// Object for an attribute that was given without a value.
+			Hashtable h = new Hashtable();
+			// True when the previous token ended in '=', i.e. the current
+			// token is the value belonging to prevX.
+			boolean equals = false;
+			String prevX = "";
+			if (t.unparsedAttrs != null)
+				for (int i = 0; i < t.unparsedAttrs.length; i++) {
+					String s = t.unparsedAttrs[i];
+					if (equals) {
+						equals = false;
+						h.put(prevX, stripQuotes(s));
+						prevX = "";
+						continue;
+					}
+					if (s.length() == 0)
+						continue; // nothing to parse; also keeps substring() below in range
+					int idx = s.indexOf('=');
+					if (idx == s.length() - 1) {
+						// "name=" or a lone "=": the value is the next token
+						equals = true;
+						if (idx != 0)
+							prevX = s.substring(0, idx).toLowerCase();
+						// else: prevX is already the name seen just before
+					} else if (idx > -1) {
+						// "name=value" or "=value" (name was the previous token)
+						String x = s.substring(0, idx);
+						if (x.length() == 0)
+							x = prevX;
+						x = x.toLowerCase();
+						String y = stripQuotes(s.substring(idx + 1));
+						h.put(x, y);
+						prevX = x;
+					} else {
+						// Attribute without a value. Lower-cased like every
+						// other name so it can match the whitelists.
+						String x = s.toLowerCase();
+						h.put(x, new Object());
+						prevX = x;
+					}
+				}
+			h = sanitizeHash(h, t, pc);
+			if (h == null)
+				return null;
+			if (t.startSlash)
+				return new ParsedTag(t, null);
+			String[] outAttrs = new String[h.size()];
+			int i = 0;
+			for (Enumeration e = h.keys(); e.hasMoreElements();) {
+				String x = (String) e.nextElement();
+				Object o = h.get(x);
+				String out = x;
+				if (o instanceof String) {
+					// Values are always re-emitted inside double quotes, so a
+					// literal '"' (possible when the source used single
+					// quotes) must not be allowed to end the value early.
+					out += "=\"" + escapeDoubleQuotes((String) o) + '"';
+				}
+				outAttrs[i++] = out;
+			}
+			return new ParsedTag(t, outAttrs);
+		}
+
+		/** Replaces every '"' in <code>value</code> with <code>&amp;quot;</code>. */
+		private static String escapeDoubleQuotes(String value) {
+			if (value.indexOf('"') < 0)
+				return value;
+			StringBuffer sb = new StringBuffer(value.length() + 16);
+			for (int k = 0; k < value.length(); k++) {
+				char ch = value.charAt(k);
+				if (ch == '"')
+					sb.append("&quot;");
+				else
+					sb.append(ch);
+			}
+			return sb.toString();
+		}
+
+		/**
+		 * Filters the parsed attributes of a tag.
+		 *
+		 * @param h  all attributes found on the tag (see sanitize())
+		 * @param p  the tag being processed
+		 * @param pc current parser state
+		 * @return a new table with only the attributes that are allowed;
+		 *         subclasses may return null to drop the whole tag
+		 * @throws DataFilterException if a value cannot be sanitized
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable hn = new Hashtable();
+			for (Enumeration e = h.keys(); e.hasMoreElements();) {
+				String x = (String) e.nextElement();
+				Object o = h.get(x);
+				// Straight attribs
+				if (allowedAttrs.contains(x)) {
+					hn.put(x, o);
+					continue;
+				}
+				if (uriAttrs.contains(x)) {
+					// URI
+					if (o instanceof String) {
+						// Java's URL handling doesn't seem suitable
+						String uri = (String) o;
+						uri = HTMLDecoder.decode(uri);
+						uri = sanitizeURI(uri, null, null, pc.cb);
+						if (uri != null) {
+							uri = HTMLEncoder.encode(uri);
+							hn.put(x, uri);
+						}
+					}
+					// FIXME: rewrite absolute URLs, handle ?date= etc
+				}
+			}
+			// lang, xml:lang and dir can go on anything
+			// lang or xml:lang = language [ "-" country [ "-" variant ] ]
+			// The variant can be just about anything; no way to test (avian)
+			String s = getHashString(h, "lang");
+			if (s != null)
+				hn.put("lang", s);
+			s = getHashString(h, "xml:lang");
+			if (s != null)
+				hn.put("xml:lang", s);
+			s = getHashString(h, "dir");
+			if (s != null
+				&& (s.equalsIgnoreCase("ltr") || s.equalsIgnoreCase("rtl")))
+				hn.put("dir", s);
+			return hn;
+		}
+	}
+
+	/**
+	 * Removes one pair of matching surrounding quotes from <code>s</code>.
+	 * The string is returned unchanged unless it is at least two characters
+	 * long and starts and ends with the same quote character (either
+	 * <code>"</code> or <code>'</code>).
+	 *
+	 * @param s the raw attribute value; must not be null
+	 * @return <code>s</code> without its outer quote pair, or <code>s</code> itself
+	 */
+	static String stripQuotes(String s) {
+		final int last = s.length() - 1;
+		if (last < 1)
+			return s;
+		final char first = s.charAt(0);
+		final boolean isQuote = first == '"' || first == '\'';
+		if (isQuote && s.charAt(last) == first)
+			return s.substring(1, last);
+		return s;
+	}
+
+ // static String[] titleString = new String[] {"title"};
+
+	/**
+	 * Common handling for elements whose content is not HTML (style and
+	 * script): tracks nesting through <code>pc.styleScriptRecurseCount</code>,
+	 * switches the parser's "inside" flag on the opening tag and has the
+	 * buffered content processed on the closing tag.
+	 */
+	static abstract class ScriptStyleTagVerifier extends TagVerifier {
+		ScriptStyleTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs) {
+			super(tag, allowedAttrs, uriAttrs);
+		}
+
+		/** Sets the parser flag that says we are inside this kind of element. */
+		abstract void setStyle(boolean b, HTMLParseContext pc);
+
+		/** Returns the parser flag set by {@link #setStyle}. */
+		abstract boolean getStyle(HTMLParseContext pc);
+
+		/** Sanitizes <code>pc.currentStyleScriptChunk</code> in place. */
+		abstract void processStyle(HTMLParseContext pc);
+
+		/**
+		 * Applies the base whitelist, then dispatches to start() for an
+		 * opening tag or finish() for a closing tag.
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable hn = super.sanitizeHash(h, p, pc);
+			if (p.startSlash) {
+				return finish(h, hn, pc);
+			} else {
+				return start(h, hn, pc);
+			}
+		}
+
+		/**
+		 * Handles the closing tag.
+		 *
+		 * @return <code>hn</code>, or null (drop the tag) when there was no
+		 *         matching opening tag
+		 * @throws DataFilterException on an unmatched closing tag when
+		 *         <code>deleteErrors</code> is off
+		 */
+		Hashtable finish(
+			Hashtable h,
+			Hashtable hn,
+			HTMLParseContext pc) throws DataFilterException {
+			// Finishing
+			pc.styleScriptRecurseCount--;
+			if (pc.styleScriptRecurseCount < 0) {
+				if (deleteErrors)
+					pc.writeAfterTag
+						+= "<!-- Too many nested style or script tags - ambiguous or invalid parsing -->";
+				else
+					throwFilterException("Too many nested </style> tags - ambiguous or invalid parsing, can't reliably filter so removing the inner tags - garbage may appear in browser");
+				return null;
+			}
+			setStyle(false, pc);
+			processStyle(pc);
+			pc.expectingBadComment = false;
+			// start() sets killText for an unsupported type; clear it here,
+			// otherwise all text after the closing tag would keep being dropped.
+			pc.killText = false;
+			pc.writeStyleScriptWithTag = true;
+			// Pass it on, no params for </style>
+			return hn;
+		}
+
+		/**
+		 * Handles the opening tag. Only <code>type="text/css"</code> (or no
+		 * type at all) is accepted; any other type makes the parser discard
+		 * the element's text and comments.
+		 *
+		 * @return <code>hn</code>, or null to drop the tag
+		 * @throws DataFilterException on a nested opening tag when
+		 *         <code>deleteErrors</code> is off
+		 */
+		Hashtable start(Hashtable h, Hashtable hn, HTMLParseContext pc) throws DataFilterException {
+			pc.styleScriptRecurseCount++;
+			if (pc.styleScriptRecurseCount > 1) {
+				if (deleteErrors)
+					pc.writeAfterTag
+						+= "<!-- Too many nested style or script tags -->";
+				else
+					throwFilterException("Too many nested </style> tags - ambiguous or invalid parsing, can't reliably filter so removing the inner tags - garbage may appear in browser");
+				return null;
+			}
+			setStyle(true, pc);
+			String type = getHashString(h, "type");
+			if (type != null) {
+				if (!type.equalsIgnoreCase("text/css") /* FIXME */
+					) {
+					pc.killText = true;
+					pc.expectingBadComment = true;
+					return null; // kill the tag
+				}
+				hn.put("type", "text/css");
+			}
+			return hn;
+		}
+	}
+
+	/**
+	 * Verifier for the style element: allows id, media, title and xml:space,
+	 * and runs the collected stylesheet text through sanitizeStyle().
+	 */
+	static class StyleTagVerifier extends ScriptStyleTagVerifier {
+		StyleTagVerifier() {
+			super(
+				"style",
+				new String[] { "id", "media", "title", "xml:space" },
+				emptyStringArray);
+		}
+
+		void setStyle(boolean b, HTMLParseContext pc) {
+			pc.inStyle = b;
+		}
+
+		boolean getStyle(HTMLParseContext pc) {
+			return pc.inStyle;
+		}
+
+		/**
+		 * Replaces the buffered chunk with its sanitized form; if sanitizing
+		 * throws, the error is logged and the chunk becomes empty.
+		 */
+		void processStyle(HTMLParseContext pc) {
+			String cleaned;
+			try {
+				cleaned = sanitizeStyle(pc.currentStyleScriptChunk, pc.cb);
+			} catch (DataFilterException e) {
+				Logger.error(this, "Error parsing style: "+e, e);
+				cleaned = "";
+			}
+			pc.currentStyleScriptChunk = cleaned;
+		}
+	}
+
+	/**
+	 * Verifier for the script element. Its sanitizeHash() always returns
+	 * null, so every script tag is dropped; the remaining overrides only
+	 * satisfy the abstract base class.
+	 */
+	static class ScriptTagVerifier extends ScriptStyleTagVerifier {
+		ScriptTagVerifier() {
+			/*
+			 * FIXME: src not supported type ignored (we will need to check
+			 * this when if/when we support scripts charset ignored
+			 */
+			super(
+				"script",
+				new String[] {
+					"id", "charset", "type", "language", "defer", "xml:space" },
+				new String[] { "src" });
+		}
+
+		/** Always null: lose the tags. The base-class whitelist is not consulted. */
+		Hashtable sanitizeHash(
+			Hashtable hn,
+			ParsedTag p,
+			HTMLParseContext pc) {
+			return null;
+		}
+
+		void setStyle(boolean b, HTMLParseContext pc) {
+			pc.inScript = b;
+		}
+
+		boolean getStyle(HTMLParseContext pc) {
+			return pc.inScript;
+		}
+
+		void processStyle(HTMLParseContext pc) {
+			final String raw = pc.currentStyleScriptChunk;
+			pc.currentStyleScriptChunk = sanitizeScripting(raw);
+		}
+	}
+
+	/**
+	 * Adds the HTML core attributes (id, class, style, title) to the base
+	 * whitelist. The style value is passed through sanitizeStyle() and
+	 * escapeQuotes(); the others are copied as they are.
+	 */
+	static class BaseCoreTagVerifier extends TagVerifier {
+		BaseCoreTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs) {
+			super(tag, allowedAttrs, uriAttrs);
+		}
+
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			// %i18n is dealt with by TagVerifier; %coreattrs follow.
+			Hashtable result = super.sanitizeHash(h, p, pc);
+
+			// Copied verbatim - hopefully nobody will be stupid enough to
+			// encode URLs into the unique ID or the class names... :)
+			final String[] verbatim = { "id", "class" };
+			for (int k = 0; k < verbatim.length; k++) {
+				String value = getHashString(h, verbatim[k]);
+				if (value != null)
+					result.put(verbatim[k], value);
+			}
+
+			String css = getHashString(h, "style");
+			if (css != null) {
+				css = sanitizeStyle(css, pc.cb);
+				if (css != null)
+					css = escapeQuotes(css);
+				if (css != null)
+					result.put("style", css);
+			}
+
+			// PARANOIA: title is PLAIN TEXT, right? In all user agents? :)
+			String tooltip = getHashString(h, "title");
+			if (tooltip != null)
+				result.put("title", tooltip);
+			return result;
+		}
+	}
+
+	/**
+	 * Adds event-handler attributes on top of the core attributes. The value
+	 * of every registered event attribute goes through sanitizeScripting()
+	 * and is kept only when that returns non-null.
+	 */
+	static class CoreTagVerifier extends BaseCoreTagVerifier {
+		/** Names of the event attributes recognised for this element. */
+		final HashSet eventAttrs;
+		/** Events registered for every element unless addStdEvents is false. */
+		static final String[] stdEvents =
+			new String[] {
+				"onclick",
+				"ondblclick",
+				"onmousedown",
+				"onmouseup",
+				"onmouseover",
+				"onmousemove",
+				"onmouseout",
+				"onkeypress",
+				"onkeydown",
+				"onkeyup" };
+
+		/** Same as the five-argument constructor with addStdEvents = true. */
+		CoreTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs,
+			String[] eventAttrs) {
+			this(tag, allowedAttrs, uriAttrs, eventAttrs, true);
+		}
+
+		/**
+		 * @param eventAttrs   element-specific event attributes; may be null
+		 * @param addStdEvents whether to register {@link #stdEvents} as well
+		 */
+		CoreTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs,
+			String[] eventAttrs,
+			boolean addStdEvents) {
+			super(tag, allowedAttrs, uriAttrs);
+			HashSet events = new HashSet();
+			int extra = (eventAttrs == null) ? 0 : eventAttrs.length;
+			for (int k = 0; k < extra; k++)
+				events.add(eventAttrs[k]);
+			for (int k = 0; addStdEvents && k < stdEvents.length; k++)
+				events.add(stdEvents[k]);
+			this.eventAttrs = events;
+		}
+
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable result = super.sanitizeHash(h, p, pc);
+			// events (default and added)
+			Iterator it = eventAttrs.iterator();
+			while (it.hasNext()) {
+				String event = (String) it.next();
+				String handler = getHashString(h, event);
+				if (handler == null)
+					continue;
+				handler = sanitizeScripting(handler);
+				if (handler != null)
+					result.put(event, handler);
+			}
+			return result;
+		}
+	}
+
+	/**
+	 * Verifier for elements that link to another resource (registered for
+	 * "a" and "link"). Besides the core/event handling it sanitizes href
+	 * together with the type and charset the link declares, and passes
+	 * rel/rev through when an href is present.
+	 */
+	static class LinkTagVerifier extends CoreTagVerifier {
+		LinkTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs,
+			String[] eventAttrs) {
+			super(tag, allowedAttrs, uriAttrs, eventAttrs);
+		}
+
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable result = super.sanitizeHash(h, p, pc);
+			String hreflang = getHashString(h, "hreflang");
+
+			// The type attribute may carry a charset as well.
+			String charset = null;
+			String type = getHashString(h, "type");
+			if (type != null) {
+				String[] parts = splitType(type);
+				type = parts[0];
+				if (parts[1] != null && parts[1].length() > 0)
+					charset = parts[1];
+				Logger.debug(
+					this,
+					"Processing link tag, type=" + type + ", charset=" + charset);
+			}
+			// An explicit charset attribute wins over the one from type.
+			String explicitCharset = getHashString(h, "charset");
+			if (explicitCharset != null)
+				charset = explicitCharset;
+
+			String href = getHashString(h, "href");
+			if (href == null)
+				return result;
+
+			final String[] relAttrs = new String[] { "rel", "rev" };
+			for (int k = 0; k < relAttrs.length; k++) {
+				String relValue = getHashString(h, relAttrs[k]);
+				if (relValue == null)
+					continue;
+				StringTokenizer words = new StringTokenizer(relValue, " ");
+				while (words.hasMoreTokens()) {
+					String word = words.nextToken();
+					boolean styleLike =
+						word.equalsIgnoreCase("alternate")
+							|| word.equalsIgnoreCase("stylesheet");
+					if (styleLike) {
+						// FIXME: hardcoding text/css
+						type = "text/css";
+					}
+					// FIXME: do we want to do anything with the other
+					// possible rel's?
+				}
+				result.put(relAttrs[k], relValue);
+			}
+
+			href = HTMLDecoder.decode(href);
+			href = sanitizeURI(href, type, charset, pc.cb);
+			if (href != null) {
+				href = HTMLEncoder.encode(href);
+				result.put("href", href);
+				if (type != null)
+					result.put("type", type);
+				if (charset != null) {
+					result.put("charset", charset);
+					// hreflang is only passed on when a charset is known
+					if (hreflang != null)
+						result.put("hreflang", hreflang);
+				}
+			}
+			// FIXME: allow these if the charset and encoding are encoded
+			// into the URL
+			// FIXME: link types -
+			// http://www.w3.org/TR/html4/types.html#type-links - the
+			// stylesheet stuff, primarily - rel and rev properties - parse
+			// these, use same fix as above (browser may assume text/css for
+			// anything linked as a stylesheet)
+			return result;
+		}
+	}
+
+	/**
+	 * Verifier for the meta element. Lets through a small set of
+	 * name/content and http-equiv/content pairs and records a charset
+	 * declared through http-equiv="Content-Type" in
+	 * <code>pc.detectedCharset</code>.
+	 */
+	static class MetaTagVerifier extends TagVerifier {
+		MetaTagVerifier() {
+			super("meta", new String[] { "id" });
+		}
+
+		/**
+		 * Entry point used by TagVerifier.sanitize(). The four-argument
+		 * method below is only an overload and is never reached through the
+		 * base class, so without this override none of the meta-specific
+		 * checks would run.
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			return sanitizeHash(h, p, pc, 0);
+		}
+
+		/**
+		 * Filters the attributes of a meta tag.
+		 *
+		 * @param linkHtl not used; kept so existing callers still compile
+		 * @return the allowed attributes, or null (drop the tag) when
+		 *         nothing is left
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc,
+			int linkHtl) throws DataFilterException {
+			Hashtable hn = super.sanitizeHash(h, p, pc);
+			/*
+			 * Several possibilities: a) meta http-equiv=X content=Y b) meta
+			 * name=X content=Y
+			 */
+			String http_equiv = getHashString(h, "http-equiv");
+			String name = getHashString(h, "name");
+			String content = getHashString(h, "content");
+			if (content != null) {
+				if (name != null && http_equiv == null) {
+					if (name.equalsIgnoreCase("Author")
+						|| name.equalsIgnoreCase("Keywords")
+						|| name.equalsIgnoreCase("Description")) {
+						hn.put("name", name);
+						hn.put("content", content);
+					}
+				} else if (http_equiv != null && name == null) {
+					if (http_equiv.equalsIgnoreCase("Expires")) {
+						hn.put("http-equiv", http_equiv);
+						hn.put("content", content);
+					} else if (
+						http_equiv.equalsIgnoreCase("Content-Script-Type")) {
+						// We don't support script at this time.
+					} else if (
+						http_equiv.equalsIgnoreCase("Content-Style-Type")) {
+						// FIXME: charsets
+						if (content.equalsIgnoreCase("text/css")) {
+							// FIXME: selectable style languages - only matters
+							// when we have implemented more than one
+							// FIXME: if we ever do allow it... the spec
+							// http://www.w3.org/TR/html4/present/styles.html#h-14.2.1
+							// says only the last definition counts...
+							// but it only counts if it's in the HEAD section,
+							// so we DONT need to parse the whole doc
+							hn.put("http-equiv", http_equiv);
+							hn.put("content", content);
+						}
+						// FIXME: add some more headers - Dublin Core?
+					} else if (http_equiv.equalsIgnoreCase("Content-Type")) {
+						String[] typesplit = splitType(content);
+						// Only kept when it agrees with the charset the
+						// document is being parsed with.
+						if (typesplit[0].equalsIgnoreCase("text/html")
+							&& (typesplit[1] == null
+								|| typesplit[1].equalsIgnoreCase(pc.charset))) {
+							hn.put("http-equiv", http_equiv);
+							hn.put(
+								"content",
+								typesplit[0]
+									+ (typesplit[1] != null
+										? "; charset=" + typesplit[1]
+										: ""));
+						}
+						// Recorded even when the tag itself is rejected.
+						if (typesplit[1] != null)
+							pc.detectedCharset = typesplit[1];
+					} else if (
+						http_equiv.equalsIgnoreCase("Content-Language")) {
+						hn.put("http-equiv", "Content-Language");
+						hn.put("content", content);
+					}
+				}
+			}
+			if (hn.isEmpty())
+				return null;
+			return hn;
+		}
+	}
+
+	/**
+	 * Verifier for the document type declaration. Accepts only
+	 * <code>html public "PUBLIC-ID" ["SYSTEM-ID"]</code> where the public
+	 * identifier is one of the known DTDs and, if a system identifier is
+	 * given and one is recorded for that DTD, the two match.
+	 */
+	static class DocTypeTagVerifier extends TagVerifier {
+		DocTypeTagVerifier(String tag) {
+			super(tag, null);
+		}
+
+		/**
+		 * Known public identifiers -> expected system identifier. A
+		 * non-String value marks a DTD for which no system identifier is
+		 * recorded.
+		 */
+		static final Hashtable DTDs = new Hashtable();
+
+		static {
+			DTDs.put(
+				"-//W3C//DTD XHTML 1.0 Strict//EN",
+				"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
+			DTDs.put(
+				"-//W3C//DTD XHTML 1.0 Transitional//EN",
+				"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
+			DTDs.put(
+				"-//W3C//DTD XHTML 1.0 Frameset//EN",
+				"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd");
+			DTDs.put(
+				"-//W3C//DTD HTML 4.01//EN",
+				"http://www.w3.org/TR/html4/strict.dtd");
+			DTDs.put(
+				"-//W3C//DTD HTML 4.01 Transitional//EN",
+				"http://www.w3.org/TR/html4/loose.dtd");
+			DTDs.put(
+				"-//W3C//DTD HTML 4.01 Frameset//EN",
+				"http://www.w3.org/TR/html4/frameset.dtd");
+			DTDs.put("-//W3C//DTD HTML 3.2 Final//EN", new Object());
+		}
+
+		/**
+		 * @return <code>t</code> unchanged when the declaration is
+		 *         acceptable, otherwise null (drop it)
+		 */
+		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) {
+			String[] attrs = t.unparsedAttrs;
+			// A bare "<!doctype>" has no attribute array at all.
+			if (attrs == null)
+				return null;
+			if (!(attrs.length == 3 || attrs.length == 4))
+				return null;
+			if (!attrs[0].equalsIgnoreCase("html"))
+				return null;
+			if (!attrs[1].equalsIgnoreCase("public"))
+				return null;
+			String s = stripQuotes(attrs[2]);
+			if (!DTDs.containsKey(s))
+				return null;
+			if (attrs.length == 4) {
+				String ss = stripQuotes(attrs[3]);
+				String spec = getHashString(DTDs, s);
+				if (spec != null && !spec.equals(ss))
+					return null;
+			}
+			return t;
+		}
+	}
+
+ /**
+  * Verifier for the XML declaration. The declaration is kept only when it
+  * consists of exactly <code>version="1.0"</code> followed by
+  * <code>encoding="..."?</code>, where the quoted encoding equals the
+  * charset of the parse context (ignoring case).
+  */
+ static class XmlTagVerifier extends TagVerifier {
+
+     /** Required start of the second attribute. */
+     private static final String ENCODING_PREFIX = "encoding=\"";
+
+     /** Required end of the second attribute (closing quote plus the '?' of "?>"). */
+     private static final String ENCODING_SUFFIX = "\"?";
+
+     XmlTagVerifier() {
+         super("?xml", null);
+     }
+
+     /**
+      * @param t the parsed declaration; only its unparsedAttrs are examined
+      * @param pc the parse context supplying the expected charset
+      * @return t itself when the declaration is acceptable, otherwise null
+      */
+     ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) {
+         if (t.unparsedAttrs.length != 2)
+             return null;
+         if (!t.unparsedAttrs[0].equals("version=\"1.0\""))
+             return null;
+         String encodingAttr = t.unparsedAttrs[1];
+         // Both the prefix and the suffix must be present (the previous
+         // "&&" only rejected the attribute when both were missing), and
+         // they must not overlap, otherwise substring() below would throw.
+         if (encodingAttr.length() < ENCODING_PREFIX.length() + ENCODING_SUFFIX.length()
+             || !encodingAttr.startsWith(ENCODING_PREFIX)
+             || !encodingAttr.endsWith(ENCODING_SUFFIX))
+             return null;
+         String declared =
+             encodingAttr.substring(
+                 ENCODING_PREFIX.length(),
+                 encodingAttr.length() - ENCODING_SUFFIX.length());
+         if (!declared.equalsIgnoreCase(pc.charset))
+             return null;
+         return t;
+     }
+ }
+
+ /**
+  * Verifier for the html element. On top of whatever the superclass keeps
+  * (this class registers "id" and "version" with it), the xmlns attribute
+  * is preserved when it is exactly the XHTML namespace.
+  */
+ static class HtmlTagVerifier extends TagVerifier {
+     HtmlTagVerifier() {
+         super("html", new String[] { "id", "version" });
+     }
+
+     /**
+      * NOTE(review): this method takes four arguments but delegates to a
+      * three-argument super.sanitizeHash; confirm that callers really
+      * invoke this four-argument form, otherwise the xmlns handling below
+      * is never reached.
+      *
+      * @param h the attributes as parsed
+      * @param p the tag being verified
+      * @param pc the parse context
+      * @param linkHtl not used by this method
+      * @return the table produced by the superclass, with xmlns added when
+      *         it names the XHTML namespace
+      */
+     Hashtable sanitizeHash(
+         Hashtable h,
+         ParsedTag p,
+         HTMLParseContext pc,
+         int linkHtl) throws DataFilterException {
+         Hashtable sanitized = super.sanitizeHash(h, p, pc);
+         String namespace = getHashString(h, "xmlns");
+         if ("http://www.w3.org/1999/xhtml".equals(namespace))
+             sanitized.put("xmlns", namespace);
+         return sanitized;
+     }
+ }
+
+ /**
+  * Runs a piece of inline CSS through CSSParser and returns what the parser
+  * wrote.
+  *
+  * @param style the inline style text
+  * @param cb callback handed to the CSS parser
+  * @return the parser output; null when the output is empty; or the fixed
+  *         comment "/* Could not match input style *&#47;" when the
+  *         tokenizer reports that it could not match its input
+  * @throws DataFilterException declared for callers; see CSSParser
+  */
+ static String sanitizeStyle(String style, FilterCallback cb) throws DataFilterException {
+     Logger.debug(
+         HTMLFilter.class,
+         "Sanitizing style: " + style);
+     Reader r = new StringReader(style);
+     Writer w = new StringWriter();
+     CSSParser pc = new CSSParser(r, w, false, cb);
+     try {
+         pc.parse();
+     } catch (IOException e) {
+         // Whatever was written before the failure is still returned below.
+         Logger.error(
+             HTMLFilter.class,
+             "IOException parsing inline CSS!");
+     } catch (Error e) {
+         // this sucks, it should be a proper exception
+         // Compare constant-first: an Error may have a null message, and
+         // calling equals() on it would replace the real error with a
+         // NullPointerException.
+         if ("Error: could not match input".equals(e.getMessage())) {
+             Logger.normal(
+                 HTMLFilter.class,
+                 "CSS Parse Error!",
+                 e);
+             return "/* Could not match input style */";
+         }
+         throw e;
+     }
+     String s = w.toString();
+     if (s == null || s.length() == 0)
+         return null;
+     Logger.debug(HTMLFilter.class, "Style finally: " + s);
+     return s;
+ }
+
+ /**
+  * Replaces every double-quote character with the HTML entity
+  * <code>&amp;quot;</code> so the result can be placed inside a
+  * double-quoted attribute value. All other characters are copied as-is.
+  *
+  * @param s the text to escape; must not be null
+  * @return the escaped text
+  */
+ static String escapeQuotes(String s) {
+     StringBuffer buf = new StringBuffer(s.length());
+     for (int x = 0; x < s.length(); x++) {
+         char c = s.charAt(x);
+         if (c == '\"') {
+             // The entity had been decoded to a bare quote, leaving an
+             // unterminated string literal; restore the entity text.
+             buf.append("&quot;");
+         } else {
+             buf.append(c);
+         }
+     }
+     return buf.toString();
+ }
+
+ /**
+  * Sanitizes a piece of script. Nothing is allowed through at present: the
+  * argument is ignored and the result is always null.
+  *
+  * FIXME: at some point we may want to allow certain recipes.
+  *
+  * @param script the script text (unused)
+  * @return always null
+  */
+ static String sanitizeScripting(String script) {
+     return null;
+ }
+
+ /**
+  * Passes a URI to the callback without any content-type override.
+  *
+  * @param uri the URI as found in the document
+  * @param cb the callback that processes the URI
+  * @return whatever cb.processURI returns for this URI and a null type
+  */
+ static String sanitizeURI(String uri, FilterCallback cb) {
+     String noOverrideType = null;
+     return cb.processURI(uri, noOverrideType);
+ }
+
+ /**
+  * Splits a content-type value into its media type and its charset.
+  *
+  * While we're only interested in the type and the charset, the format is
+  * a lot more flexible than that. (avian) For example:
+  * <pre>
+  *   TEXT/PLAIN; format=flowed; charset=US-ASCII
+  *   IMAGE/JPEG; name=test.jpeg; x-unix-mode=0644
+  * </pre>
+  * Parameters are separated by ';'. The parameter name "charset" is matched
+  * without regard to case, since MIME parameter names are not case
+  * sensitive. If charset occurs more than once the last occurrence wins.
+  * Values are trimmed but otherwise returned as written (surrounding
+  * quotes, if any, are kept).
+  *
+  * @param type the full content-type value; must not be null
+  * @return a two-element array: the trimmed media type, and the charset
+  *         value or null when no charset parameter is present
+  */
+ static String[] splitType(String type) {
+     StringFieldParser fields = new StringFieldParser(type, ';');
+     String mediaType = fields.nextField().trim();
+     String charset = null;
+     while (fields.hasMoreFields()) {
+         String param = fields.nextField();
+         int eq = param.indexOf('=');
+         if (eq == -1)
+             continue; // not a name=value pair, nothing to extract
+         String name = param.substring(0, eq).trim();
+         if (name.equalsIgnoreCase("charset"))
+             charset = param.substring(eq + 1).trim();
+     }
+     return new String[] { mediaType, charset };
+ }
+
+ /**
+  * A simple string splitter. StringTokenizer doesn't work well for our
+  * purpose. (avian)
+  *
+  * Fields are the stretches of text between occurrences of a single
+  * separator character; empty fields are returned as empty strings, and an
+  * empty input yields exactly one empty field.
+  */
+ static class StringFieldParser {
+     private String str;
+     private int maxPos, curPos;
+     private char c;
+
+     /** Splits on the tab character. */
+     public StringFieldParser(String str) {
+         this(str, '\t');
+     }
+
+     /**
+      * @param str the text to split; must not be null
+      * @param c the separator character
+      */
+     public StringFieldParser(String str, char c) {
+         this.c = c;
+         this.str = str;
+         this.curPos = 0;
+         this.maxPos = str.length();
+     }
+
+     /** @return true while nextField() has a further field to return */
+     public boolean hasMoreFields() {
+         return !(curPos > maxPos);
+     }
+
+     /**
+      * @return the next field without its separator, or null once every
+      *         field has been returned
+      */
+     public String nextField() {
+         if (curPos > maxPos)
+             return null;
+         int begin = curPos;
+         int separatorAt = str.indexOf(c, begin);
+         int finish = (separatorAt == -1) ? maxPos : separatorAt;
+         // Step past the separator; after the last field this leaves
+         // curPos at maxPos + 1, which marks exhaustion.
+         curPos = finish + 1;
+         return str.substring(begin, finish);
+     }
+ }
+
+ /**
+  * Passes a URI to the callback together with an optional content-type
+  * override.
+  *
+  * When both a type and a non-empty charset are given, the type handed to
+  * the callback is <code>type;charset=charset</code>. A charset without a
+  * type is dropped: appending it to a null type would produce the literal
+  * text "null;charset=...".
+  *
+  * @param suri the URI as found in the document
+  * @param overrideType the content type to force, or null for none
+  * @param overrideCharset the charset to force, or null/empty for none
+  * @param cb the callback that processes the URI
+  * @return whatever cb.processURI returns
+  */
+ static String sanitizeURI(
+     String suri,
+     String overrideType,
+     String overrideCharset,
+     FilterCallback cb) {
+     String type = overrideType;
+     if (type != null && overrideCharset != null && overrideCharset.length() > 0)
+         type = type + ";charset=" + overrideCharset;
+     return cb.processURI(suri, type);
+ }
+
+ /**
+  * Looks up a key and returns its value only if that value is a String.
+  *
+  * @param h the table to read
+  * @param key the key to look up
+  * @return the mapped String, or null when the key is absent or mapped to
+  *         something that is not a String
+  */
+ static String getHashString(Hashtable h, String key) {
+     Object value = h.get(key);
+     // instanceof is false for null, so an absent key falls through too
+     return (value instanceof String) ? (String) value : null;
+ }
+
+}
Added:
trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java
===================================================================
---
trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java
2006-03-18 14:52:17 UTC (rev 8274)
+++
trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,48 @@
+package freenet.clients.http.filter;
+
+/**
+ * Raised for a MIME type that is known and flagged as dangerous. The
+ * explanation lists the hazards recorded on the MIMEType.
+ */
+public class KnownUnsafeContentTypeException extends UnsafeContentTypeException {
+
+    /** The MIME type that triggered this exception. */
+    MIMEType type;
+
+    public KnownUnsafeContentTypeException(MIMEType type) {
+        this.type = type;
+    }
+
+    /**
+     * Builds the HTML body of the error page: the type's read description,
+     * a general warning, then one list item per hazard flag that is set on
+     * the type (inlines, links, scripting, read metadata).
+     */
+    public String getExplanation() {
+        StringBuffer sb = new StringBuffer();
+        sb.append("<p><b>");
+        sb.append(type.readDescription);
+        sb.append("</b></p>\n" +
+            "<p>This is a potentially dangerous MIME type. If the node lets it through, your browser may " +
+            "do bad things leading to compromise of your anonymity, and your IP address being exposed in " +
+            "connection with this page. In particular:<ul>");
+        if (type.dangerousInlines)
+            sb.append("<li><font color=\"red\"><b>Dangerous inlines:</b></font> This type of content can contain inline images or " +
+                "videos, and can therefore load content from the non-anonymous open Web, exposing your " +
+                "IP address.</li>");
+        if (type.dangerousLinks)
+            sb.append("<li><font color=\"red\"><b>Dangerous links:</b></font> This type of content can contain links to the " +
+                "non-anonymous Web; if you click on them (and they may be disguised), this may expose " +
+                "your IP address.</li>");
+        if (type.dangerousScripting)
+            sb.append("<li><font color=\"red\"><b>Dangerous scripting:</b></font> This type of content can contain dangerous scripts " +
+                "which when executed may compromise your anonymity by connecting to the open Web or " +
+                "otherwise breach security.</li>");
+        if (type.dangerousReadMetadata)
+            sb.append("<li><font color=\"red\"><b>Dangerous metadata:</b></font> This type of content can contain metadata which may " +
+                "be displayed by some browsers or other software, which may contain dangerous links or inlines.</li>");
+
+        sb.append("</ul>Since there is no built-in filter for this data, you should take the utmost of care!");
+
+        return sb.toString();
+    }
+
+    /**
+     * NOTE(review): the MIME type name is inserted without HTML encoding;
+     * confirm primaryMimeType can never contain markup.
+     */
+    public String getHTMLEncodedTitle() {
+        return "Known dangerous type: " + type.primaryMimeType;
+    }
+
+    public String getRawTitle() {
+        return "Known dangerous type: " + type.primaryMimeType;
+    }
+
+}
Modified: trunk/freenet/src/freenet/clients/http/filter/MIMEType.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/MIMEType.java 2006-03-18
14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/MIMEType.java 2006-03-18
15:18:54 UTC (rev 8275)
@@ -5,49 +5,49 @@
*/
public class MIMEType {
- final String primaryMimeType;
- final String[] alternateMimeTypes;
+ public final String primaryMimeType;
+ public final String[] alternateMimeTypes;
- final String primaryExtension;
- final String[] alternateExtensions;
+ public final String primaryExtension;
+ public final String[] alternateExtensions;
/** Is the data safe to read as-is? This is true for text/plain. */
- final boolean safeToRead;
+ public final boolean safeToRead;
/** Is the data safe to write as-is? */
- final boolean safeToWrite;
+ public final boolean safeToWrite;
/** Content filter to make data safe to read */
- final ContentDataFilter readFilter;
+ public final ContentDataFilter readFilter;
/** Content filter to make data safe to write */
- final ContentDataFilter writeFilter;
+ public final ContentDataFilter writeFilter;
// Detail. Not necessarily an exhaustive list.
- final boolean dangerousLinks;
+ public final boolean dangerousLinks;
- final boolean dangerousInlines;
+ public final boolean dangerousInlines;
- final boolean dangerousScripting;
+ public final boolean dangerousScripting;
- final boolean dangerousReadMetadata;
+ public final boolean dangerousReadMetadata;
- final boolean dangerousWriteMetadata;
+ public final boolean dangerousWriteMetadata;
- final boolean dangerousToWriteEvenWithFilter;
+ public final boolean dangerousToWriteEvenWithFilter;
// These are in addition to the above
- final String readDescription;
+ public final String readDescription;
- final String writeDescription;
+ public final String writeDescription;
- final boolean takesACharset;
+ public final boolean takesACharset;
- final String defaultCharset;
+ public final String defaultCharset;
- final CharsetExtractor charsetExtractor;
+ public final CharsetExtractor charsetExtractor;
MIMEType(String type, String ext, String[] extraTypes, String[]
extraExts,
boolean safeToRead, boolean safeToWrite,
ContentDataFilter readFilter,
@@ -76,4 +76,11 @@
this.defaultCharset = defaultCharset;
this.charsetExtractor = charsetExtractor;
}
+
+ /**
+ * Throw an exception indicating that this is a dangerous content type.
+ * This method never returns normally.
+ *
+ * @throws KnownUnsafeContentTypeException always; the exception is
+ * constructed with this MIMEType as its argument.
+ */
+ public void throwUnsafeContentTypeException() throws KnownUnsafeContentTypeException {
+ throw new KnownUnsafeContentTypeException(this);
+ }
}
Added: trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,18 @@
+package freenet.clients.http.filter;
+
+
+/**
+ * A FilterCallback that permits nothing: it refuses both kinds of form and
+ * answers null for every URI.
+ */
+public class NullFilterCallback implements FilterCallback {
+
+    /** @return always false */
+    public boolean allowGetForms() {
+        return false;
+    }
+
+    /** @return always false */
+    public boolean allowPostForms() {
+        return false;
+    }
+
+    /**
+     * @param uri ignored
+     * @param overrideType ignored
+     * @return always null
+     */
+    public String processURI(String uri, String overrideType) {
+        return null;
+    }
+
+}
Added:
trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java
===================================================================
---
trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java
2006-03-18 14:52:17 UTC (rev 8274)
+++
trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,37 @@
+package freenet.clients.http.filter;
+
+import freenet.support.HTMLEncoder;
+
+/**
+ * Raised for a MIME type the node has no information about. Keeps the type
+ * name both as given and passed through HTMLEncoder.encode.
+ */
+public class UnknownContentTypeException extends UnsafeContentTypeException {
+
+    /** Text shared by both titles; the type name is appended to it. */
+    private static final String TITLE_PREFIX =
+        "Unknown and potentially dangerous content type: ";
+
+    /** Fixed explanation shown on the error page. */
+    private static final String EXPLANATION =
+        "<p>Your Freenet node does not know anything about this MIME type. "
+        + "This means that your browser might do something dangerous in response to downloading this file. "
+        + "For example, many formats can contain embedded images or videos, which are downloaded from the web; "
+        + "this is by no means innocuous, because they can ruin your anonymity and expose your IP address "
+        + "(if the attacker runs the web site or has access to its logs). "
+        + "Hyperlinks to the Web can also be a threat, for much the same reason, "
+        + "as can scripting, for this and other reasons.</p>";
+
+    /** The type name exactly as supplied. */
+    final String type;
+    /** The type name after HTMLEncoder.encode. */
+    final String encodedType;
+
+    public UnknownContentTypeException(String typeName) {
+        this.type = typeName;
+        this.encodedType = HTMLEncoder.encode(typeName);
+    }
+
+    /** @return the type name exactly as supplied to the constructor */
+    public String getType() {
+        return type;
+    }
+
+    /** @return the title built with the encoded type name */
+    public String getHTMLEncodedTitle() {
+        return TITLE_PREFIX + encodedType;
+    }
+
+    /** @return the title built with the unencoded type name */
+    public String getRawTitle() {
+        return TITLE_PREFIX + type;
+    }
+
+    /** @return a fixed HTML paragraph describing the risk */
+    public String getExplanation() {
+        return EXPLANATION;
+    }
+
+}
Added:
trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java
===================================================================
---
trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java
2006-03-18 14:52:17 UTC (rev 8274)
+++
trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java
2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,26 @@
+package freenet.clients.http.filter;
+
+/**
+ * Thrown by the filter when it cannot guarantee the safety of the data, because it is an unknown type,
+ * because it cannot be filtered, or because we do not know how to filter it.
+ *
+ * Base class for UnknownContentTypeException and KnownUnsafeContentTypeException.
+ * Subclasses supply the text of the error page through the three abstract
+ * methods below.
+ */
+public abstract class UnsafeContentTypeException extends Exception {
+
+ /**
+ * Get the contents of the error page.
+ * NOTE(review): the two subclasses named above return HTML markup here;
+ * presumably callers insert it into the page as-is - confirm.
+ */
+ public abstract String getExplanation();
+
+ /**
+ * Get the title of the error page.
+ * As the name says, the result is meant to be usable in HTML directly.
+ */
+ public abstract String getHTMLEncodedTitle();
+
+ /**
+ * Get the raw title of the error page. (May be unsafe for HTML).
+ */
+ public abstract String getRawTitle();
+
+}
Added: trunk/freenet/src/freenet/clients/http/filter/Yytoken.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/Yytoken.java 2006-03-18
14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/Yytoken.java 2006-03-18
15:18:54 UTC (rev 8275)
@@ -0,0 +1,32 @@
+package freenet.clients.http.filter;
+
+/**
+ * Parsing token.
+ *
+ * @author devrandom at hyper.to
+ */
+
+class Yytoken {
+ public int m_index;
+ public String m_text;
+ public int m_line;
+ public int m_charBegin;
+ public int m_charEnd;
+
+ Yytoken (int index, String text, int line, int charBegin, int charEnd) {
+ m_index = index;
+ m_text = text;
+ m_line = line;
+ m_charBegin = charBegin;
+ m_charEnd = charEnd;
+ }
+
+ public String toString() {
+ return "Text : "+m_text+
+ "\nindex : "+m_index+
+ "\nline : "+m_line+
+ "\ncBeg. : "+m_charBegin+
+ "\ncEnd. : "+m_charEnd;
+ }
+}
+
Modified: trunk/freenet/src/freenet/node/TextModeClientInterface.java
===================================================================
--- trunk/freenet/src/freenet/node/TextModeClientInterface.java 2006-03-18
14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/node/TextModeClientInterface.java 2006-03-18
15:18:54 UTC (rev 8275)
@@ -429,11 +429,11 @@
// FIXME depends on CHK's still being renamable
//uri = uri.setDocName(f.getName());
- outsb.append("URI: "+uri);
+ outsb.append("URI: "+uri+"\n");
long endTime = System.currentTimeMillis();
long sz = f.length();
double rate = 1000.0 * sz / (endTime-startTime);
- outsb.append("Upload rate: "+rate+" bytes / second");
+ outsb.append("Upload rate: "+rate+" bytes / second\n");
} catch (FileNotFoundException e1) {
outsb.append("File not found");
} catch (InserterException e) {
Modified: trunk/freenet/src/freenet/node/Version.java
===================================================================
--- trunk/freenet/src/freenet/node/Version.java 2006-03-18 14:52:17 UTC (rev
8274)
+++ trunk/freenet/src/freenet/node/Version.java 2006-03-18 15:18:54 UTC (rev
8275)
@@ -20,7 +20,7 @@
public static final String protocolVersion = "1.0";
/** The build number of the current revision */
- private static final int buildNumber = 543;
+ private static final int buildNumber = 544;
/** Oldest build of Fred we will talk to */
private static final int lastGoodBuild = 507;
Added: trunk/freenet/src/freenet/support/io/NullWriter.java
===================================================================
--- trunk/freenet/src/freenet/support/io/NullWriter.java 2006-03-18
14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/support/io/NullWriter.java 2006-03-18
15:18:54 UTC (rev 8275)
@@ -0,0 +1,17 @@
+package freenet.support.io;
+
+import java.io.IOException;
+import java.io.Writer;
+
+/**
+ * A Writer that discards everything written to it. Flushing and closing do
+ * nothing.
+ */
+public class NullWriter extends Writer {
+
+    /** Discards the characters. */
+    public void write(char[] cbuf, int off, int len) throws IOException {
+        // intentionally empty
+    }
+
+    /** Nothing is buffered, so there is nothing to flush. */
+    public void flush() throws IOException {
+        // intentionally empty
+    }
+
+    /** Holds no resources, so there is nothing to release. */
+    public void close() throws IOException {
+        // intentionally empty
+    }
+
+}