Author: j16sdiz Date: 2008-09-08 11:35:01 +0000 (Mon, 08 Sep 2008) New Revision: 22551
Modified: trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex Log: fix FIXMEs and regenerate from jflex Modified: trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java =================================================================== --- trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java 2008-09-08 11:34:38 UTC (rev 22550) +++ trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java 2008-09-08 11:35:01 UTC (rev 22551) @@ -1,10 +1,11 @@ -/* The following code was generated by JFlex 1.4.1 on 13-aug-07 8:18:51 */ +/* The following code was generated by JFlex 1.4.1 on 9/8/08 7:31 PM */ /* This code is part of Freenet. It is distributed under the GNU General * Public License, version 2 (or at your option any later version). See * http://www.gnu.org/ for further details of the GPL. */ package freenet.clients.http.filter; import java.io.*; +import java.util.*; import freenet.l10n.L10n; /* This class tokenizes a CSS2 Reader stream, writes it out to the output Writer, and filters any URLs found */ // WARNING: this is not as thorough as the HTML parser - new versions of the standard could lead to anonymity risks. See comments in SaferFilter.java @@ -12,12 +13,13 @@ // just needs somebody to go over the standard carefully and eliminate everything that isn't sufficiently specific (e.g. matching a '-' on its own). 
// Mostly from http://www.w3.org/TR/REC-CSS2/grammar.html +@SuppressWarnings("fallthrough") /** * This class is a scanner generated by * <a href="http://www.jflex.de/">JFlex</a> 1.4.1 - * on 13-aug-07 8:18:51 from the specification file - * <tt>freenet/clients/http/filter/CSSTokenizerFilter.jflex</tt> + * on 9/8/08 7:31 PM from the specification file + * <tt>src/freenet/clients/http/filter/CSSTokenizerFilter.jflex</tt> */ class CSSTokenizerFilter { @@ -879,6 +881,23 @@ from input */ private int zzEndRead; + /** number of newlines encountered up to the start of the matched text */ + private int yyline; + + /** the number of characters up to the start of the matched text */ + private int yychar; + + /** + * the number of characters from the last newline up to the start of the + * matched text + */ + private int yycolumn; + + /** + * zzAtBOL == true <=> the scanner is currently at the beginning of a line + */ + private boolean zzAtBOL = true; + /** zzAtEOF == true <=> the scanner is at the EOF */ private boolean zzAtEOF; @@ -993,35 +1012,13 @@ // Ignore one whitespace char after an escape int d = Integer.parseInt(hexEscape.toString(), 16); - // FIXME once we can use 1.5, use Characters.toChars(int). - if(d > 0xFFFF) { - String error = - l10n("supplementalCharsNotSupported"); - logError(error); - try { - w.write("/* "+error+"*/"); - } catch (IOException e) {}; - } else { - c = (char)d; - buffer.append(c); - } + buffer.append(new String(Character.toChars(d))); stillEscaping = false; hexEscape = new StringBuilder(); } else { int d = Integer.parseInt(hexEscape.toString(), 16); - // FIXME once we can use 1.5, use Characters.toChars(int). 
- if(d > 0xFFFF) { - String error = - l10n("supplementalCharsNotSupported"); - logError(error); - try { - w.write("/* "+error+"*/"); - } catch (IOException e) {}; - } else { - char o = (char)d; - buffer.append(o); - } + buffer.append(new String(Character.toChars(d))); buffer.append(c); stillEscaping = false; hexEscape = new StringBuilder(); @@ -1193,9 +1190,11 @@ */ public final void yyreset(java.io.Reader reader) { zzReader = reader; + zzAtBOL = true; zzAtEOF = false; zzEndRead = zzStartRead = 0; zzCurrentPos = zzMarkedPos = zzPushbackPos = 0; + yyline = yychar = yycolumn = 0; zzLexicalState = YYINITIAL; } @@ -1370,9 +1369,37 @@ zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { - case 14: - { if(debug) log("Deleted unofficial ident: "+yytext()); - w.write("/* " + l10n("deletedUnofficialIdent") + " */"); + case 24: + { // This is horrible. However it seems that there is no other way to do it with either jflex or CUP, as {URL} cannot be an unambiguous token :( + String s = yytext(); + if(debug) log("Recognized URL: "+s); + + DecodedStringThingy dst = new DecodedStringThingy(s); + + if(!dst.url) { + throw new IllegalStateException("parsing url().. 
isn't a url()"); + } + if(dst.suffix.length() > 0) { + yypushback(dst.suffix.length()); + dst.suffix = ""; + } + + s = dst.data; + if(debug) log("URL now: "+s); + try { + s = processURL(s); + dst.data = s; + if(s == null || s.equals("")) { + if(debug) log("URL invalid"); + w.write("url()"); + } else { + s = dst.toString(); + if(debug) log("Writing: "+s); + w.write(s); + } + } catch (CommentException e) { + w.write("/* "+commentEncode(e.getMessage())+" */"); + } } case 33: break; case 20: @@ -1381,19 +1408,23 @@ if(debug) log("Matched unicode: "+s); } case 34: break; - case 2: - { String s = yytext(); - w.write(s); - if(debug) log("Matched ident: "+s); + case 28: + { if(debug) log("Deleted unofficial ident with url: "+yytext()); + w.write("/* " + l10n("deletedUnofficialIdentWithURL") + " */"); } case 35: break; - case 27: + case 8: { String s = yytext(); - s = s.substring("@media".length()).trim(); - w.write("@media "+s+" "); - if(debug) log("Matched @media: "+s); + w.write(s); + if(debug) log("Matched close braces: "+s); } case 36: break; + case 23: + { String s = yytext(); + w.write(s); + if(debug) log("Matched HTML comment: "+s); + } + case 37: break; case 12: { String s = yytext(); if(debug) log("Matched string: "+s); @@ -1404,98 +1435,86 @@ w.write(s); } } - case 37: break; - case 21: - { String s = yytext(); - if(debug) log("Got hexcolor: "+s); - w.write(s); - } case 38: break; - case 7: + case 17: { String s = yytext(); - w.write(s); - if(debug) log("Matched open braces: "+s); + w.write(s); + if(debug) log("Matched ~=: "+s); } case 39: break; - case 6: + case 26: { String s = yytext(); w.write(s); - if(debug) log("Matched semicolon: "+s); + if(debug) log("Matched @page: "+s); } case 40: break; - case 13: + case 22: { String s = yytext(); - w.write(s); - if(debug) log("Matched number: "+s); + StringBuilder sb = new StringBuilder(s.length()); + sb.append("/* "); + boolean inPrefix = true; + for(int i=2;i<s.length()-2;i++) { + char c = s.charAt(i); + if(inPrefix 
&& Character.isWhitespace(c)) { + continue; + } + inPrefix = false; + if(Character.isDigit(c) || Character.isWhitespace(c) || + Character.isLetter(c) || c == '.' || c == '_' || c == '-') { + // No @, no !, etc; IE has been known to do things with comments + // in CSS, and other browsers may too + sb.append(c); + } + } + while(Character.isWhitespace(sb.charAt(sb.length()-1))) + sb.deleteCharAt(sb.length()-1); + sb.append(" */"); + w.write(sb.toString()); + if(debug) log("Matched comment: "+s+" -> "+sb.toString()); } case 41: break; - case 26: + case 13: { String s = yytext(); w.write(s); - if(debug) log("Matched @page: "+s); + if(debug) log("Matched number: "+s); } case 42: break; - case 17: + case 6: { String s = yytext(); - w.write(s); - if(debug) log("Matched ~=: "+s); + w.write(s); + if(debug) log("Matched semicolon: "+s); } case 43: break; - case 23: + case 21: { String s = yytext(); + if(debug) log("Got hexcolor: "+s); w.write(s); - if(debug) log("Matched HTML comment: "+s); } case 44: break; - case 28: - { if(debug) log("Deleted unofficial ident with url: "+yytext()); - w.write("/* " + l10n("deletedUnofficialIdentWithURL") + " */"); + case 7: + { String s = yytext(); + w.write(s); + if(debug) log("Matched open braces: "+s); } case 45: break; - case 8: + case 27: { String s = yytext(); - w.write(s); - if(debug) log("Matched close braces: "+s); + s = s.substring("@media".length()).trim(); + w.write("@media "+s+" "); + if(debug) log("Matched @media: "+s); } case 46: break; + case 2: + { String s = yytext(); + w.write(s); + if(debug) log("Matched ident: "+s); + } + case 47: break; case 25: { String s = yytext(); w.write(s); if(debug) log("Matched unicode range: "+s); } - case 47: break; - case 24: - { // This is horrible. 
However it seems that there is no other way to do it with either jflex or CUP, as {URL} cannot be an unambiguous token :( - String s = yytext(); - if(debug) log("Recognized URL: "+s); - - DecodedStringThingy dst = new DecodedStringThingy(s); - - if(!dst.url) { - throw new IllegalStateException("parsing url().. isn't a url()"); - } - if(dst.suffix.length() > 0) { - yypushback(dst.suffix.length()); - dst.suffix = ""; - } - - s = dst.data; - if(debug) log("URL now: "+s); - try { - s = processURL(s); - dst.data = s; - if(s == null || s.equals("")) { - if(debug) log("URL invalid"); - w.write("url()"); - } else { - s = dst.toString(); - if(debug) log("Writing: "+s); - w.write(s); - } - } catch (CommentException e) { - w.write("/* "+commentEncode(e.getMessage())+" */"); - } - } case 48: break; case 30: { String s = yytext(); @@ -1503,12 +1522,17 @@ if(debug) log("Matched @font-face: "+s); } case 49: break; - case 5: - { String s = yytext(); - w.write(s); - if(debug) log("Matched function end: "+s); + case 14: + { if(debug) log("Deleted unofficial ident: "+yytext()); + w.write("/* " + l10n("deletedUnofficialIdent") + " */"); } case 50: break; + case 19: + { String s = yytext(); + w.write(s); + if(debug) log("Matched HTML comment: "+s); + } + case 51: break; case 29: { String s = yytext(); if(debug) log("Found @import: "+s); @@ -1533,63 +1557,26 @@ w.write("/* " + commentEncode(e.getMessage()) + " */"); } } - case 51: break; - case 4: - { String s = yytext(); - w.write(s); - if(debug) log("Matched single char: "+s); - } case 52: break; - case 15: + case 3: { String s = yytext(); w.write(s); - if(debug) log("Matched #name: "+s); + if(debug) log("Matched whitespace: "+s); } case 53: break; - case 10: - { if(postBadImportFlag) { - // Ignore - postBadImportFlag = false; - if(debug) log("Ignoring mediums list because after bad import: "+ - yytext()); - } else { - String s = yytext(); - w.write(s); - if(debug) log("Matched and passing on mediums list: "+s); - } + case 1: + { 
String s = yytext(); + char c = s.charAt(0); + log("Matched anything: "+yytext()+" - ignoring"); + w.write("/* "+l10n("deletedUnmatchedChar")+" "+c+" */"); // single char cannot break out of comment } case 54: break; - case 18: + case 31: { String s = yytext(); w.write(s); - if(debug) log("Matched |=: "+s); + if(debug) log("Matched important: "+s); } case 55: break; - case 22: - { String s = yytext(); - StringBuilder sb = new StringBuilder(s.length()); - sb.append("/* "); - boolean inPrefix = true; - for(int i=2;i<s.length()-2;i++) { - char c = s.charAt(i); - if(inPrefix && Character.isWhitespace(c)) { - continue; - } - inPrefix = false; - if(Character.isDigit(c) || Character.isWhitespace(c) || - Character.isLetter(c) || c == '.' || c == '_' || c == '-') { - // No @, no !, etc; IE has been known to do things with comments - // in CSS, and other browsers may too - sb.append(c); - } - } - while(Character.isWhitespace(sb.charAt(sb.length()-1))) - sb.deleteCharAt(sb.length()-1); - sb.append(" */"); - w.write(sb.toString()); - if(debug) log("Matched comment: "+s+" -> "+sb.toString()); - } - case 56: break; case 16: { if(!deleteErrors) { throwError(l10n("unknownAtIdentifierLabel")+" "+yytext()); @@ -1599,25 +1586,38 @@ // Ignore } } - case 57: break; - case 31: + case 56: break; + case 11: { String s = yytext(); w.write(s); - if(debug) log("Matched important: "+s); + if(debug) log("Matched measurement: "+s); } - case 58: break; - case 11: + case 57: break; + case 18: { String s = yytext(); w.write(s); - if(debug) log("Matched measurement: "+s); + if(debug) log("Matched |=: "+s); } - case 59: break; + case 58: break; case 32: { String s = yytext(); detectedCharset = s; if(debug) log("Matched and ignoring charset: "+s); // Ignore } + case 59: break; + case 10: + { if(postBadImportFlag) { + // Ignore + postBadImportFlag = false; + if(debug) log("Ignoring mediums list because after bad import: "+ + yytext()); + } else { + String s = yytext(); + w.write(s); + if(debug) 
log("Matched and passing on mediums list: "+s); + } + } case 60: break; case 9: { String s = yytext(); @@ -1626,23 +1626,22 @@ if(debug) log("Matched function start: "+s); } case 61: break; - case 1: + case 4: { String s = yytext(); - char c = s.charAt(0); - log("Matched anything: "+yytext()+" - ignoring"); - w.write("/* "+l10n("deletedUnmatchedChar")+" "+c+" */"); // single char cannot break out of comment + w.write(s); + if(debug) log("Matched single char: "+s); } case 62: break; - case 3: + case 15: { String s = yytext(); w.write(s); - if(debug) log("Matched whitespace: "+s); + if(debug) log("Matched #name: "+s); } case 63: break; - case 19: + case 5: { String s = yytext(); - w.write(s); - if(debug) log("Matched HTML comment: "+s); + w.write(s); + if(debug) log("Matched function end: "+s); } case 64: break; default: Modified: trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex =================================================================== --- trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex 2008-09-08 11:34:38 UTC (rev 22550) +++ trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex 2008-09-08 11:35:01 UTC (rev 22551) @@ -11,6 +11,7 @@ // just needs somebody to go over the standard carefully and eliminate everything that isn't sufficiently specific (e.g. matching a '-' on its own). 
// Mostly from http://www.w3.org/TR/REC-CSS2/grammar.html +@SuppressWarnings("fallthrough") %% %{ @@ -96,11 +97,11 @@ quote = q; s = s.substring(1); } else quote = ' '; - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); int x = 0; boolean justEscaping = false; boolean stillEscaping = false; - StringBuffer hexEscape = new StringBuffer(); + StringBuilder hexEscape = new StringBuilder(); while(x < s.length()) { char c = s.charAt(x); x++; @@ -124,38 +125,16 @@ // Ignore one whitespace char after an escape int d = Integer.parseInt(hexEscape.toString(), 16); - // FIXME once we can use 1.5, use Characters.toChars(int). - if(d > 0xFFFF) { - String error = - l10n("supplementalCharsNotSupported"); - logError(error); - try { - w.write("/* "+error+"*/"); - } catch (IOException e) {}; - } else { - c = (char)d; - buffer.append(c); - } + buffer.append(new String(Character.toChars(d))); stillEscaping = false; - hexEscape = new StringBuffer(); + hexEscape = new StringBuilder(); } else { int d = Integer.parseInt(hexEscape.toString(), 16); - // FIXME once we can use 1.5, use Characters.toChars(int). 
- if(d > 0xFFFF) { - String error = - l10n("supplementalCharsNotSupported"); - logError(error); - try { - w.write("/* "+error+"*/"); - } catch (IOException e) {}; - } else { - char o = (char)d; - buffer.append(o); - } + buffer.append(new String(Character.toChars(d))); buffer.append(c); stillEscaping = false; - hexEscape = new StringBuffer(); + hexEscape = new StringBuilder(); } } else { if(quote != ' ' && c == quote) { @@ -178,8 +157,9 @@ suffix = ""; } + @Override public String toString() { - StringBuffer out = new StringBuffer(); + StringBuilder out = new StringBuilder(); if(url) out.append("url("); if(quote != ' ') out.append(quote); out.append(unescapeData()); @@ -190,7 +170,7 @@ } public String unescapeData() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for(int i=0;i<data.length();i++) { char c = data.charAt(i); if(c == quote || c == '\n') { @@ -203,7 +183,7 @@ } String commentEncode(String s) { - StringBuffer sb = new StringBuffer(s.length()); + StringBuilder sb = new StringBuilder(s.length()); for(int i=0;i<s.length();i++) { char c = s.charAt(i); if(c == '/') @@ -344,7 +324,7 @@ //"/*"([^*]|[\r\n]|("*"+([^*/]|[\r\n])))*"*"*"/" { "/*" ~"*/" { String s = yytext(); - StringBuffer sb = new StringBuffer(s.length()); + StringBuilder sb = new StringBuilder(s.length()); sb.append("/* "); boolean inPrefix = true; for(int i=2;i<s.length()-2;i++) {
