Here comes a rudimentary parser. It is capable of parsing the default.css from Swing, so it should be enough for a start.

The next step will be to wire this into javax.swing.text.html in order to enable stylesheet support in Swing HTML.

2006-08-21  Roman Kennke  <[EMAIL PROTECTED]>

        * gnu/javax/swing/text/html/css/CSSParser.java:
        New class.
        * gnu/javax/swing/text/html/css/CSSParserCallback.java:
        New interface.
        * gnu/javax/swing/text/html/css/CSSParserException.java:
        New exception.
        * gnu/javax/swing/text/html/css/CSSScanner.java:
        Adjusted API comments. Made all constants package private.
        (EOF): New constant field.
        (parseBuffer): Made package private.
        (tokenEnd): Made package private.
        (CSSScanner): Initialize lookahead buffer with -1.
        (main): Print out to System.out rather than System.err.
        (nextToken): Push back character after IDENT.

/Roman
Index: gnu/javax/swing/text/html/css/CSSParser.java
===================================================================
RCS file: gnu/javax/swing/text/html/css/CSSParser.java
diff -N gnu/javax/swing/text/html/css/CSSParser.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gnu/javax/swing/text/html/css/CSSParser.java	21 Aug 2006 20:44:14 -0000
@@ -0,0 +1,470 @@
+/* CSSParser.java -- A parser for CSS stylesheets
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.css;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+/**
+ * A parser for CSS stylesheets.
+ *
+ * This parser is based on the simple CSS grammar described at
+ *
+ * http://www.w3.org/TR/CSS21/syndata.html .
+ *
+ * @author Roman Kennke ([EMAIL PROTECTED])
+ */
+// TODO: Maybe use more restrictive grammar:
+// http://www.w3.org/TR/CSS21/grammar.html#q1
+public class CSSParser
+{
+
+  /**
+   * The scanner that turns the input stream into a sequence of tokens.
+   */
+  private CSSScanner scanner;
+
+  /**
+   * The callback that is notified about statements and declarations.
+   */
+  private CSSParserCallback callback;
+
+  /**
+   * One token of lookahead, or -1 when no token has been pushed back.
+   */
+  private int lookahead;
+
+  /**
+   * The message describing the most recent parse error, if any.
+   */
+  private String error;
+
+  /**
+   * Creates a new CSSParser that parses the specified input.
+   * @param in the source to parse
+   * @param cb the callback that receives the parse events
+   */
+  public CSSParser(Reader in, CSSParserCallback cb)
+  {
+    scanner = new CSSScanner(in);
+    callback = cb;
+    lookahead = -1; // -1 marks the lookahead slot as empty.
+  }
+
+  /**
+   * Parses the input source specified in the constructor.
+   * @throws IOException if an IO error occurs; a parse error is reported
+   *         as a CSSParserException, which is a subclass of IOException
+   */
+  public void parse()
+    throws IOException
+  {
+    boolean success = parseStylesheet();
+    if (! success)
+      {
+        throw new CSSParserException(error);
+      }
+  }
+
+  /**
+   * Parses a stylesheet, which is a sequence of CDO, CDC and whitespace
+   * tokens and statements, terminated by the end of the input.
+   * @return <code>true</code> if the stylesheet could be parsed successfully,
+   *         <code>false</code> otherwise
+   *
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseStylesheet()
+    throws IOException
+  {
+    int token = peekToken();
+    while (token != CSSScanner.EOF && (token == CSSScanner.CDC
+           || token == CSSScanner.CDO || token == CSSScanner.S
+           || parseStatement()))
+      {
+        if (token == CSSScanner.CDC || token == CSSScanner.CDO
+            || token == CSSScanner.S)
+          readToken(); // Skip the CDC, CDO or whitespace token.
+        token = peekToken();
+      }
+    // The loop also ends when a statement fails; only EOF means success.
+    return token == CSSScanner.EOF;
+  }
+
+  /**
+   * Parses a CSS statement, which is either a ruleset or an at-rule.
+   * @return <code>true</code> if the statement could be parsed successfully,
+   *         <code>false</code> otherwise
+   *
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseStatement()
+    throws IOException
+  {
+    return parseRuleset() || parseAtRule();
+  }
+
+  /**
+   * Parses a CSS rule set: an optional selector followed by a block of
+   * semicolon separated declarations enclosed in curly braces.
+   * @return <code>true</code> if the ruleset could be parsed successfully,
+   *         <code>false</code> otherwise
+   *
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseRuleset()
+    throws IOException
+  {
+    StringBuilder selector = new StringBuilder();
+    parseSelector(selector);
+    callback.startStatement(selector.toString());
+    // Skip any whitespace before the opening curly brace.
+    int token;
+    do
+      {
+        token = readToken();
+      } while (token == CSSScanner.S);
+    boolean ret = true;
+
+    if (token == CSSScanner.CURLY_LEFT)
+      {
+        // Skip any whitespace after the opening curly brace.
+        do
+          {
+            token = readToken();
+          } while (token == CSSScanner.S);
+        lookahead = token; // Push back the first non-whitespace token.
+
+        // Maybe read declaration; its result is not checked here.
+        boolean decl = parseDeclaration();
+        token = peekToken();
+        while (token == CSSScanner.SEMICOLON)
+          {
+            readToken(); // Read the semicolon.
+            // Skip any whitespace after the semicolon.
+            do
+              {
+                token = readToken();
+              } while (token == CSSScanner.S);
+            lookahead = token; // Push back the first non-whitespace token.
+
+            // Maybe read declaration; its result is not checked here.
+            parseDeclaration();
+            token = peekToken();
+          }
+        if (token != CSSScanner.CURLY_RIGHT)
+          {
+            error = "Expected right curly brace";
+            ret = false;
+          }
+        else
+          {
+            readToken(); // Read the right curly brace.
+            // Skip any whitespace after the ruleset.
+            do
+              {
+                token = readToken();
+              } while (token == CSSScanner.S);
+            lookahead = token; // Push back the first non-whitespace token.
+            callback.endStatement();
+          }
+      }
+    else
+      {
+        ret = false;
+        error = "Expected left curly brace";
+      }
+    return ret;
+  }
+
+  /**
+   * Parses a CSS declaration of the form <code>property : value</code> and
+   * reports it to the callback.
+   * @return <code>true</code> if the declaration could be parsed successfully,
+   *         <code>false</code> otherwise
+   *
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseDeclaration()
+   throws IOException
+  {
+    // Maybe fetch one leading DELIM, which is skipped.
+    int token = readToken();
+    if (token == CSSScanner.DELIM)
+      token = readToken();
+
+    boolean ret = true;
+
+    // Parse property; the token text is still in the scanner's buffer.
+    String property = null;
+    if (token == CSSScanner.IDENT)
+      {
+        property = new String(scanner.parseBuffer, 0, scanner.tokenEnd);
+        // Skip any whitespace after the property name.
+        do
+          {
+            token = readToken();
+          } while (token == CSSScanner.S);
+
+        // Read ':', which the scanner delivers as a DELIM token.
+        if (token == CSSScanner.DELIM && scanner.parseBuffer[0] == ':')
+          {
+            // Skip any whitespace after the colon.
+            do
+              {
+                token = readToken();
+              } while (token == CSSScanner.S);
+            lookahead = token; // Push back the first token of the value.
+
+            StringBuilder value = new StringBuilder();
+            if (parseValue(value))
+              {
+                callback.declaration(property, value.toString());
+              }
+            else
+              {
+                ret = false;
+                error = "Error while reading the property value";
+              }
+          }
+        else
+          {
+            ret = false;
+            error = "Expected colon to separate property and value";
+          }
+          
+      }
+    else
+      {
+        lookahead = token; // Push back the token that is no property.
+        ret = false;
+        error = "Expected IDENT token for property";
+      }
+    return ret;
+  }
+
+  /**
+   * Parses a property value, which consists of one or more 'any' items.
+   * @param s the string builder to read the value into
+   * @return <code>true</code> if the value could be parsed successfully,
+   *         <code>false</code> otherwise
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseValue(StringBuilder s)
+    throws IOException
+  {
+    // FIXME: Handle block and ATKEYWORD.
+    // A value may consist of several items, e.g. "0 auto", so read them all.
+    boolean ret = parseAny(s);
+    while (ret && parseAny(s));
+    return ret;
+  }
+
+  /**
+   * Parses a selector, which is a sequence of one or more 'any' items.
+   *
+   * @param sel the string builder to put the selector into
+   *
+   * @return <code>true</code> if the selector could be parsed successfully,
+   *         <code>false</code> otherwise
+   *
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseSelector(StringBuilder sel)
+    throws IOException
+  {
+    // At least one any needs to be parsed.
+    boolean ret = parseAny(sel);
+    if (ret)
+      {
+        while (parseAny(sel)); // Consume all further items.
+      }
+    return ret;
+  }
+
+  /**
+   * Parses the any rule. If s is not null, then the contents of the
+   * tokens are appended verbatim, including any trailing whitespace.
+   * FUNCTION, '(' and '[' are not implemented yet and yield false.
+   * @param s the string builder to append to, may be <code>null</code>
+   *
+   * @return <code>true</code> if one item could be parsed successfully,
+   *         <code>false</code> otherwise
+   *
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseAny(StringBuilder s)
+    throws IOException
+  {
+    int token = peekToken();
+    boolean ret = false;
+    if (token == CSSScanner.IDENT || token == CSSScanner.NUMBER
+        || token == CSSScanner.PERCENTAGE || token == CSSScanner.DIMENSION
+        || token == CSSScanner.STRING || token == CSSScanner.DELIM
+        || token == CSSScanner.URI || token == CSSScanner.HASH
+        || token == CSSScanner.UNICODE_RANGE || token == CSSScanner.INCLUDES
+        || token == CSSScanner.DASHMATCH)
+      {
+        if (s != null)
+          s.append(scanner.parseBuffer, 0, scanner.tokenEnd);
+        readToken(); // Consume the token that was only peeked so far.
+        ret = true;
+      }
+    else if (token == CSSScanner.FUNCTION)
+      System.err.println("Implement parseAny for FUNCTION");
+    else if (token == CSSScanner.PAREN_LEFT)
+      System.err.println("Implement parseAny for (");
+    else if (token == CSSScanner.BRACE_LEFT)
+      System.err.println("Implement parseAny for [");
+
+    // Parse any following whitespace too, even if no item was parsed.
+    token = peekToken();
+    while (token == CSSScanner.S)
+      {
+        if (s != null)
+          s.append(scanner.parseBuffer, 0, scanner.tokenEnd);
+        readToken();
+        token = peekToken();
+      }
+    return ret;
+  }
+
+  /**
+   * Parses a CSS at-rule. This is not implemented yet.
+   *
+   * @return <code>true</code> if the at-rule could be parsed successfully,
+   *         <code>false</code> otherwise; currently always <code>false</code>
+   *
+   * @throws IOException if an IO error occurs
+   */
+  private boolean parseAtRule()
+    throws IOException
+  {
+    // FIXME: Implement. Until then, no at-rule is recognized.
+    return false;
+  }
+
+  /**
+   * Reads the next token, and skips the comments. A token that has been
+   * peeked or pushed back into the lookahead is returned first.
+   * @return the next non-comment token
+   */
+  private int readToken()
+    throws IOException
+  {
+    int token;
+    if (lookahead == -1)
+      {
+        do
+          {
+            token = scanner.nextToken();
+          } while (token == CSSScanner.COMMENT);
+      }
+    else
+      {
+        token = lookahead;
+        lookahead = -1; // The lookahead token is consumed now.
+      }
+    return token;
+  }
+
+  /**
+   * Returns the next token to be read, without really reading it. The next
+   * call to readToken() will return the same token again.
+   *
+   * @return the next non-comment token, which is kept in the lookahead
+   */
+  private int peekToken()
+    throws IOException
+  {
+    int token;
+    if (lookahead == -1)
+      {
+        do
+          {
+            token = scanner.nextToken();
+          } while (token == CSSScanner.COMMENT);
+        lookahead = token; // Keep it for the next readToken() call.
+      }
+    else
+      token = lookahead;
+    return token;
+  }
+
+  /**
+   * For testing, we parse the default.css in javax/swing/text/html and
+   * print every callback invocation to System.out.
+   * @param args the command line arguments, which are ignored
+   */
+  public static void main(String[] args)
+  {
+    try
+      {
+        String name = "/javax/swing/text/html/default.css";
+        InputStream in = CSSScanner.class.getResourceAsStream(name);
+        BufferedInputStream bin = new BufferedInputStream(in);
+        InputStreamReader r = new InputStreamReader(bin);
+        CSSParserCallback cb = new CSSParserCallback()
+        {
+          public void startStatement(String selector)
+          {
+            System.out.println("startStatement: " + selector);
+          }
+          public void endStatement()
+          {
+            System.out.println("endStatement");
+          }
+          public void declaration(String property, String value)
+          {
+            System.out.println("declaration: " + property + ", " + value);
+          }
+        };
+        CSSParser p = new CSSParser(r, cb);
+        p.parse();
+      }
+    catch (IOException ex)
+      {
+        ex.printStackTrace();
+      }
+  }
+
+}
Index: gnu/javax/swing/text/html/css/CSSParserCallback.java
===================================================================
RCS file: gnu/javax/swing/text/html/css/CSSParserCallback.java
diff -N gnu/javax/swing/text/html/css/CSSParserCallback.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gnu/javax/swing/text/html/css/CSSParserCallback.java	21 Aug 2006 20:44:14 -0000
@@ -0,0 +1,81 @@
+/* CSSParserCallback.java -- Callback for parsing CSS
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.css;
+
+/**
+ * Defines the callback that is used by the CSSParser to notify the
+ * backend of the parsing process.
+ *
+ * @author Roman Kennke ([EMAIL PROTECTED])
+ */
+public interface CSSParserCallback
+{
+
+  /**
+   * Signals the beginning of a statement.
+   *
+   * A CSS statement is built up as follows:
+   * <pre>
+   * &lt;selector&gt; {
+   *   ... declarations...
+   * }
+   * </pre>
+   *
+   * After startStatement(), the callback will receive zero to n calls
+   * to declaration(), followed by an endStatement() call.
+   *
+   * @param selector the selector of the statement
+   */
+  void startStatement(String selector);
+
+  /**
+   * Signals the end of a statement.
+   */
+  void endStatement();
+
+  /**
+   * Signals the parsing of one declaration, which defines a mapping
+   * from a property to a value.
+   *
+   * @param property the name of the property
+   * @param value the value that is assigned to the property
+   */
+  void declaration(String property, String value);
+
+}
Index: gnu/javax/swing/text/html/css/CSSParserException.java
===================================================================
RCS file: gnu/javax/swing/text/html/css/CSSParserException.java
diff -N gnu/javax/swing/text/html/css/CSSParserException.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gnu/javax/swing/text/html/css/CSSParserException.java	21 Aug 2006 20:44:15 -0000
@@ -0,0 +1,62 @@
+/* CSSParserException.java -- The CSS parser exception
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.javax.swing.text.html.css;
+
+import java.io.IOException;
+
+/**
+ * This exception is raised when the CSS parser hits a syntax error. It is
+ * an IOException, so handlers for IO errors catch parse errors as well.
+ * @author Roman Kennke ([EMAIL PROTECTED])
+ */
+public class CSSParserException
+  extends IOException
+{
+
+  /**
+   * Creates a new CSSParserException.
+   *
+   * @param message the message that describes the syntax error
+   */
+  public CSSParserException(String message)
+  {
+    super(message);
+  }
+
+}
Index: gnu/javax/swing/text/html/css/CSSScanner.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/javax/swing/text/html/css/CSSScanner.java,v
retrieving revision 1.3
diff -u -1 -2 -r1.3 CSSScanner.java
--- gnu/javax/swing/text/html/css/CSSScanner.java	21 Aug 2006 10:27:44 -0000	1.3
+++ gnu/javax/swing/text/html/css/CSSScanner.java	21 Aug 2006 20:44:15 -0000
@@ -36,84 +36,92 @@
 exception statement from your version. */
 
 
 package gnu.javax.swing.text.html.css;
 
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 
 /**
- * A parser for CSS stylesheets. This is based on the grammar from:
+ * A tokenizer for CSS stylesheets. This is based on the scanner definition
+ * from:
  *
- * http://www.w3.org/TR/CSS21/syndata.html
+ * http://www.w3.org/TR/CSS21/syndata.html#tokenization
  *
  * @author Roman Kennke ([EMAIL PROTECTED])
  */
+// TODO: Maybe implement more restrictive scanner:
+// http://www.w3.org/TR/CSS21/grammar.html#q2
 class CSSScanner
 {
 
   // The tokens. This list is taken from:
   // http://www.w3.org/TR/CSS21/syndata.html#tokenization
-  private static final int IDENT = 1;
-  private static final int ATKEYWORD = 2;
-  private static final int STRING = 3;
-  private static final int INVALID = 4;
-  private static final int HASH = 5;
-  private static final int NUMBER = 6;
-  private static final int PERCENTAGE = 7;
-  private static final int DIMENSION = 8;
-  private static final int URI = 9;
-  private static final int UNICODE_RANGE = 10;
-  private static final int CDO = 11;
-  private static final int CDC = 12;
-  private static final int SEMICOLON = 13;
-  private static final int CURLY_LEFT = 14;
-  private static final int CURLY_RIGHT = 15;
-  private static final int PAREN_LEFT = 16;
-  private static final int PAREN_RIGHT = 17;
-  private static final int BRACE_LEFT = 16;
-  private static final int BRACE_RIGHT = 17;
-  private static final int S = 18;
-  private static final int COMMENT = 19;
-  private static final int FUNCTION = 20;
-  private static final int INCLUDES = 21;
-  private static final int DASHMATCH = 22;
-  private static final int DELIM = 23;
+  static final int IDENT = 1;
+  static final int ATKEYWORD = 2;
+  static final int STRING = 3;
+  static final int INVALID = 4;
+  static final int HASH = 5;
+  static final int NUMBER = 6;
+  static final int PERCENTAGE = 7;
+  static final int DIMENSION = 8;
+  static final int URI = 9;
+  static final int UNICODE_RANGE = 10;
+  static final int CDO = 11;
+  static final int CDC = 12;
+  static final int SEMICOLON = 13;
+  static final int CURLY_LEFT = 14;
+  static final int CURLY_RIGHT = 15;
+  static final int PAREN_LEFT = 16;
+  static final int PAREN_RIGHT = 17;
+  static final int BRACE_LEFT = 16;
+  static final int BRACE_RIGHT = 17;
+  static final int S = 18;
+  static final int COMMENT = 19;
+  static final int FUNCTION = 20;
+  static final int INCLUDES = 21;
+  static final int DASHMATCH = 22;
+  static final int DELIM = 23;
+
+  // Additional tokens defined for convenience.
+  static final int EOF = -1;
 
   /**
    * The input source.
    */
   private Reader in;
 
   /**
    * The parse buffer.
    */
-  private char[] parseBuffer;
+  char[] parseBuffer;
 
   /**
    * The end index in the parseBuffer of the current token.
    */
-  private int tokenEnd;
+  int tokenEnd;
 
   /**
    * The lookahead 'buffer'.
    */
   private int[] lookahead;
 
   CSSScanner(Reader r)
   {
     lookahead = new int[2];
+    lookahead[0] = -1;
+    lookahead[1] = -1;
     parseBuffer = new char[2048];
     in = r;
   }
 
   /**
    * Fetches the next token. The actual character data is in the parseBuffer
    * afterwards with the tokenStart at index 0 and the tokenEnd field
    * pointing to the end of the token.
    *
    * @return the next token
    */
   int nextToken()
@@ -299,24 +307,25 @@
               }
             break;
           default:
             // Handle IDENT that don't begin with '-'.
             if (next == '_' || (next >= 'a' && next <= 'z')
                 || (next >= 'A' && next <= 'Z') || next == '\\' || next > 177)
               {
                 lookahead[0] = next;
                 readIdent();
                 int ch4 = read();
                 if (ch4 == -1 || ch4 != '(')
                   {
+                    lookahead[0] = ch4;
                     token = IDENT;
                   }
                 else
                   {
                     parseBuffer[tokenEnd] = (char) ch4;
                     tokenEnd++;
                     token = FUNCTION;
                   }
               }
             else
               {
                 parseBuffer[0] = (char) next;
@@ -687,22 +696,22 @@
   {
     try
       {
         String name = "/javax/swing/text/html/default.css";
         InputStream in = CSSScanner.class.getResourceAsStream(name);
         BufferedInputStream bin = new BufferedInputStream(in);
         InputStreamReader r = new InputStreamReader(bin);
         CSSScanner s = new CSSScanner(r);
         int token;
         do
           {
             token = s.nextToken();
-            System.err.println("token: " + token + ": "
+            System.out.println("token: " + token + ": "
                                + s.currentTokenString());
           } while (token != -1);
       }
     catch (IOException ex)
       {
         ex.printStackTrace();
       }
   }
 }

Reply via email to