Http11Parser.java

costin Wed, 23 Apr 2008 10:34:46 -0700

Author: costin
Date: Wed Apr 23 10:34:22 2008
New Revision: 650949

URL: http://svn.apache.org/viewvc?rev=650949&view=rev
Log:
Extracted from apr and nio connectors, transformed to completely non-blocking, 
independent of the io.



Added:
    
tomcat/sandbox/tomcat-lite/coyote-extensions/org/apache/tomcat/util/http/Http11Parser.java
   (with props)

Added: 
tomcat/sandbox/tomcat-lite/coyote-extensions/org/apache/tomcat/util/http/Http11Parser.java
URL: 
http://svn.apache.org/viewvc/tomcat/sandbox/tomcat-lite/coyote-extensions/org/apache/tomcat/util/http/Http11Parser.java?rev=650949&view=auto
==============================================================================
--- 
tomcat/sandbox/tomcat-lite/coyote-extensions/org/apache/tomcat/util/http/Http11Parser.java
 (added)
+++ 
tomcat/sandbox/tomcat-lite/coyote-extensions/org/apache/tomcat/util/http/Http11Parser.java
 Wed Apr 23 10:34:22 2008
@@ -0,0 +1,761 @@
+/*
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.http;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.tomcat.util.buf.MessageBytes;
+
+/**
+ * Non-blocking parser for request and response line and headers. 
+ * 
+ * This could/should replace the parsing parts of InternalAprInputBuffer, 
+ * InternalNioInputBuffer, but the main goal is to be used in non-blocking
+ * client code.
+ * 
+ * All parse methods will return a negative number (or false) if more data is
+ * needed and leave the buffer position/limit unchanged. After more data is 
+ * available, call setBuffer() again and then the same parse method. If enough
+ * data is available, 'pos' will be moved to the first byte after the 
+ * parsed data.
+ * 
+ * The next get() will be a header (after parseRequestLine, 
+ * parseResponseLine, parseHeader), a LF after the last parseHeader, or
+ * the first byte of the payload for parseRequest()/parseResponse().
+ * 
+ * TODO: use a ByteChunk instead of byte[], enhance methods using ByteBuffer
+ * 
+ * @author <a href="mailto:[EMAIL PROTECTED]">Remy Maucherat</a>
+ * @author Costin Manolache 
+ */
+public class Http11Parser {
+
+    /**
+     * CRLF, the HTTP line terminator, as a String.
+     */
+    public static final String CRLF = "\r\n";
+    
+    /**
+     * CR (carriage return) byte.
+     */
+    public static final byte CR = (byte) '\r';
+
+
+    /**
+     * LF (line feed) byte.
+     */
+    public static final byte LF = (byte) '\n';
+
+
+    /**
+     * SP (space) byte.
+     */
+    public static final byte SP = (byte) ' ';
+
+
+    /**
+     * HT (horizontal tab) byte.
+     */
+    public static final byte HT = (byte) '\t';
+
+
+    /**
+     * COLON byte; passed as the delimiter when reading a header name.
+     */
+    public static final byte COLON = (byte) ':';
+    
+    /**
+     * SEMI_COLON byte.
+     */
+    public static final byte SEMI_COLON = (byte) ';';
+
+    /**
+     * 'A', lower bound of the upper-case range test.
+     */
+    public static final byte A = (byte) 'A';
+
+
+    /**
+     * 'a'.
+     */
+    public static final byte a = (byte) 'a';
+
+
+    /**
+     * 'Z', upper bound of the upper-case range test.
+     */
+    public static final byte Z = (byte) 'Z';
+    
+
+    /**
+     * Lower case offset, computed as 'A' - 'a' (a negative value). The
+     * lower-casing code subtracts this offset from an upper-case byte to
+     * obtain the matching lower-case byte.
+     */
+    public static final byte LC_OFFSET = A - a;
+
+    /**
+     * '?', separates the path from the query string in a request URI.
+     */
+    public static final byte QUESTION = (byte) '?';
+
+    /**
+     * HTTP/1.0.
+     */
+    public static final String HTTP_10 = "HTTP/1.0";
+
+    /** The three characters '2', '0', '0' as bytes. */
+    public static final byte[] _200_BYTES = {'2', '0', '0'};
+    
+    /** The three characters '4', '0', '0' as bytes. */
+    public static final byte[] _400_BYTES = {'4', '0' , '0'};
+
+    /** The three characters '4', '0', '4' as bytes. */
+    public static final byte[] _404_BYTES = { '4', '0', '4' }; 
+
+    /**
+     * HTTP/1.1.
+     */
+    public static final String HTTP_11 = "HTTP/1.1";
+
+    /**
+     * {@link #HTTP_11} as bytes. NOTE(review): getBytes() with no argument
+     * encodes with the platform default charset.
+     */
+    public static final byte[] HTTP_11_BYTES = HTTP_11.getBytes();
+
+    
+    // ============== Buffer ==================== 
+    
+    /** End of the valid data in buf[]. Every scan loop in this class stops
+     * as soon as pos >= lastValid, so the byte at this index is not treated
+     * as data. Set from the 'end' argument of setBuffer().
+     */
+    public int lastValid; // used as an exclusive limit by the parse loops
+
+    /** Position in the buffer: index of the next byte to examine.
+     */
+    public int pos;
+
+    /**
+     * Pointer to the current read buffer. Null until setBuffer() is called
+     * and again after recycle().
+     */
+    public byte[] buf;
+
+    // TODO: same thing with ByteChunk, ByteBuffer
+    // Since ByteChunk can be easily wrapped as ByteBuffer - only second is 
+    // needed. Replace:
+    // buf[pos++] with bb.get();
+    // pos-- with bb.position(bb.position() - 1);
+    //ByteBuffer bb;
+    
+    
+    
+    // =====================================
+
+    // restart from this position
+    // (written by parseRequestLine(); not read anywhere in this class)
+    int lastParsed = 0;
+
+    // Current parsing stage, one of the STATE_* values below:
+    // 0: parsing request line
+    // 1: parsing headers
+    // 2: request done
+    public int state = 0;
+    
+    // NOTE(review): these are used as constants but are not declared final,
+    // so any code can reassign them.
+    public static int STATE_REQUEST_LINE = 0;
+    public static int STATE_HEADERS = 1;
+    public static int STATE_BODY = 2;
+
+        
+    /**
+     * Creates a parser that expects to see a request (or response) line
+     * first. No buffer is attached until setBuffer() is called.
+     */
+    public Http11Parser() {
+      this.state = STATE_REQUEST_LINE;
+    }
+
+    /**
+     * Attaches a region of a byte array to the parser. Must be called every
+     * time new data is read, before calling a parse method again.
+     * 
+     * @param data  array holding the bytes to parse
+     * @param start index of the first byte to examine; stored in pos
+     * @param end   stored in lastValid; the parse loops stop as soon as
+     *              pos reaches this index
+     */
+    public void setBuffer(byte[] data, int start, int end) {
+        this.lastValid = end;
+        this.pos = start;
+        this.buf = data;
+    }
+
+    // ---------- Utilities ---------------
+    
+    /**
+     * Skips a run of CR and LF bytes starting at the current position.
+     *
+     * @return the index of the first byte that is neither CR nor LF (pos is
+     *         left on that byte), or -1 if the valid data ran out first, in
+     *         which case pos stays where the scan stopped
+     */
+    public final int skipBlank() {
+      while (true) {
+        // Out of data before a non-blank byte was seen
+        if (pos >= lastValid) {
+          return -1;
+        }
+        final byte current = buf[pos];
+        if (current != CR && current != LF) {
+          return pos;
+        }
+        pos++;
+      }
+    }
+    
+    /**
+     * Reads bytes from the current position up to the next occurrence of
+     * the given delimiter and stores them (delimiter excluded) in res.
+     *
+     * The delimiter argument is honoured; earlier code compared against SP
+     * regardless of what the caller passed.
+     *
+     * @param res   receives the bytes between the starting position and the
+     *              delimiter; untouched if the delimiter is not found
+     * @param delim byte that terminates the token
+     * @return the position just after the delimiter (also stored in pos),
+     *         or -1 if the valid data ran out before the delimiter was
+     *         seen; in that case pos is left at the end of the valid data
+     */
+    public final int readToDelim(MessageBytes res, byte delim) {
+      // Mark the current buffer position
+      final int start = pos;
+      while (pos < lastValid) {
+        if (buf[pos] == delim) {
+          res.setBytes(buf, start, pos - start);
+          pos++; // step over the delimiter
+          return pos;
+        }
+        pos++;
+      }
+      // Need more data
+      return -1;
+    }
+
+    /**
+     * Advances past the next occurrence of the delimiter or of a space,
+     * whichever comes first, optionally lower-casing the scanned bytes in
+     * place.
+     *
+     * @param delim byte that ends the scan (a space ends it as well)
+     * @param lower when true, bytes in the range 'A'..'Z' are overwritten in
+     *              buf with their lower-case form
+     * @return the position just after the terminating byte (also stored in
+     *         pos), or -1 if the valid data ran out first
+     */
+    public final int readToDelimAndLowerCase(byte delim, boolean lower) {
+      while (pos < lastValid) {
+        final byte current = buf[pos];
+        if (lower && current >= A && current <= Z) {
+          // LC_OFFSET is 'A' - 'a', so subtracting it yields lower case
+          buf[pos] = (byte) (current - LC_OFFSET);
+        }
+        pos++;
+        if (current == delim || current == SP) {
+          return pos;
+        }
+      }
+      return -1;
+    }
+
+    /**
+     * Reads the rest of the current line into res, without the line
+     * terminator.
+     *
+     * The line ends at the first LF. If a CR was seen before that LF, the
+     * content ends at the last such CR; otherwise it ends at the LF. A
+     * negative sentinel marks "no CR seen yet", so a CR located at index 0
+     * of the buffer is handled like a CR anywhere else.
+     *
+     * @param res receives the line content, or the empty string when the
+     *            line is empty; untouched if no LF is found
+     * @return the position just after the LF (also stored in pos), or -1 if
+     *         the valid data ran out before an LF was seen
+     */
+    public final int readToEnd(MessageBytes res) {
+      final int start = pos;
+      int endpos = -1; // -1: no CR seen so far
+      while (true) {
+        if (pos >= lastValid) {
+          return -1;
+        }
+        final byte chr = buf[pos];
+        if (chr == CR) {
+          endpos = pos;
+        } else if (chr == LF) {
+          if (endpos < 0) {
+            endpos = pos;
+          }
+          pos++;
+          break;
+        }
+        pos++;
+      }
+      if (endpos > start) {
+        res.setBytes(buf, start, endpos - start);
+      } else {
+        res.setString("");
+      }
+      return pos;
+    }
+
+    /**
+     * Skips a run of space and horizontal-tab bytes starting at the current
+     * position.
+     *
+     * @return true when pos has been left on a byte that is neither SP nor
+     *         HT; false if the valid data ran out first
+     */
+    public boolean skipSpace() {
+      for (; pos < lastValid; pos++) {
+        final byte current = buf[pos];
+        if (current != SP && current != HT) {
+          return true;
+        }
+      }
+      return false;
+    }
+    
+    
+    // ------------ Same utils, with ByteBuffer param ------------
+    // Currently used to evaluate the overhead.
+    
+    /**
+     * ByteBuffer variant of skipBlank(): skips a run of CR and LF bytes
+     * starting at the buffer's current position.
+     *
+     * The first byte that is neither CR nor LF is not consumed: the buffer
+     * position is moved back onto it, matching the array-based skipBlank()
+     * and skipSpace(ByteBuffer).
+     *
+     * @param bb    buffer to read from
+     * @param start not used by this method
+     * @return the position of the first non-blank byte, or -1 if the buffer
+     *         was exhausted first
+     */
+    public final int skipBlank(ByteBuffer bb, int start) {
+      while (bb.hasRemaining()) {
+        final byte chr = bb.get();
+        if ((chr != CR) && (chr != LF)) {
+          bb.position(bb.position() - 1); // move back onto the byte
+          return bb.position();
+        }
+      }
+      return -1;
+    }
+    
+    /**
+     * ByteBuffer variant of readToDelim(): reads bytes up to the next
+     * occurrence of the delimiter and stores them in res.
+     *
+     * The stored range excludes the delimiter, as in the array-based
+     * variant. After get() has returned the delimiter the buffer position is
+     * already one past it, so the length is computed from position - 1.
+     *
+     * @param bb    buffer to read from
+     * @param res   receives the bytes before the delimiter; untouched if the
+     *              delimiter is not found
+     * @param delim byte that terminates the token
+     * @return the buffer position just after the delimiter, or -1 if the
+     *         buffer was exhausted first
+     */
+    public final int readToDelim(ByteBuffer bb,
+                                 MessageBytes res, 
+                                 byte delim) {
+      // Mark the current buffer position
+      final int start = bb.position();
+      while (bb.hasRemaining()) {
+        if (bb.get() == delim) {
+          final int delimIndex = bb.position() - 1;
+          res.setBytes(bb, start, delimIndex - start);
+          return bb.position();
+        }
+      }
+      return -1;
+    }
+
+    /**
+     * ByteBuffer variant of readToDelimAndLowerCase(): advances past the
+     * next occurrence of the delimiter, optionally lower-casing the scanned
+     * bytes in place. Unlike the array-based variant, a space does not end
+     * the scan.
+     *
+     * @param bb    buffer to read from (and write to, when lower is true)
+     * @param delim byte that ends the scan
+     * @param lower when true, bytes in the range 'A'..'Z' are overwritten
+     *              with their lower-case form
+     * @return the buffer position just after the delimiter, or -1 if the
+     *         buffer was exhausted first
+     */
+    public final int readToDelimAndLowerCase(ByteBuffer bb,
+                                             byte delim, 
+                                             boolean lower) {
+      while (bb.hasRemaining()) {
+        final byte current = bb.get();
+        if (lower && current >= A && current <= Z) {
+          // absolute put: rewrites the byte just read, position unchanged
+          bb.put(bb.position() - 1, (byte) (current - LC_OFFSET));
+        }
+        if (current == delim) {
+          return bb.position();
+        }
+      }
+      return -1;
+    }
+
+    /**
+     * ByteBuffer variant of readToEnd(): reads the rest of the current line
+     * into res, without the line terminator.
+     *
+     * The byte just read from bb decides whether the line has ended (the
+     * array fields buf/pos are not consulted). End positions are recorded as
+     * the index of the CR/LF itself, i.e. position() - 1 after the get(), so
+     * the terminator is not part of the stored range. A negative sentinel
+     * marks "no CR seen yet".
+     *
+     * @param bb  buffer to read from
+     * @param res receives the line content, or the empty string when the
+     *            line is empty; untouched if no LF is found
+     * @return the buffer position just after the LF, or -1 if the buffer
+     *         was exhausted first
+     */
+    public final int readToEnd(ByteBuffer bb, MessageBytes res) {
+      final int start = bb.position();
+      int endpos = -1; // -1: no CR seen so far
+      boolean eol = false;
+
+      while (!eol) {
+        if (!bb.hasRemaining()) {
+          return -1;
+        }
+        final byte chr = bb.get();
+        if (chr == CR) {
+          endpos = bb.position() - 1;
+        } else if (chr == LF) {
+          if (endpos < 0) {
+            endpos = bb.position() - 1;
+          }
+          eol = true;
+        }
+      }
+      if (endpos > start) {
+        res.setBytes(bb, start, endpos - start);
+      } else {
+        res.setString("");
+      }
+      return bb.position();
+    }
+
+    /**
+     * ByteBuffer variant of skipSpace(): skips a run of space and
+     * horizontal-tab bytes.
+     *
+     * @param bb buffer to read from
+     * @return true when the buffer position has been left on a byte that is
+     *         neither SP nor HT; false if the buffer was exhausted first
+     */
+    public boolean skipSpace(ByteBuffer bb) {
+      while (bb.hasRemaining()) {
+        final byte current = bb.get();
+        if (current != SP && current != HT) {
+          // un-read the byte that ended the run
+          bb.position(bb.position() - 1);
+          return true;
+        }
+      }
+      return false;
+    }
+    
+    
+    // ---------- Parsing request/response line, headers -------- 
+
+    /**
+     * Parses a request line followed by the headers, resuming from whatever
+     * stage the 'state' field indicates.
+     *
+     * @param methodMB      receives the method
+     * @param unparsedURIMB receives the URI including any query string
+     * @param queryMB       receives the query string, if there is one
+     * @param uriMB         receives the URI without the query string
+     * @param protoMB       receives the protocol
+     * @param headers       receives the parsed headers
+     * @return true once both the request line and the headers have been
+     *         parsed (state is then STATE_BODY); false if more data is
+     *         needed
+     * @throws IOException declared by the parse methods this delegates to
+     */
+    public boolean parseRequest(MessageBytes methodMB, 
+                                MessageBytes unparsedURIMB,
+                                MessageBytes queryMB,
+                                MessageBytes uriMB,
+                                MessageBytes protoMB,
+                                MimeHeaders headers) throws IOException {
+      if (state == STATE_REQUEST_LINE) {
+        if (!parseRequestLine(methodMB, unparsedURIMB, queryMB, uriMB,
+                              protoMB)) {
+          return false;
+        }
+        state = STATE_HEADERS;
+      }
+      if (state == STATE_HEADERS) {
+        if (parseHeaders(headers) < 0) {
+          return false;
+        }
+        state = STATE_BODY;
+      }
+      return true;
+    }
+
+    /**
+     * Parses a response (status) line followed by the headers, resuming from
+     * whatever stage the 'state' field indicates.
+     *
+     * @param status  receives the status code
+     * @param msg     receives the text after the status code
+     * @param protoMB receives the protocol
+     * @param headers receives the parsed headers
+     * @return true once both the status line and the headers have been
+     *         parsed; false if more data is needed
+     * @throws IOException declared by the parse methods this delegates to
+     */
+    public boolean parseResponse(MessageBytes status,
+                                 MessageBytes msg,
+                                 MessageBytes protoMB,
+                                 MimeHeaders headers) throws IOException {
+      if (state == STATE_REQUEST_LINE) {
+        if (!parseResponseLine(protoMB, status, msg)) {
+          return false;
+        }
+        state = STATE_HEADERS;
+      }
+      if (state == STATE_HEADERS) {
+        if (parseHeaders(headers) < 0) {
+          return false;
+        }
+      }
+      return true;
+    }
+    
+    /**
+     * Parses headers one after another until the blank line that ends the
+     * header section has been consumed.
+     *
+     * Before each header the method looks for the terminating blank line
+     * (LF, or CR followed by LF) at the current position. Every byte it
+     * inspects is first checked against lastValid, so nothing outside the
+     * valid data is read, and an empty header section (blank line right
+     * away) ends the parse instead of being handed to parseHeader().
+     *
+     * @param headers receives the parsed headers
+     * @return the position of the first byte after the blank line (also
+     *         stored in pos; state is set to STATE_BODY), or -1 if more
+     *         data is needed
+     * @throws IOException declared by parseHeader()
+     */
+    public int parseHeaders(MimeHeaders headers)
+        throws IOException {
+
+      while (true) {
+        if (pos >= lastValid) {
+          return -1; // need more data
+        }
+        // Peek for the blank line without moving pos yet
+        int peek = pos;
+        if (buf[peek] == CR) {
+          peek++;
+          if (peek >= lastValid) {
+            return -1; // CR is the last valid byte: cannot decide yet
+          }
+        }
+        if (buf[peek] == LF) {
+          pos = peek + 1;
+          state = STATE_BODY;
+          return pos;
+        }
+
+        int newPos = parseHeader(headers);
+        if (newPos < 0) {
+          return -1; // need more data
+        }
+        pos = newPos;
+      }
+    }
+    
+    
+    /**
+     * Parse an HTTP header, non-blocking.
+     * 
+     * The header value is compacted in place inside buf: CR bytes are
+     * dropped, trailing spaces of each line are trimmed, and continuation
+     * lines (lines starting with SP or HT) are joined to the previous line
+     * with that one whitespace byte in between. The buffer content is
+     * therefore modified by this call.
+     * 
+     * @param headers a new header will be added if found.
+     * @return -1 if the valid data ran out before a complete header (and the
+     *         first byte of the following line) was seen; pos and parts of
+     *         buf may already have been changed at that point.
+     *         Otherwise the new pos: the start of the next header, or the
+     *         CR/LF of the blank line if this was the last header. If the
+     *         line at pos is itself blank, the position just after its LF.
+     */
+    public int parseHeader(MimeHeaders headers)
+          throws IOException {
+
+        // Check for blank line: skip CRs; an LF reached this way means the
+        // line was empty, so return right after it without adding a header
+        byte chr = 0;
+        while (true) {
+            if (pos >= lastValid) {
+              return -1;
+            }
+            chr = buf[pos];
+            if ((chr == CR) || (chr == LF)) {
+                if (chr == LF) {
+                    pos++;
+                    return pos;
+                }
+            } else {
+                break;
+            }
+            pos++;
+        }
+
+        // Mark the current buffer position
+        int start = pos;
+        
+        // Header name: everything up to the colon (readToDelimAndLowerCase
+        // also stops at a space); 'false' means the name is not lower-cased
+        int startName = pos;
+        int newPos = readToDelimAndLowerCase(COLON, false);
+        if (newPos < 0) {
+          return -1;
+        }
+        pos = newPos;
+        // pos is one past the terminating byte, so endName is its index
+        int endName = pos - 1;
+        
+        // Mark the current buffer position: the compacted value will be
+        // written starting here; realPos is the write cursor
+        start = pos;
+        int realPos = pos;
+
+        //
+        // Reading the header value (which can be spanned over multiple lines)
+        //
+
+        boolean eol = false;
+        boolean validLine = true;
+
+        while (validLine) {
+          // Leading spaces/tabs of the line are not copied
+          if (!skipSpace()) { 
+            return -1;
+          }
+          // Write position just after the last non-space byte copied so far
+          int lastSignificantChar = realPos;
+
+            // Reading bytes until the end of the line
+            while (!eol) {
+
+                // Read new bytes if needed
+                if (pos >= lastValid) {
+                  return -1;
+                }
+
+                if (buf[pos] == CR) {
+                    // CR is dropped from the value
+                } else if (buf[pos] == LF) {
+                    eol = true;
+                } else if (buf[pos] == SP) {
+                    // Copied, but lastSignificantChar is not advanced, so
+                    // a run of spaces at the end of the line gets trimmed
+                    buf[realPos] = buf[pos];
+                    realPos++;
+                } else {
+                    buf[realPos] = buf[pos];
+                    
+                    // TODO: reentrant modification ?
+                    buf[pos] = SP; // so next time we skip it, if we parse 
+                    // again this line                    
+                    realPos++;
+                    lastSignificantChar = realPos;
+                }
+
+                pos++;
+
+            }
+
+            // Drop the trailing spaces copied after the last significant byte
+            realPos = lastSignificantChar;
+
+            // Checking the first character of the new line. If the character
+            // is a LWS, then it's a multiline header
+
+            // Read new bytes if needed
+            if (pos >= lastValid) {
+              return -1;
+            }
+
+            chr = buf[pos];
+            if ((chr != SP) && (chr != HT)) {
+                validLine = false;
+            } else {
+                eol = false;
+                // Copying one extra space in the buffer (since there must
+                // be at least one space inserted between the lines)
+                buf[realPos] = chr;
+                realPos++;
+            }
+
+        }
+
+        // Set the header value
+        // (presumably addValue registers the name and returns the holder for
+        // its value; verify against MimeHeaders)
+        MessageBytes headerValue = headers.addValue(buf, startName, 
+            endName - startName);
+        headerValue.setBytes(buf, start, realPos - start);
+
+        return pos;
+    }
+
+    
+    /**
+     * Read the request line: method, URI (split into path and query) and
+     * protocol. This function is meant to be used during the HTTP request
+     * header parsing. Do NOT attempt to read the request body using it.
+     *
+     * Leading blank lines (CR/LF) are skipped. If the URI is followed
+     * directly by CR or LF (HTTP/0.9 style request) the protocol is set to
+     * the empty string.
+     *
+     * @param methodMB      receives the method
+     * @param unparsedURIMB receives the URI including any query string
+     * @param queryMB       receives the part after the first '?', if any
+     * @param uriMB         receives the URI up to the first '?'
+     * @param protoMB       receives the protocol
+     * @return true if the whole line was parsed (state becomes
+     *         STATE_HEADERS); false if the valid data ran out and the buffer
+     *         is not big enough to accommodate the whole line yet
+     * @throws IOException declared by the signature; no statement in this
+     *         method throws it explicitly
+     */
+    public boolean parseRequestLine(MessageBytes methodMB, 
+                                    MessageBytes unparsedURIMB,
+                                    MessageBytes queryMB,
+                                    MessageBytes uriMB,
+                                    MessageBytes protoMB)
+            throws IOException {
+
+        lastParsed = pos;
+        state = STATE_REQUEST_LINE;
+
+        // --- Skip blank lines in front of the request line ---
+        while (true) {
+            if (pos >= lastValid) {
+                return false;
+            }
+            if ((buf[pos] != CR) && (buf[pos] != LF)) {
+                break;
+            }
+            pos++;
+        }
+
+        // --- Method: everything up to the first space (US-ASCII) ---
+        final int methodStart = pos;
+        while (true) {
+            if (pos >= lastValid) {
+                return false;
+            }
+            if (buf[pos++] == SP) {
+                break;
+            }
+        }
+        // pos is now one past the space
+        methodMB.setBytes(buf, methodStart, pos - 1 - methodStart);
+
+        // --- URI: up to the next space, or CR/LF for HTTP/0.9 ---
+        final int uriStart = pos;
+        int uriEnd = 0;
+        int questionPos = -1;
+        boolean eol = false;
+        while (true) {
+            if (pos >= lastValid) {
+                return false;
+            }
+            final byte current = buf[pos];
+            pos++;
+            if (current == SP) {
+                uriEnd = pos - 1;
+                break;
+            }
+            if ((current == CR) || (current == LF)) {
+                // HTTP/0.9 style request
+                eol = true;
+                uriEnd = pos - 1;
+                break;
+            }
+            if ((current == QUESTION) && (questionPos == -1)) {
+                // only the first '?' splits path and query
+                questionPos = pos - 1;
+            }
+        }
+
+        unparsedURIMB.setBytes(buf, uriStart, uriEnd - uriStart);
+        if (questionPos >= 0) {
+            queryMB.setBytes(buf, questionPos + 1, uriEnd - questionPos - 1);
+            uriMB.setBytes(buf, uriStart, questionPos - uriStart);
+        } else {
+            uriMB.setBytes(buf, uriStart, uriEnd - uriStart);
+        }
+
+        // --- Protocol: rest of the line (US-ASCII) ---
+        final int protoStart = pos;
+        int protoEnd = 0;
+        if (!eol) {
+            while (true) {
+                if (pos >= lastValid) {
+                    return false;
+                }
+                final byte current = buf[pos];
+                pos++;
+                if (current == CR) {
+                    protoEnd = pos - 1;
+                } else if (current == LF) {
+                    if (protoEnd == 0) {
+                        protoEnd = pos - 1;
+                    }
+                    break;
+                }
+            }
+        }
+
+        if (protoEnd > protoStart) {
+            protoMB.setBytes(buf, protoStart, protoEnd - protoStart);
+        } else {
+            protoMB.setString("");
+        }
+
+        state = STATE_HEADERS;
+        lastParsed = pos;
+        return true;
+    }
+    
+
+    /**
+     * Reads a response (status) line: protocol, status code and the rest of
+     * the line. Leading blank lines are skipped first.
+     *
+     * @param protoMB    receives the token before the first space
+     * @param statusCode receives the token before the second space
+     * @param status     receives the remainder of the line
+     * @return true if the whole line was parsed (state becomes
+     *         STATE_HEADERS); false if any step ran out of data
+     * @throws IOException declared by the signature; no statement in this
+     *         method throws it explicitly
+     */
+    public boolean parseResponseLine(MessageBytes protoMB,
+                                 MessageBytes statusCode,
+                                 MessageBytes status)
+            throws IOException {
+      if (skipBlank() < 0) {
+        return false;
+      }
+      if (readToDelim(protoMB, SP) < 0) {
+        return false;
+      }
+      if (readToDelim(statusCode, SP) < 0) {
+        return false;
+      }
+      if (readToEnd(status) < 0) {
+        return false;
+      }
+
+      state = STATE_HEADERS;
+      return true;
+    }
+
+    /**
+     * Recycle the input buffer: drops the reference to the byte array and
+     * resets the position, the limit and the parsing state. This should be
+     * called when closing the connection.
+     */
+    public void recycle() {
+        buf = null;
+        pos = 0;
+        lastValid = 0;
+        state = STATE_REQUEST_LINE;
+    }
+
+    /**
+     * End processing of current HTTP request.
+     * Any bytes between pos and lastValid are moved to the start of the
+     * buffer, then the pointers and the state are reset so that we are ready
+     * to parse the next HTTP request.
+     * Note: All bytes of the current request should have been already 
+     * consumed.
+     */
+    public void nextRequest() {
+        final int remaining = lastValid - pos;
+        if (remaining > 0) {
+            // keep the unread tail, shifted to index 0
+            System.arraycopy(buf, pos, buf, 0, remaining);
+            lastValid = remaining;
+        } else {
+            lastValid = 0;
+        }
+        pos = 0;
+        state = STATE_REQUEST_LINE;
+    }
+
+    public String toString() {
+      return state + " " + pos + " " + lastValid + " " + 
+        new String(buf, 0, lastValid);
+    }
+}

Propchange: 
tomcat/sandbox/tomcat-lite/coyote-extensions/org/apache/tomcat/util/http/Http11Parser.java
------------------------------------------------------------------------------
    svn:eol-style = native



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

svn commit: r650949 - /tomcat/sandbox/tomcat-lite/coyote-extensions/org/apache/tomcat/util/http/Http11Parser.java

Reply via email to