Author: max
Date: 2007-08-27 11:03:32 -0700 (Mon, 27 Aug 2007)
New Revision: 6250

Modified:
   
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
   
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
Log:
Change 20070825-maxcarlson-z by [EMAIL PROTECTED] on 2007-08-25 14:45:32 PDT
    in /Users/maxcarlson/openlaszlo/wafflecone
    for http://svn.openlaszlo.org/openlaszlo/branches/wafflecone

Summary: Improve caching and file IO performance of server (thanks to Jason 
Venner!)

New Features:

Bugs Fixed: LPP-4583 - Improve compiler and caching performance

Technical Reviewer: ptw
QA Reviewer: promanik
Doc Reviewer: (pending)

Documentation:

Release Notes:

Details: FileUtils.java - Precompile regexp.  Avoid re-reading the same stream 
twice for makeXMLReaderForFile() calls.

Cache.java - Use a BufferedInputStream to read items into the cache.
    

Tests: Compilation of large applications improved by ~10%!



Modified: 
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
===================================================================
--- 
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
       2007-08-27 14:07:07 UTC (rev 6249)
+++ 
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
       2007-08-27 18:03:32 UTC (rev 6250)
@@ -3,7 +3,7 @@
  * 
****************************************************************************/
 
 /* J_LZ_COPYRIGHT_BEGIN *******************************************************
-* Copyright 2001-2006 Laszlo Systems, Inc.  All Rights Reserved.              *
+* Copyright 2001-2007 Laszlo Systems, Inc.  All Rights Reserved.              *
 * Use is subject to license terms.                                            *
 * J_LZ_COPYRIGHT_END *********************************************************/
 
@@ -997,9 +997,9 @@
         { 
             mLock = new ReentrantLock();
 
-            FileInputStream in = null;
+            InputStream in = null;
             try {
-                in = new FileInputStream(f);
+                in = new BufferedInputStream(new FileInputStream(f));
                 ObjectInputStream istr = new ObjectInputStream(in);
                 mInfo = (CachedInfo)istr.readObject();
         // after reading the object, call our override routine

Modified: 
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
===================================================================
--- 
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
   2007-08-27 14:07:07 UTC (rev 6249)
+++ 
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
   2007-08-27 18:03:32 UTC (rev 6250)
@@ -36,6 +36,8 @@
 
 import org.apache.log4j.*;
 import org.apache.oro.text.regex.*;
+import java.io.InputStreamReader;
+import java.io.BufferedInputStream;
 
 // A dir is absolute if it begins with "" (the empty string to
 // the left of the initial '/'), or a drive letter.
@@ -137,6 +139,20 @@
     }
 
 
+    private static final Pattern pattern;
+    static {
+        Perl5Compiler compiler = new Perl5Compiler();
+        Pattern tmp = null;
+        try {
+            tmp = 
compiler.compile("[^<]*\\s*<[?]xml\\s+[^>]*encoding=[\"'](.*)['\"][^>]*?>", 
Perl5Compiler.READ_ONLY_MASK);
+        } catch( MalformedPatternException failed ) {
+            System.err.println( failed );
+            System.exit( 0 );
+        }
+        pattern = tmp;
+    }
+        
+
     /** Attempt to deduce the encoding of an XML file, by looking for the 
"encoding" attribute in the
      * XML declaration.
      * Default is to return "UTF-8"
@@ -144,24 +160,26 @@
      * @return the encoding name
      * @throws IOException if an error occurs
      */
-    public static String getXMLEncodingFromFile(String pathname, String 
defaultEncoding)
+    public static Reader getXMLEncodingFromFile(InputStream input, String 
defaultEncoding)
       throws IOException {
-        java.io.FileInputStream ifs = new java.io.FileInputStream(pathname);
         ByteArrayOutputStream bout = new ByteArrayOutputStream();
-        send(ifs, bout);
+        send(input, bout);
         Perl5Matcher matcher = new Perl5Matcher();
-        try {
-            Perl5Compiler compiler = new Perl5Compiler();
-            Pattern pattern = 
compiler.compile("[^<]*\\s*<[?]xml\\s+[^>]*encoding=[\"'](.*)['\"][^>]*?>");
-            if (matcher.contains(new String(bout.toByteArray()), pattern)) {
-                MatchResult result = matcher.getMatch();
-                String encoding = result.group(1);
-                return encoding;
-            } else {
-                return "UTF-8";
-            }
-        } catch (MalformedPatternException e) {
-            throw new RuntimeException(e.getMessage());
+        
+        byte [] array = bout.toByteArray();
+        // We will ignore the byte order mark encoding for now,
+        // hopefully no one is going to be using UTF16. I don't want
+        // to deal right now with the case where the XML encoding
+        // directive conflicts with the byte order mark.
+        int skip = stripByteOrderMark( array );
+        ByteArrayInputStream bais = new ByteArrayInputStream( array, skip, 
array.length );
+        
+        if (matcher.contains(new String(array, 0, Math.min( 1024, array.length 
)), pattern)) {
+            MatchResult result = matcher.getMatch();
+            String encoding = result.group(1);
+            return new InputStreamReader( bais, encoding ); 
+        } else {
+            return new InputStreamReader( bais, defaultEncoding ); 
         }
     }
 
@@ -174,18 +192,11 @@
      */
     public static Reader makeXMLReaderForFile (String pathname, String 
defaultEncoding)
       throws IOException {
-        String encoding = getXMLEncodingFromFile(pathname, defaultEncoding);
-        InputStream ifs = new java.io.FileInputStream(pathname);
+        InputStream ifs = new BufferedInputStream( new 
java.io.FileInputStream(pathname) );
         if (pathname.endsWith(".lzo")) {
           ifs = new java.util.zip.GZIPInputStream(ifs);
         }
-        java.io.PushbackInputStream pbis = new 
java.io.PushbackInputStream(ifs, 1024);
-        // We will ignore the byte order mark encoding for now,
-        // hopefully no one is going to be using UTF16. I don't want
-        // to deal right now with the case where the XML encoding
-        // directive conflicts with the byte order mark.
-        FileUtils.stripByteOrderMark(pbis);
-        return new java.io.InputStreamReader(pbis, encoding);
+        return getXMLEncodingFromFile( ifs, defaultEncoding );
     }
 
     /** Read a (pushback-able) byte input stream looking for some form of
@@ -223,6 +234,46 @@
         }
     }
 
+    /** Inspect the start of a byte array for a Unicode Byte Order Mark
+        (UTF-8 or UTF-16). The array itself is not modified.
+     * @param raw the bytes to inspect; must contain at least three bytes
+     * @return the number of leading bytes to skip (0 if no BOM is found)
+     */
+    public static int stripByteOrderMark (byte[] raw) {
+        // Look at the first three bytes; if they begin with a UTF-8 or
+        // UTF-16 encoded BOM (byte order mark), report how many bytes
+        // the caller should skip.
+        int c1 = ((int) raw[0]) & 0xff;
+        int c2 = ((int) raw[1]) & 0xff;
+        int c3 = ((int) raw[2]) & 0xff;
+        int count = 0;
+        if (c1 == 0xFF & c2 == 0xFE) {
+            // UTF16 Little Endian BOM (bytes FF FE)
+            // skip the first two bytes
+//             pbis.unread(c3);
+//             return "UTF-16LE";
+            return 2;
+        } else if (c1 == 0xFE & c2 == 0xFF) {
+            // UTF16 Big Endian BOM (bytes FE FF)
+            // skip the first two bytes
+//             pbis.unread(c3);
+//             return "UTF-16BE";
+            return 2;
+        } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
+//             // UTF-8 BOM
+//             // discard all three chars
+//             return "UTF-8";
+            return 3;
+        } else {
+            // Otherwise put back the chars we just read and proceed
+//             pbis.unread(c3);
+//             pbis.unread(c2);
+//             pbis.unread(c1);
+//             return null;
+            return 0;
+        }
+    }
+
     /**
      * @param file file to read
      * @return size of file


_______________________________________________
Laszlo-checkins mailing list
[email protected]
http://www.openlaszlo.org/mailman/listinfo/laszlo-checkins

Reply via email to