Author: max
Date: 2007-08-27 11:03:32 -0700 (Mon, 27 Aug 2007)
New Revision: 6250
Modified:
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
Log:
Change 20070825-maxcarlson-z by [EMAIL PROTECTED] on 2007-08-25 14:45:32 PDT
in /Users/maxcarlson/openlaszlo/wafflecone
for http://svn.openlaszlo.org/openlaszlo/branches/wafflecone
Summary: Improve caching and file IO performance of server (thanks to Jason
Venner!)
New Features:
Bugs Fixed: LPP-4583 - Improve compiler and caching performance
Technical Reviewer: ptw
QA Reviewer: promanik
Doc Reviewer: (pending)
Documentation:
Release Notes:
Details: FileUtils.java - Precompile regexp. Avoid re-reading the same stream
twice for makeXMLReaderForFile() calls.
Cache.java - Use a BufferedInputStream to read items into the cache.
Tests: Compilation of large applications improved by ~10%!
Modified:
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
===================================================================
---
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
2007-08-27 14:07:07 UTC (rev 6249)
+++
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/cache/Cache.java
2007-08-27 18:03:32 UTC (rev 6250)
@@ -3,7 +3,7 @@
*
****************************************************************************/
/* J_LZ_COPYRIGHT_BEGIN *******************************************************
-* Copyright 2001-2006 Laszlo Systems, Inc. All Rights Reserved. *
+* Copyright 2001-2007 Laszlo Systems, Inc. All Rights Reserved. *
* Use is subject to license terms. *
* J_LZ_COPYRIGHT_END *********************************************************/
@@ -997,9 +997,9 @@
{
mLock = new ReentrantLock();
- FileInputStream in = null;
+ InputStream in = null;
try {
- in = new FileInputStream(f);
+ in = new BufferedInputStream(new FileInputStream(f));
ObjectInputStream istr = new ObjectInputStream(in);
mInfo = (CachedInfo)istr.readObject();
// after reading the object, call our override routine
Modified:
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
===================================================================
---
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
2007-08-27 14:07:07 UTC (rev 6249)
+++
openlaszlo/branches/wafflecone/WEB-INF/lps/server/src/org/openlaszlo/utils/FileUtils.java
2007-08-27 18:03:32 UTC (rev 6250)
@@ -36,6 +36,8 @@
import org.apache.log4j.*;
import org.apache.oro.text.regex.*;
+import java.io.InputStreamReader;
+import java.io.BufferedInputStream;
// A dir is absolute if it begins with "" (the empty string to
// the left of the initial '/'), or a drive letter.
@@ -137,6 +139,20 @@
}
+ private static final Pattern pattern; // compiled once in the static block below; group 1 captures the encoding="..." value of an XML declaration
+ static {
+ Perl5Compiler compiler = new Perl5Compiler();
+ Pattern tmp = null; // temporary, so the final field is assigned exactly once after the try/catch
+ try {
+ tmp =
compiler.compile("[^<]*\\s*<[?]xml\\s+[^>]*encoding=[\"'](.*)['\"][^>]*?>",
Perl5Compiler.READ_ONLY_MASK);
+ } catch( MalformedPatternException failed ) {
+ System.err.println( failed );
+ System.exit( 0 ); // NOTE(review): ends the JVM with exit status 0 if the pattern fails to compile - confirm this is intended
+ }
+ pattern = tmp;
+ }
+
+
/** Attempt to deduce the encoding of an XML file, by looking for the
"encoding" attribute in the
* XML declaration.
* Default is to return "UTF-8"
@@ -144,24 +160,26 @@
* @return the encoding name
* @throws IOException if an error occurs
*/
- public static String getXMLEncodingFromFile(String pathname, String
defaultEncoding)
+ public static Reader getXMLEncodingFromFile(InputStream input, String
defaultEncoding)
throws IOException {
- java.io.FileInputStream ifs = new java.io.FileInputStream(pathname);
ByteArrayOutputStream bout = new ByteArrayOutputStream();
- send(ifs, bout);
+ send(input, bout);
Perl5Matcher matcher = new Perl5Matcher();
- try {
- Perl5Compiler compiler = new Perl5Compiler();
- Pattern pattern =
compiler.compile("[^<]*\\s*<[?]xml\\s+[^>]*encoding=[\"'](.*)['\"][^>]*?>");
- if (matcher.contains(new String(bout.toByteArray()), pattern)) {
- MatchResult result = matcher.getMatch();
- String encoding = result.group(1);
- return encoding;
- } else {
- return "UTF-8";
- }
- } catch (MalformedPatternException e) {
- throw new RuntimeException(e.getMessage());
+
+ byte [] array = bout.toByteArray();
+ // We will ignore the byte order mark encoding for now,
+ // hopefully no one is going to be using UTF16. I don't want
+ // to deal right now with the case where the XML encoding
+ // directive conflicts with the byte order mark.
+ int skip = stripByteOrderMark( array );
+ ByteArrayInputStream bais = new ByteArrayInputStream( array, skip,
array.length );
+
+ if (matcher.contains(new String(array, 0, Math.min( 1024, array.length
)), pattern)) {
+ MatchResult result = matcher.getMatch();
+ String encoding = result.group(1);
+ return new InputStreamReader( bais, encoding );
+ } else {
+ return new InputStreamReader( bais, defaultEncoding );
}
}
@@ -174,18 +192,11 @@
*/
public static Reader makeXMLReaderForFile (String pathname, String
defaultEncoding)
throws IOException {
- String encoding = getXMLEncodingFromFile(pathname, defaultEncoding);
- InputStream ifs = new java.io.FileInputStream(pathname);
+ InputStream ifs = new BufferedInputStream( new
java.io.FileInputStream(pathname) );
if (pathname.endsWith(".lzo")) {
ifs = new java.util.zip.GZIPInputStream(ifs);
}
- java.io.PushbackInputStream pbis = new
java.io.PushbackInputStream(ifs, 1024);
- // We will ignore the byte order mark encoding for now,
- // hopefully no one is going to be using UTF16. I don't want
- // to deal right now with the case where the XML encoding
- // directive conflicts with the byte order mark.
- FileUtils.stripByteOrderMark(pbis);
- return new java.io.InputStreamReader(pbis, encoding);
+ return getXMLEncodingFromFile( ifs, defaultEncoding );
}
/** Read a (pushback-able) byte input stream looking for some form of
@@ -223,6 +234,46 @@
}
}
+ /** Inspect the start of a byte array for a Unicode Byte Order Mark
+ (UTF-8 or UTF-16). The array is not modified; only the BOM length is reported.
+ * @param raw the bytes to inspect; elements 0 through 2 are read unconditionally
+ * @return the number of leading bytes to skip: 2, 3, or 0 when no BOM is found
+ */
+ public static int stripByteOrderMark (byte[] raw) {
+ // Look at the first three bytes; if they form a UTF-8 or UTF-16
+ // BOM (byte order mark), report its length so the caller can skip it.
+ // NOTE(review): raw[0..2] are indexed with no length check - confirm callers never pass fewer than 3 bytes.
+ int c1 = ((int) raw[0]) & 0xff;
+ int c2 = ((int) raw[1]) & 0xff;
+ int c3 = ((int) raw[2]) & 0xff;
+ int count = 0; // NOTE(review): never read in this method
+ if (c1 == 0xFF & c2 == 0xFE) {
+ // FF FE is the UTF-16 Little Endian BOM (the commented-out "UTF-16BE" below is mislabeled)
+ // skip the first two bytes
+// pbis.unread(c3);
+// return "UTF-16BE";
+ return 2;
+ } else if (c1 == 0xFE & c2 == 0xFF) {
+ // FE FF is the UTF-16 Big Endian BOM (the commented-out "UTF-16LE" below is mislabeled)
+ // skip the first two bytes
+// pbis.unread(c3);
+// return "UTF-16LE";
+ return 2;
+ } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
+// // UTF-8 BOM
+// // discard all three chars
+// return "UTF-8";
+ return 3;
+ } else {
+ // No BOM recognized: nothing to skip
+// pbis.unread(c3);
+// pbis.unread(c2);
+// pbis.unread(c1);
+// return null;
+ return 0;
+ }
+ }
+
/**
* @param file file to read
* @return size of file
_______________________________________________
Laszlo-checkins mailing list
[email protected]
http://www.openlaszlo.org/mailman/listinfo/laszlo-checkins