Author: lryan
Date: Tue Jul 29 17:58:24 2008
New Revision: 680892

URL: http://svn.apache.org/viewvc?rev=680892&view=rev
Log:
Optimization to record the Charset into the Content-Type header so we dont keep 
calling ICU4J charset detection which is very expensive. This is a partial 
solution

Modified:
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
    
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/DefaultContentRewriter.java

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java?rev=680892&r1=680891&r2=680892&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
 Tue Jul 29 17:58:24 2008
@@ -47,17 +47,24 @@
  * Represents the results of an HTTP content retrieval operation.
  */
 public class HttpResponse {
+
   // Replicate HTTP status codes here.
   public final static int SC_OK = 200;
+
   public final static int SC_UNAUTHORIZED = 401;
+
   public final static int SC_FORBIDDEN = 403;
+
   public final static int SC_NOT_FOUND = 404;
+
   public final static int SC_INTERNAL_SERVER_ERROR = 500;
+
   public final static int SC_TIMEOUT = 504;
+
   private final static Set<String> BINARY_CONTENT_TYPES = new 
HashSet<String>(Arrays.asList(
-    "image/jpeg", "image/png", "image/gif", "image/jpg", 
"application/x-shockwave-flash"
+      "image/jpeg", "image/png", "image/gif", "image/jpg", 
"application/x-shockwave-flash"
   ));
-  
+
   private final static Set<Integer> CACHE_CONTROL_OK_STATUS_CODES = new 
HashSet<Integer>(
       Arrays.asList(SC_OK, SC_UNAUTHORIZED, SC_FORBIDDEN));
 
@@ -66,49 +73,65 @@
   protected final static long NEGATIVE_CACHE_TTL = 30 * 1000;
 
   /**
-   * Default TTL for an entry in the cache that does not have any
-   * cache controlling headers.
+   * Default TTL for an entry in the cache that does not have any cache 
controlling headers.
    */
   protected static final long DEFAULT_TTL = 5L * 60L * 1000L;
 
   public static final String DEFAULT_ENCODING = "UTF-8";
 
   private final int httpStatusCode;
+
+  // Derivation of encoding from content is EXPENSIVE using icu4j so be careful
+  // how you construct, copy and store response objects.
   private final String encoding;
 
   // Used to lazily convert to a string representation of the input.
   private String responseString = null;
+
   private final byte[] responseBytes;
+
   private final Map<String, List<String>> headers;
+
   private final Map<String, String> metadata;
+
   private final long date;
 
   private HttpResponse rewritten;
 
-  @Inject @Named("http.cache.negativeCacheTtl")
+  @Inject
+  @Named("http.cache.negativeCacheTtl")
   private static long negativeCacheTtl = NEGATIVE_CACHE_TTL;
 
-  @Inject @Named("http.cache.defaultTtl")
+  @Inject
+  @Named("http.cache.defaultTtl")
   private static long defaultTtl = DEFAULT_TTL;
 
   // Holds character sets for fast conversion
-  private static ConcurrentHashMap<String,Charset> encodingToCharset
-      = new ConcurrentHashMap<String,Charset>();
+  private static ConcurrentHashMap<String, Charset> encodingToCharset
+      = new ConcurrentHashMap<String, Charset>();
 
   /**
    * Create a dummy empty map. Access via HttpResponse.ERROR
    */
   public HttpResponse(int statusCode) {
-    this(statusCode, ArrayUtils.EMPTY_BYTE_ARRAY, null);
+    this(statusCode, ArrayUtils.EMPTY_BYTE_ARRAY, null, 
Charset.defaultCharset().name());
   }
 
   /**
-   * @param httpStatusCode
-   * @param responseBytes
    * @param headers May be null.
    */
   public HttpResponse(int httpStatusCode, byte[] responseBytes,
-                       Map<String, List<String>> headers) {
+      Map<String, List<String>> headers) {
+    this(httpStatusCode, responseBytes, headers, null);
+  }
+
+  /**
+   * @param headers  May be null.
+   * @param encoding May be null.
+   */
+  public HttpResponse(int httpStatusCode, byte[] responseBytes,
+      Map<String, List<String>> headers,
+      String encoding) {
     this.httpStatusCode = httpStatusCode;
     if (responseBytes == null) {
       this.responseBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
@@ -134,17 +157,22 @@
     this.headers = tmpHeaders;
 
     this.metadata = new HashMap<String, String>();
-    this.encoding = detectEncoding();
+    if (encoding == null) {
+      if (responseBytes != null && responseBytes.length > 0) {
+        this.encoding = detectEncoding();
+      } else {
+        this.encoding = Charset.defaultCharset().name();
+      }
+    } else {
+      this.encoding = encoding;
+    }
   }
 
   /**
-   * Simple constructor for setting a basic response from a string. Mostly used
-   * for testing.
-   *
-   * @param body
+   * Simple constructor for setting a basic response from a string. Mostly 
used for testing.
    */
   public HttpResponse(String body) {
-    this(SC_OK, body.getBytes(), null);
+    this(SC_OK, body.getBytes(), null, Charset.defaultCharset().name());
   }
 
   public static HttpResponse error() {
@@ -185,8 +213,9 @@
   }
 
   /**
-   * Attempts to determine the encoding of the body. If it can't be determined,
-   * we use DEFAULT_ENCODING instead.
+   * Attempts to determine the encoding of the body. If it can't be 
determined, we use
+   * DEFAULT_ENCODING instead.
+   *
    * @return The detected encoding or DEFAULT_ENCODING.
    */
   private String detectEncoding() {
@@ -208,6 +237,13 @@
     CharsetDetector detector = new CharsetDetector();
     detector.setText(responseBytes);
     CharsetMatch match = detector.detect();
+
+    if (contentType != null) {
+      // Record the charset in the content-type header so that its value can 
be cached
+      // and re-used. This is a BIG performance win.
+      this.headers.put("Content-Type",
+          Lists.newArrayList(contentType + "; charset=" + 
match.getName().toUpperCase()));
+    }
     return match.getName().toUpperCase();
   }
 
@@ -237,9 +273,9 @@
   }
 
   /**
-   * Attempts to convert the response body to a string using the Content-Type
-   * header. If no Content-Type header is specified (or it doesn't include an
-   * encoding), we will assume it is UTF-8.
+   * Attempts to convert the response body to a string using the Content-Type 
header. If no
+   * Content-Type header is specified (or it doesn't include an encoding), we 
will assume it is
+   * UTF-8.
    *
    * @return The body as a string.
    */
@@ -248,7 +284,7 @@
       Charset charset = encodingToCharset.get(encoding);
       if (charset == null) {
         charset = Charset.forName(encoding);
-        encodingToCharset.put(encoding,charset);
+        encodingToCharset.put(encoding, charset);
       }
       responseString = 
charset.decode(ByteBuffer.wrap(responseBytes)).toString();
 
@@ -275,7 +311,6 @@
   }
 
   /**
-   * @param name
    * @return All headers with the given name.
    */
   public List<String> getHeaders(String name) {
@@ -288,9 +323,8 @@
   }
 
   /**
-   * @param name
-   * @return The first set header with the given name or null if not set. If
-   *         you need multiple values for the header, use getHeaders().
+   * @return The first set header with the given name or null if not set. If 
you need multiple
+   *         values for the header, use getHeaders().
    */
   public String getHeader(String name) {
     List<String> headerList = getHeaders(name);
@@ -310,6 +344,7 @@
 
   /**
    * Get the rewritten version of this content
+   *
    * @return A rewritten HttpResponse
    */
   public HttpResponse getRewritten() {
@@ -318,16 +353,14 @@
 
   /**
    * Set the rewritten version of this content
-   * @param rewritten
    */
   public void setRewritten(HttpResponse rewritten) {
     this.rewritten = rewritten;
   }
 
   /**
-   * Set the externally forced minimum cache min-TTL
-   * This is derived from the "refresh" param on OpenProxy request
-   * Value is in seconds
+   * Set the externally forced minimum cache min-TTL This is derived from the 
"refresh" param on
+   * OpenProxy request Value is in seconds
    */
   public void setForcedCacheTTL(int forcedCacheTtl) {
     if (forcedCacheTtl > 0) {
@@ -336,7 +369,7 @@
       this.headers.put("Cache-Control", Lists.newArrayList("public,max-age=" + 
forcedCacheTtl));
     }
   }
-  
+
   /**
    * Sets cache-control headers indicating the response is not cacheable.
    */

Modified: 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/DefaultContentRewriter.java
URL: 
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/DefaultContentRewriter.java?rev=680892&r1=680891&r2=680892&view=diff
==============================================================================
--- 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/DefaultContentRewriter.java
 (original)
+++ 
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/DefaultContentRewriter.java
 Tue Jul 29 17:58:24 2008
@@ -17,17 +17,24 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import com.google.inject.name.Named;
+
 import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.GadgetSpecFactory;
 import org.apache.shindig.gadgets.http.HttpRequest;
 import org.apache.shindig.gadgets.http.HttpResponse;
 import org.apache.shindig.gadgets.spec.GadgetSpec;
 
-import com.google.inject.Inject;
-import com.google.inject.Singleton;
-import com.google.inject.name.Named;
-
-import java.io.*;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
 import java.net.URI;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -41,9 +48,13 @@
 public class DefaultContentRewriter implements ContentRewriter {
 
   private final GadgetSpecFactory specFactory;
+
   private final String includeUrls;
+
   private final String excludeUrls;
+
   private final String expires;
+
   private final Set<String> includeTags;
 
   @Inject
@@ -85,7 +96,8 @@
           output)) {
         return new HttpResponse(original.getHttpStatusCode(),
             baos.toByteArray(),
-            original.getAllHeaders());
+            original.getAllHeaders(),
+            original.getEncoding());
       }
       return null;
     } catch (UnsupportedEncodingException uee) {
@@ -111,7 +123,8 @@
     }
 
     // Store the feature in the spec so we dont keep parsing it
-    ContentRewriterFeature rewriterFeature = 
(ContentRewriterFeature)spec.getAttribute("content-rewrite");
+    ContentRewriterFeature rewriterFeature = (ContentRewriterFeature) spec
+        .getAttribute("content-rewrite");
     if (rewriterFeature == null) {
       rewriterFeature = new ContentRewriterFeature(spec, includeUrls, 
excludeUrls, expires,
           includeTags);
@@ -157,12 +170,16 @@
   }
 
   private boolean isHTML(String mime) {
-    if (mime == null) return false;
+    if (mime == null) {
+      return false;
+    }
     return (mime.toLowerCase().contains("html"));
   }
 
   private boolean isCSS(String mime) {
-    if (mime == null) return false;
+    if (mime == null) {
+      return false;
+    }
     return (mime.toLowerCase().contains("css"));
   }
 
@@ -174,7 +191,8 @@
     return "/gadgets/concat?";
   }
 
-  protected LinkRewriter createLinkRewriter(GadgetSpec spec, 
ContentRewriterFeature rewriterFeature) {
+  protected LinkRewriter createLinkRewriter(GadgetSpec spec,
+      ContentRewriterFeature rewriterFeature) {
     return new ProxyingLinkRewriter(spec.getUrl(), rewriterFeature, 
getProxyUrl());
   }
 }


Reply via email to