There are several typos in here. Can you fix them? :)

On Tue, Nov 11, 2008 at 11:24 AM, <[EMAIL PROTECTED]> wrote:
> Author: lryan > Date: Tue Nov 11 11:24:14 2008 > New Revision: 713121 > > URL: http://svn.apache.org/viewvc?rev=713121&view=rev > Log: > Add support for caching parsed HTML documents in memory to speed up > rewriter (off by default) > Make cache exposes capacity as a property to avoid key generation costs > when cache has no capacity > > Modified: > > > incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml > > > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java > > > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java > > > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java > > > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java > > Modified: > incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml?rev=713121&r1=713120&r2=713121&view=diff > > ============================================================================== > --- > incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml > (original) > +++ > incubator/shindig/trunk/java/common/src/main/bundle/org/apache/shindig/common/cache/ehcache/ehcacheConfig.xml > Tue Nov 11 11:24:14 2008 > @@ -35,6 +35,14 @@ > diskPersistent="false" > memoryStoreEvictionPolicy="LFU"/> > > + <!-- By default do not cache any parsed documents. This is experimental > --> > + <cache name="parsedDocuments" > + maxElementsInMemory="0" > + eternal="true" > + overflowToDisk="false" > + diskPersistent="false" > + memoryStoreEvictionPolicy="LFU"/> > + > <!-- > This configuration is only suitable for a modest sized HTTP cache. 
> You should configure a shared cache for production use. > > Modified: > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java?rev=713121&r1=713120&r2=713121&view=diff > > ============================================================================== > --- > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java > (original) > +++ > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/Cache.java > Tue Nov 11 11:24:14 2008 > @@ -43,4 +43,12 @@ > * @return The entry stored under the given key, or null if it doesn't > exist. > */ > public V removeElement(K key); > + > + /** > + * Returns the capacity of the cache. > + * > + * @return a positive integer indicating the upper bound on the number > of allowed elements > + * in the cace, -1 signifies that the capacity is unbounded > + */ > + public long getCapacity(); > } > > Modified: > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java?rev=713121&r1=713120&r2=713121&view=diff > > ============================================================================== > --- > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java > (original) > +++ > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/LruCache.java > Tue Nov 11 11:24:14 2008 > @@ -44,6 +44,10 @@ > return super.remove(key); > } > > + public long getCapacity() { > + return capacity; > + } > + > @Override > protected synchronized boolean removeEldestEntry(Map.Entry<K, V> eldest) > { > return size() > capacity; > > Modified: > 
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java?rev=713121&r1=713120&r2=713121&view=diff > > ============================================================================== > --- > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java > (original) > +++ > incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/cache/ehcache/EhConfiguredCache.java > Tue Nov 11 11:24:14 2008 > @@ -82,4 +82,13 @@ > return (V) value; > } > > + /* > + * (non-Javadoc) > + * > + * @see org.apache.shindig.common.cache.Cache#getCapacity() > + */ > + public long getCapacity() { > + return cache.getCacheConfiguration().getMaxElementsInMemory() + > + cache.getCacheConfiguration().getMaxElementsOnDisk(); > + } > } > > Modified: > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java?rev=713121&r1=713120&r2=713121&view=diff > > ============================================================================== > --- > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java > (original) > +++ > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java > Tue Nov 11 11:24:14 2008 > @@ -17,24 +17,31 @@ > */ > package org.apache.shindig.gadgets.parse; > > +import org.apache.shindig.common.cache.Cache; > +import org.apache.shindig.common.cache.CacheProvider; > +import org.apache.shindig.common.util.HashUtil; > import org.apache.shindig.gadgets.GadgetException; > import 
org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser; > > import com.google.inject.ImplementedBy; > - > +import com.google.inject.Inject; > import org.w3c.dom.Document; > > /** > - * Parser for arbitrary HTML content. The content may simply be a > - * fragment or snippet of HTML rather than a fully-structured Document, > - * so the interface returns a list of [EMAIL PROTECTED] ParsedHtmlNode} > objects > - * rather than a single top-level item. > - * > - * [EMAIL PROTECTED] ParsedHtmlNode} for parsing details > + * Parser for arbitrary HTML content > */ > @ImplementedBy(NekoSimplifiedHtmlParser.class) > public abstract class GadgetHtmlParser { > > + public static final String PARSED_DOUCMENTS = "parsedDocuments"; > + > + private Cache<String, Document> documentCache; > + > + @Inject > + public void setCacheProvider(CacheProvider cacheProvider) { > + documentCache = cacheProvider.createCache(PARSED_DOUCMENTS); > + } > + > /** > * @param content > * @return true if we detect a preamble of doctype or html > @@ -45,17 +52,40 @@ > } > > public final Document parseDom(String source) throws GadgetException { > - Document document = parseDomImpl(source); > - // Ensure head tag exists > - if (DomUtil.getFirstNamedChildNode(document.getDocumentElement(), > "head") == null) { > - // Add as first element > - document.getDocumentElement().insertBefore( > - document.createElement("head"), > - document.getDocumentElement().getFirstChild()); > + Document document = null; > + String key = null; > + // Avoid checksum overhead if we arent caching > + boolean shouldCache = shouldCache(); > + if (shouldCache) { > + // TODO - Consider using the source if its under a certain size > + key = HashUtil.rawChecksum(source.getBytes()); > + document = documentCache.getElement(key); > + } > + if (document == null) { > + document = parseDomImpl(source); > + // Ensure head tag exists > + if (DomUtil.getFirstNamedChildNode(document.getDocumentElement(), > "head") == null) { > + // Add 
as first element > + document.getDocumentElement().insertBefore( > + document.createElement("head"), > + document.getDocumentElement().getFirstChild()); > + } > + if (shouldCache) { > + documentCache.addElement(key, document); > + } > + } > + if (shouldCache) { > + Document copy = (Document)document.cloneNode(true); > + HtmlSerializer.copySerializer(document, copy); > + return copy; > } > return document; > } > > + private boolean shouldCache() { > + return documentCache != null && documentCache.getCapacity() != 0; > + } > + > /** > * @param source > * @return a parsed document or document fragment > > >

