Author: siren Date: Mon Jun 19 10:30:13 2006 New Revision: 415379 URL: http://svn.apache.org/viewvc?rev=415379&view=rev Log: fixed caching to store entries to disk as promised
Added: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java Modified: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java Added: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java?rev=415379&view=auto ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java (added) +++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/cache/CustomDiskPersistenceListener.java Mon Jun 19 10:30:13 2006 @@ -0,0 +1,12 @@ +package org.apache.nutch.cache; + +import com.opensymphony.oscache.plugins.diskpersistence.AbstractDiskPersistenceListener; + +public class CustomDiskPersistenceListener extends + AbstractDiskPersistenceListener { + + protected char[] getCacheFileName(String arg0) { + return arg0.toCharArray(); + } + +} Modified: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java?rev=415379&r1=415378&r2=415379&view=diff ============================================================================== --- 
lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java (original) +++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/CacheManager.java Mon Jun 19 10:30:13 2006 @@ -15,21 +15,58 @@ */ package org.apache.nutch.webapp; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.Serializable; +import java.util.Properties; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.webapp.common.Search; +import org.apache.nutch.webapp.common.ServiceLocator; import com.opensymphony.oscache.base.Cache; import com.opensymphony.oscache.base.CacheEntry; import com.opensymphony.oscache.base.EntryRefreshPolicy; import com.opensymphony.oscache.base.NeedsRefreshException; +import com.opensymphony.oscache.general.GeneralCacheAdministrator; /** - * CacheManager for + * This class is responsible for configuring the used cache and + * delivering cached Search objects. + * + * Configuration parameters can be overridden with the default Nutch + * configuration mechanism. + * + * Search Objects are compressed for smaller space requirements. 
*/ public class CacheManager { + + public static class ByteBufferWrapper implements Serializable { + + private static final long serialVersionUID = 1L; + byte[] contents; + + public ByteBufferWrapper(final byte[] contents){ + this.contents=contents; + } + + public byte[] getContents(){ + return contents; + } + + } - static final String CACHE_KEY="cache"; + static final Log LOG=LogFactory.getLog(CacheManager.class); + static final String CACHE_KEY=CacheManager.class.getName(); + class NutchRefreshPolicy implements EntryRefreshPolicy { private static final long serialVersionUID = 1L; @@ -41,17 +78,39 @@ EntryRefreshPolicy policy=new NutchRefreshPolicy(); + Cache cache; + GeneralCacheAdministrator cacheadmin; - protected CacheManager(){ - cache=new Cache(true,true,false,true,"com.opensymphony.oscache.base.algorithm.UnlimitedCache",Integer.MAX_VALUE); + protected CacheManager(Configuration conf){ + + Properties p=new Properties(); + + //use memory for caching + boolean cacheMemory=conf.getBoolean("cache.memory", false); + p.setProperty("cache.memory", Boolean.toString(cacheMemory)); + + //the persistence class used + String cachePersistenceClass=conf.get("cache.persistence.class","org.apache.nutch.cache.CustomDiskPersistenceListener"); + p.setProperty("cache.persistence.class", cachePersistenceClass); + + //where to store cache files (if file cache used) + String cachePath=conf.get("cache.path", "."); + p.setProperty("cache.path", cachePath); + + //capacity of cache (how many entries) + int cacheCapacity=conf.getInt("cache.capacity", 1000); + p.setProperty("cache.capacity", Integer.toString(cacheCapacity)); + + cacheadmin=new GeneralCacheAdministrator(p); + cache=cacheadmin.getCache(); } - public static CacheManager getInstance(Configuration conf){ + public synchronized static CacheManager getInstance(Configuration conf){ CacheManager cache=(CacheManager)conf.getObject(CACHE_KEY); if(cache==null) { - cache = new CacheManager(); + cache = new CacheManager(conf); 
conf.setObject(CACHE_KEY, cache); } @@ -64,8 +123,33 @@ * @return * @throws NeedsRefreshException */ - public Search getSearch(String id) throws NeedsRefreshException { - return (Search) cache.getFromCache(id); + public Search getSearch(String id, ServiceLocator locator) throws NeedsRefreshException { + Search search=null; + + ByteBufferWrapper w=(ByteBufferWrapper)cache.getFromCache(id); + if(w!=null){ + + + try { + long time=System.currentTimeMillis(); + ByteArrayInputStream is=new ByteArrayInputStream(w.getContents()); + GZIPInputStream gs = new GZIPInputStream(is); + DataInputStream dis = new DataInputStream(gs); + + search = new Search(locator); + search.readFields(dis); + long delta=System.currentTimeMillis()-time; + + if(LOG.isDebugEnabled()){ + LOG.debug("Decompressing cache entry took: " + delta + "ms."); + } + + search.init(); + } catch (IOException e) { + LOG.info("Could not get cached object: " + e); + } + } + return search; } /** @@ -75,7 +159,28 @@ * @param search the search to cache */ public void putSearch(String id, Search search){ - cache.putInCache(id,search,policy); + try { + long time=System.currentTimeMillis(); + ByteArrayOutputStream bos=new ByteArrayOutputStream(); + GZIPOutputStream gzos=new GZIPOutputStream(bos); + DataOutputStream oos=new DataOutputStream(gzos); + search.write(oos); + oos.flush(); + oos.close(); + gzos.close(); + long delta=System.currentTimeMillis()-time; + ByteBufferWrapper wrap=new ByteBufferWrapper(bos.toByteArray()); + if(LOG.isDebugEnabled()){ + LOG.debug("Compressing cache entry took: " + delta + "ms."); + LOG.debug("size: " + wrap.getContents().length + " bytes"); + } + cache.putInCache(id, wrap); + } catch (IOException e) { + LOG.info("cannot store object in cache: " + e); + } } + public void cancelUpdate(String key) { + cache.cancelUpdate(key); + } } Modified: lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java?rev=415379&r1=415378&r2=415379&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java (original) +++ lucene/nutch/trunk/contrib/web2/plugins/web-caching-oscache/src/java/org/apache/nutch/webapp/controller/CachingSearchController.java Mon Jun 19 10:30:13 2006 @@ -25,6 +25,7 @@ import org.apache.nutch.webapp.CacheManager; import org.apache.nutch.webapp.common.Search; import org.apache.nutch.webapp.common.ServiceLocator; +import org.apache.nutch.webapp.common.Startable; import org.apache.nutch.webapp.controller.SearchController; import org.apache.struts.tiles.ComponentContext; @@ -34,37 +35,51 @@ * This naive search result caching implementation is just an example of * extending the web ui. 
*/ -public class CachingSearchController extends SearchController { +public class CachingSearchController extends SearchController implements Startable { + + CacheManager manager=null; public void nutchPerform(ComponentContext tileContext, HttpServletRequest request, HttpServletResponse response, ServletContext servletContext) throws ServletException, IOException { - Search search = null; - boolean requiresUpdate = false; - - // key used for caching - String key = request.getQueryString(); - ServiceLocator locator = getServiceLocator(request); - - if (key != null) { + Search search; + + // key used for caching results, should really be something else but a part of user + // definable String + String key = request.getQueryString().replace("?","_").replace("&","_"); + StringBuffer cacheKey=new StringBuffer(key.length()*2); + for(int i=0;i<key.length();i++){ + cacheKey.append(key.charAt(i)).append(java.io.File.separatorChar); + } + + if(LOG.isDebugEnabled()){ + LOG.debug("cache key:" + cacheKey); + } + if (cacheKey != null) { try { - search = CacheManager.getInstance(locator.getConfiguration()) - .getSearch(key); + search = manager.getSearch(cacheKey.toString(), locator); request.setAttribute(Search.REQ_ATTR_SEARCH, search); - LOG.info("Using cached"); + if(LOG.isDebugEnabled()) { + LOG.debug("Using cached"); + } } catch (NeedsRefreshException e) { - requiresUpdate = true; - LOG.info("Cache update required"); + try{ + super.nutchPerform(tileContext, request, response, servletContext); + search = (Search) locator.getSearch(); + manager.putSearch(cacheKey.toString(), + search); + } catch (Exception ex){ + LOG.info("Cancelling update"); + manager.cancelUpdate(cacheKey.toString()); + } } } - if (key!=null && (search == null || requiresUpdate)) { - LOG.info("Cache miss"); - super.nutchPerform(tileContext, request, response, servletContext); - search = (Search) locator.getSearch(); - CacheManager.getInstance(locator.getConfiguration()).putSearch(key, - search); - } + } + + 
public void start(ServletContext servletContext) { + ServiceLocator locator=getServiceLocator(servletContext); + manager=CacheManager.getInstance(locator.getConfiguration()); } } Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java?rev=415379&r1=415378&r2=415379&view=diff ============================================================================== --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java Mon Jun 19 10:30:13 2006 @@ -15,12 +15,15 @@ */ package org.apache.nutch.webapp.common; +import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Writable; import org.apache.nutch.html.Entities; import org.apache.nutch.searcher.Hit; import org.apache.nutch.searcher.HitDetails; @@ -35,7 +38,10 @@ * results) might be a good candidate for caching ? 
* */ -public class Search { +public class Search implements Writable { + + private static final long serialVersionUID = 1L; + public static final String REQ_ATTR_SEARCH="nutchSearch"; public static final Log LOG = LogFactory.getLog(Search.class); @@ -104,18 +110,8 @@ int realEnd = (int) Math.min(hits.getLength(), getStartOffset() + getHitsRequired()); - int endOffset=hits.getLength(); - + init(); show = hits.getHits(getStartOffset(), realEnd - getStartOffset()); - - navigationHelper = new NavigationHelper(startOffset, endOffset, hitsPerPage, hits - .getTotal(), hits.totalIsExact()); - - // set offset to next page to form so it get's to ui - if (navigationHelper.hasNext()) { - form.setValue(SearchForm.NAME_START, Long.toString(navigationHelper - .getNextPageStart())); - } try { details = locator.getNutchBean().getDetails(show); @@ -126,6 +122,20 @@ } } + public void init(){ + int endOffset=hits.getLength(); + + navigationHelper = new NavigationHelper(startOffset, endOffset, hitsPerPage, hits + .getTotal(), hits.totalIsExact()); + + // set offset to next page to form so it get's to ui + if (navigationHelper.hasNext()) { + form.setValue(SearchForm.NAME_START, Long.toString(navigationHelper + .getNextPageStart())); + } + } + + /** * gets the results of search to display * @@ -156,6 +166,10 @@ } return ret; } + + public Search(){ + + } public Search(ServiceLocator locator) { this.locator = locator; @@ -463,5 +477,55 @@ public void launchSearch() { BaseSearch bs=new BaseSearch(locator); bs.doSearch(); + } + + public void write(DataOutput out) throws IOException { + LOG.info("writing hits"); + hits.write(out); + + + out.writeInt(show.length); + + for(int i=0;i<show.length;i++){ + show[i].write(out); + } + + out.writeInt(details.length); + for(int i=0;i<details.length;i++){ + details[i].write(out); + } + + out.writeInt(summaries.length); + for(int i=0;i<summaries.length;i++){ + summaries[i].write(out); + } + + } + + public void readFields(DataInput in) throws IOException { 
+ hits=new Hits(); + hits.readFields(in); + int showlength=in.readInt(); + show=new Hit[showlength]; + for(int i=0;i<showlength;i++){ + show[i]=new Hit(); + show[i].readFields(in); + } + + int detailsLength=in.readInt(); + details=new HitDetails[detailsLength]; + for(int i=0;i<detailsLength;i++){ + details[i]=new HitDetails(); + details[i].readFields(in); + } + + int summariesLength=in.readInt(); + summaries=new Summary[summariesLength]; + for(int i=0;i<summariesLength;i++){ + summaries[i]=new Summary(); + summaries[i].readFields(in); + } + + } }