Author: ab Date: Wed Feb 6 04:06:34 2008 New Revision: 618975 URL: http://svn.apache.org/viewvc?rev=618975&view=rev Log: NUTCH-604 Upgrade to Lucene 2.3.0.
Added: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar (with props) lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar (with props) lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar (with props) lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar (with props) Removed: lucene/nutch/trunk/lib/lucene-core-2.2.0.jar lucene/nutch/trunk/lib/lucene-misc-2.2.0.jar lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.2.0.jar lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.2.0.jar Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=618975&r1=618974&r2=618975&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Wed Feb 6 04:06:34 2008 @@ -195,6 +195,8 @@ 68. NUTCH-587 - Upgrade to Hadoop 0.15.3 (kubes) +69. NUTCH-604 - Upgrade to Lucene 2.3.0 (ab) + Release 0.9 - 2007-04-02 1. Changed log4j confiquration to log to stdout on commandline Added: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-core-2.3.0.jar?rev=618975&view=auto ============================================================================== Binary file - no diff available. 
Propchange: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar ------------------------------------------------------------------------------ svn:executable = * Propchange: lucene/nutch/trunk/lib/lucene-core-2.3.0.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar?rev=618975&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar ------------------------------------------------------------------------------ svn:executable = * Propchange: lucene/nutch/trunk/lib/lucene-misc-2.3.0.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java?rev=618975&r1=618974&r2=618975&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java Wed Feb 6 04:06:34 2008 @@ -63,13 +63,17 @@ private static final String TEMP_FILE = "temp"; private final RAMDirectory tempDir = new RAMDirectory(); - private final RAMOutputStream out = - (RAMOutputStream)tempDir.createOutput(TEMP_FILE); + private RAMOutputStream out; private IndexInput in; public SortedTermPositions(TermPositions original, int[] oldToNew) { this.original = original; this.oldToNew = oldToNew; + try { + out = (RAMOutputStream)tempDir.createOutput(TEMP_FILE); + } catch (IOException ioe) { + LOG.warn("Error creating temporary output: " + StringUtils.stringifyException(ioe)); + } } 
public void seek(Term term) throws IOException { Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java?rev=618975&r1=618974&r2=618975&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java Wed Feb 6 04:06:34 2008 @@ -19,29 +19,27 @@ import java.io.File; import java.io.IOException; -import java.net.URI; - -import java.util.ArrayList; -import java.util.Enumeration; - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; - -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FieldCache; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - -import org.apache.hadoop.fs.*; -import org.apache.hadoop.io.*; -import org.apache.hadoop.conf.*; -import org.apache.nutch.indexer.*; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.nutch.indexer.FsDirectory; +import org.apache.nutch.indexer.NutchSimilarity; /** 
Implements {@link Searcher} and {@link HitDetailer} for either a single * merged index, or a set of indexes. */ @@ -85,7 +83,7 @@ if ("file".equals(this.fs.getUri().getScheme())) { Path qualified = file.makeQualified(FileSystem.getLocal(conf)); File fsLocal = new File(qualified.toUri()); - return FSDirectory.getDirectory(fsLocal.getAbsolutePath(), false); + return FSDirectory.getDirectory(fsLocal.getAbsolutePath()); } else { return new FsDirectory(this.fs, file, false, this.conf); } @@ -109,20 +107,19 @@ } public HitDetails getDetails(Hit hit) throws IOException { - ArrayList fields = new ArrayList(); - ArrayList values = new ArrayList(); Document doc = luceneSearcher.doc(hit.getIndexDocNo()); - Enumeration e = doc.fields(); - while (e.hasMoreElements()) { - Field field = (Field)e.nextElement(); - fields.add(field.name()); - values.add(field.stringValue()); + List docFields = doc.getFields(); + String[] fields = new String[docFields.size()]; + String[] values = new String[docFields.size()]; + for (int i = 0; i < docFields.size(); i++) { + Field field = (Field)docFields.get(i); + fields[i] = field.name(); + values[i] = field.stringValue(); } - return new HitDetails((String[])fields.toArray(new String[fields.size()]), - (String[])values.toArray(new String[values.size()])); + return new HitDetails(fields, values); } public HitDetails[] getDetails(Hit[] hits) throws IOException { Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java?rev=618975&r1=618974&r2=618975&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java Wed Feb 6 04:06:34 2008 @@ -125,13 +125,15 @@ } super.collect(doc, score); 
} - } private static class LimitExceeded extends RuntimeException { + } + + private static class LimitExceeded extends RuntimeException { private int maxDoc; public LimitExceeded(int maxDoc) { this.maxDoc = maxDoc; } } - private LinkedHashMap cache; // an LRU cache of QueryFilter - + private LinkedHashMap<BooleanQuery, Filter> cache; // an LRU cache of QueryFilter + private float threshold; private int searcherMaxHits; @@ -154,7 +156,7 @@ this.threshold = conf.getFloat("searcher.filter.cache.threshold", 0.05f); this.searcherMaxHits = conf.getInt("searcher.max.hits", -1); - this.cache = new LinkedHashMap(cacheSize, 0.75f, true) { + this.cache = new LinkedHashMap<BooleanQuery, Filter>(cacheSize, 0.75f, true) { protected boolean removeEldestEntry(Map.Entry eldest) { return size() > cacheSize; // limit size of cache } @@ -174,7 +176,7 @@ BooleanQuery query = new BooleanQuery(); BooleanQuery cacheQuery = new BooleanQuery(); BooleanQuery filterQuery = new BooleanQuery(); - ArrayList filters = new ArrayList(); + ArrayList<Filter> filters = new ArrayList<Filter>(); BooleanClause[] clauses = original.getClauses(); for (int i = 0; i < clauses.length; i++) { @@ -214,12 +216,12 @@ Filter filter = null; if (cacheQuery.getClauses().length != 0) { synchronized (cache) { // check cache - filter = (Filter)cache.get(cacheQuery); + filter = cache.get(cacheQuery); } if (filter == null) { // miss if (filterQuery.getClauses().length != 0) // add filterQuery to filters - filters.add(new QueryFilter(filterQuery)); + filters.add(new CachingWrapperFilter(new QueryWrapperFilter(filterQuery))); if (filters.size() == 1) { // convert filters to filter filter = (Filter)filters.get(0); @@ -228,7 +230,7 @@ (new Filter[filters.size()]), ChainedFilter.AND); } - if (!(filter instanceof QueryFilter)) // make sure bits are cached + if (!(filter instanceof CachingWrapperFilter)) // make sure bits are cached filter = new CachingWrapperFilter(filter); synchronized (cache) { Added: 
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar?rev=618975&view=auto ============================================================================== Binary file - no diff available. Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar ------------------------------------------------------------------------------ svn:executable = * Propchange: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.3.0.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml?rev=618975&r1=618974&r2=618975&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml (original) +++ lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml Wed Feb 6 04:06:34 2008 @@ -25,11 +25,11 @@ <plugin id="lib-lucene-analyzers" name="Lucene Analysers" - version="2.2.0" + version="2.3.0" provider-name="org.apache.lucene"> <runtime> - <library name="lucene-analyzers-2.2.0.jar"> + <library name="lucene-analyzers-2.3.0.jar"> <export name="*"/> </library> </runtime> Added: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar?rev=618975&view=auto ============================================================================== Binary file - no diff available. 
Propchange: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar ------------------------------------------------------------------------------ svn:executable = * Propchange: lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.3.0.jar ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml?rev=618975&r1=618974&r2=618975&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml (original) +++ lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml Wed Feb 6 04:06:34 2008 @@ -25,7 +25,7 @@ <library name="summary-lucene.jar"> <export name="*"/> </library> - <library name="lucene-highlighter-2.2.0.jar"/> + <library name="lucene-highlighter-2.3.0.jar"/> </runtime> <requires>