svn commit: r405083 - /lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties
Author: siren Date: Mon May 8 09:19:56 2006 New Revision: 405083 URL: http://svn.apache.org/viewcvs?rev=405083view=rev Log: restored props for default locale Added: lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties Added: lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties?rev=405083view=auto == --- lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties (added) +++ lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties Mon May 8 09:19:56 2006 @@ -0,0 +1,99 @@ +#This is the default resource file for nutch ui localization. +#If you create a new localized version of resources, please use +#this as the base + +#language code used in html lang attribute +lang=en + +#anchors page title +anchors.title=anchors +anchors.anchors=incoming anchor text: +anchors.page=page: a href={0}{0}/a + +#cached page title +cached.title=nutch cache +cached.page=page: a href={0}{0}/a +cached.noContent=Sorry, no content is cached for this page. +cached.notHtml=The cached content has mime type {0}, click this a href=servlet/cached?{1}link/a to download it directly. + +#explain page title +explain.title=score explanation +explain.page=page +explain.scoreForQuery=score for query: tt{0}/tt + +#search page title +search.title=search results + +#text in search button +search.search=Search + +#text wich describes the search results (nn-nn out of nn) +search.hits=Results b{0}-{1}/b of about b{2}/b total matching pages for b{3}/b. + +#text displayed when there are no reults +search.noResults=Your search - b{0}/b - did not match any documents. 
+ +#cached page link text +search.cached=cached + +#explain page link text +search.explain=explain + +#anchors page link text +search.anchors=anchors + +#text in next page button +search.next=next page + +#link text of +search.moreFrom=more from +search.showAllHits=show all hits + +search.clustering=clustering +search.viewAsText=View as Plain Text + +#search help link text +search.help=help + +#index more web ui localization +search.contentType=[span class=contentType{0}/span] +search.contentLength=({0} bytes) +search.lastModified={0} + +#view as text page title +text.title=plain text cache +text.note=This is the plain text version of the file: a href={0}{0}/a. +text.noText=iSorry, no plain text version is available./i + +#title of help page +help.title=help + +#title of preferences page +preferences.title=preferences + +#interface languages +preferences.ui.language=Interface language +preferences.ui.language.info= +ca=Catalan +de=German +en=English +es=Spanish +fi=Finnish +fr=French +hu=Hungarian +ms=Malay +nl=Dutch +pl=Polish +pt=Portuguese +sh=Serbo-Croatian +sr=Serbian +sv=Swedish +th=Thai +zh=Chinese + +#number of results +preferences.numResults=Number of Results +preferences.numResults.info= + +#text on save button +preferences.submit=Save and return to search \ No newline at end of file
svn commit: r405089 - /lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
Author: siren Date: Mon May 8 09:27:10 2006 New Revision: 405089 URL: http://svn.apache.org/viewcvs?rev=405089&view=rev Log: removed log flooding, removed unnecessary code from PreferencesController, integrated displaying information of index-more plugin Modified: lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties Modified: lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties?rev=405089&r1=405088&r2=405089&view=diff == --- lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties (original) +++ lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties Mon May 8 09:27:10 2006 @@ -55,6 +55,11 @@ #search help link text search.help=help +#index more web ui localization +search.contentType=[span class=contentType{0}/span] +search.contentLength=({0} bytes) +search.lastModified={0} + #view as text page title text.title=plain text cache text.note=This is the plain text version of the file: a href={0}{0}/a.
svn commit: r405088 - in /lucene/nutch/trunk/contrib/web2: ./ src/main/java/org/apache/nutch/webapp/common/ src/main/java/org/apache/nutch/webapp/controller/ src/main/webapp/WEB-INF/ src/main/webapp/W
Author: siren Date: Mon May 8 09:25:53 2006 New Revision: 405088 URL: http://svn.apache.org/viewcvs?rev=405088&view=rev Log: removed log flooding, removed unnecessary code from PreferencesController, integrated displaying information of index-more plugin Added: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/MoreController.java Modified: lucene/nutch/trunk/contrib/web2/README.txt lucene/nutch/trunk/contrib/web2/build.xml lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/NutchController.java lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/PreferencesController.java lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/more.jsp lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/preferences.jsp lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/results.jsp lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/search.jsp lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/template.jsp lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml Modified: lucene/nutch/trunk/contrib/web2/README.txt URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/README.txt?rev=405088&r1=405087&r2=405088&view=diff == --- lucene/nutch/trunk/contrib/web2/README.txt (original) +++ lucene/nutch/trunk/contrib/web2/README.txt Mon May 8 09:25:53 2006 @@ -9,9 +9,9 @@ (and related) pages. Layout is constructed by using following tag libraries: -struts-logic +jstl-c +jstl-fmt struts-tiles -struts-bean These tiles blocks can be extended or overridden by plugins implementing org.apache.nutch.webapp.UIExtensionPoint. 
A @@ -36,8 +36,10 @@ Todo: --Provide some samples of ui plugins - +-provide some samples of ui plugins +-move more functionality to plugin +-remove table structures from html to allow more flexible css layouts +-add mechanism for adding binary items (ie. images) Directory contents Modified: lucene/nutch/trunk/contrib/web2/build.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/build.xml?rev=405088r1=405087r2=405088view=diff == --- lucene/nutch/trunk/contrib/web2/build.xml (original) +++ lucene/nutch/trunk/contrib/web2/build.xml Mon May 8 09:25:53 2006 @@ -287,6 +287,7 @@ includes=**/*.html/ replace dir=${docs.dir} token=help.html value=help.do includes=**/*.html/ + replace dir=${docs.dir} token=about.html value=about.do includes=**/*.html/ replace dir=${docs.dir} token=search.jsp value=search.do includes=**/*.html/ replace dir=${docs.dir} token=../ value= includes=**/*.html/ Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java?rev=405088r1=405087r2=405088view=diff == --- lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java Mon May 8 09:25:53 2006 @@ -62,9 +62,6 @@ * @return */ protected boolean hasNext() { -System.out.println(totalIsExact + totalIsExact); -System.out.println(end + end); -System.out.println(totalHits + totalHits); return end totalHits (!getShowAllHits()); } Modified: lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java?rev=405088r1=405087r2=405088view=diff == --- 
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java (original) +++ lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java Mon May 8 09:25:53 2006 @@ -89,18 +89,12 @@ hits = new Hits(0, new Hit[0]); } -LOG.info(form:); -LOG.info(locator.getSearchForm().toString()); -LOG.info(performing search); - int realEnd = (int) Math.min(hits.getLength(), getStartOffset() + getMaxHits()); int endOffset=hits.getLength(); show = hits.getHits(getStartOffset(), realEnd - getStartOffset()); - - navigationHelper = new NavigationHelper(startOffset, endOffset, hitsPerPage, hits
svn commit: r405165 - in /lucene/nutch/trunk: ./ conf/ src/java/org/apache/nutch/searcher/ src/plugin/ src/plugin/nutch-extensionpoints/ src/plugin/summary-basic/ src/plugin/summary-basic/src/ src/plu
Author: jerome Date: Mon May 8 14:04:01 2006 New Revision: 405165 URL: http://svn.apache.org/viewcvs?rev=405165&view=rev Log: NUTCH-134 : Added a summarizer extension point and two extensions: * summary-basic is the current nutch implementation moved into a plugin * summary-lucene a raw version of a summarizer plugin based on lucene highlighter Added: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/SummarizerFactory.java (with props) lucene/nutch/trunk/src/plugin/summary-basic/ lucene/nutch/trunk/src/plugin/summary-basic/build.xml (with props) lucene/nutch/trunk/src/plugin/summary-basic/plugin.xml (with props) lucene/nutch/trunk/src/plugin/summary-basic/src/ lucene/nutch/trunk/src/plugin/summary-basic/src/java/ lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/ lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/ lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/ lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/ lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/basic/ lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/basic/BasicSummarizer.java (with props) lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/basic/package.html (with props) lucene/nutch/trunk/src/plugin/summary-lucene/ lucene/nutch/trunk/src/plugin/summary-lucene/build.xml (with props) lucene/nutch/trunk/src/plugin/summary-lucene/lib/ lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.0-rc1-dev.jar (with props) lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml (with props) lucene/nutch/trunk/src/plugin/summary-lucene/src/ lucene/nutch/trunk/src/plugin/summary-lucene/src/java/ lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/ lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/ lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/ 
lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/ lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/lucene/ lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/lucene/LuceneSummarizer.java (with props) lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/lucene/package.html (with props) Modified: lucene/nutch/trunk/build.xml lucene/nutch/trunk/conf/nutch-default.xml lucene/nutch/trunk/default.properties lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java lucene/nutch/trunk/src/plugin/build.xml lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml Modified: lucene/nutch/trunk/build.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=405165r1=405164r2=405165view=diff == --- lucene/nutch/trunk/build.xml (original) +++ lucene/nutch/trunk/build.xml Mon May 8 14:04:01 2006 @@ -323,6 +323,8 @@ packageset dir=${plugins.dir}/query-more/src/java/ packageset dir=${plugins.dir}/query-site/src/java/ packageset dir=${plugins.dir}/query-url/src/java/ + packageset dir=${plugins.dir}/summary-basic/src/java/ + packageset dir=${plugins.dir}/summary-lucene/src/java/ packageset dir=${plugins.dir}/urlfilter-automaton/src/java/ packageset dir=${plugins.dir}/urlfilter-regex/src/java/ packageset dir=${plugins.dir}/urlfilter-prefix/src/java/ @@ -350,6 +352,7 @@ group title=Analysis Plugins packages=${plugins.analysis}/ group title=Indexing Filter Plugins packages=${plugins.index}/ group title=Query Filter Plugins packages=${plugins.query}/ + group title=Summary Plugins packages=${plugins.summary}/ group title=Clustering Plugins packages=${plugins.clustering}/ group title=Ontology Plugins packages=${plugins.ontology}/ group title=Misc. 
Plugins packages=${plugins.misc}/ Modified: lucene/nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=405165r1=405164r2=405165view=diff == --- lucene/nutch/trunk/conf/nutch-default.xml (original) +++ lucene/nutch/trunk/conf/nutch-default.xml Mon May 8 14:04:01 2006 @@ -564,7 +564,7 @@ property nameplugin.includes/name - valueprotocol-http|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)/value + valueprotocol-http|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)|summary-basic/value descriptionRegular expression naming plugin directory names to include. Any plugin not matching this expression is excluded. In any
svn commit: r405179 - in /lucene/nutch/trunk/src: java/org/apache/nutch/crawl/MapWritable.java test/org/apache/nutch/crawl/TestMapWritable.java
Author: ab Date: Mon May 8 14:48:21 2006 New Revision: 405179 URL: http://svn.apache.org/viewcvs?rev=405179view=rev Log: Fix NUTCH-263. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java?rev=405179r1=405178r2=405179view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java Mon May 8 14:48:21 2006 @@ -224,16 +224,20 @@ public boolean equals(Object obj) { if (obj instanceof MapWritable) { MapWritable map = (MapWritable) obj; + if (fSize != map.fSize) return false; + HashSet set1 = new HashSet(); KeyValueEntry e1 = fFirst; - KeyValueEntry e2 = map.fFirst; - while (e1 != null e2 != null) { -if (!e1.equals(e2)) { - return false; -} + while (e1 != null) { +set1.add(e1); e1 = e1.fNextEntry; + } + HashSet set2 = new HashSet(); + KeyValueEntry e2 = map.fFirst; + while (e2 != null) { +set2.add(e2); e2 = e2.fNextEntry; } - return true; + return set1.equals(set2); } return false; } @@ -451,6 +455,10 @@ return entry.fKey.equals(fKey) entry.fValue.equals(fValue); } return false; +} + +public int hashCode() { + return toString().hashCode(); } } Modified: lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java?rev=405179r1=405178r2=405179view=diff == --- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Mon May 8 14:48:21 2006 @@ -91,6 +91,16 @@ } testWritable(c); } + + public void testEquals() { +MapWritable map1 = new MapWritable(); +MapWritable map2 = new 
MapWritable(); +map1.put(new UTF8(key1), new UTF8(val1)); +map1.put(new UTF8(key2), new UTF8(val2)); +map2.put(new UTF8(key2), new UTF8(val2)); +map2.put(new UTF8(key1), new UTF8(val1)); +assertTrue(map1.equals(map2)); + } public void testPerformance() throws Exception { File file = new File(System.getProperty(java.io.tmpdir), mapTestFile);
svn commit: r405181 - in /lucene/nutch/trunk/src/java/org/apache/nutch/crawl: CrawlDbReader.java LinkDb.java
Author: ab Date: Mon May 8 14:52:09 2006 New Revision: 405181 URL: http://svn.apache.org/viewcvs?rev=405181view=rev Log: Refactor to make it easier to use these classes programmatically. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=405181r1=405180r2=405181view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Mon May 8 14:52:09 2006 @@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.io.Closeable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.MapFile; @@ -55,9 +56,28 @@ * @author Andrzej Bialecki * */ -public class CrawlDbReader { +public class CrawlDbReader implements Closeable { public static final Logger LOG = LogFormatter.getLogger(CrawlDbReader.class.getName()); + + private MapFile.Reader[] readers = null; + + private void openReaders(String crawlDb, Configuration config) throws IOException { +if (readers != null) return; +FileSystem fs = FileSystem.get(config); +readers = MapFileOutputFormat.getReaders(fs, new File(crawlDb, CrawlDatum.DB_DIR_NAME), config); + } + + private void closeReaders() { +if (readers == null) return; +for (int i = 0; i readers.length; i++) { + try { +readers[i].close(); + } catch (Exception e) { + + } +} + } public static class CrawlDbStatMapper implements Mapper { public void configure(JobConf job) {} @@ -177,6 +197,10 @@ public void close() {} } + + public void close() { +closeReaders(); + } public void processStatJob(String crawlDb, Configuration config) throws IOException { LOG.info(CrawlDb 
statistics start: + crawlDb); @@ -249,16 +273,20 @@ LOG.info(CrawlDb statistics: done); } - - public void readUrl(String crawlDb, String url, Configuration config) throws IOException { -FileSystem fs = FileSystem.get(config); + + public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException { UTF8 key = new UTF8(url); CrawlDatum val = new CrawlDatum(); -MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new File(crawlDb, CrawlDatum.DB_DIR_NAME), config); -Writable res = MapFileOutputFormat.getEntry(readers, new HashPartitioner(), key, val); +openReaders(crawlDb, config); +CrawlDatum res = (CrawlDatum)MapFileOutputFormat.getEntry(readers, new HashPartitioner(), key, val); +return res; + } + + public void readUrl(String crawlDb, String url, Configuration config) throws IOException { +CrawlDatum res = get(crawlDb, url, config); System.out.println(URL: + url); if (res != null) { - System.out.println(val); + System.out.println(res); } else { System.out.println(not found); } Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=405181r1=405180r2=405181view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Mon May 8 14:52:09 2006 @@ -28,6 +28,7 @@ import org.apache.hadoop.util.LogFormatter; import org.apache.hadoop.mapred.*; +import org.apache.nutch.net.URLFilters; import org.apache.nutch.parse.*; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.NutchJob; @@ -44,15 +45,27 @@ private int maxInlinks; private boolean ignoreInternalLinks; - public static class LinkDbMerger extends MapReduceBase implements Reducer { + public static class Merger extends MapReduceBase implements Reducer { private int _maxInlinks; +private URLFilters filters = null; public void configure(JobConf job) { 
super.configure(job); _maxInlinks = job.getInt(db.max.inlinks, 1); + if (job.getBoolean(linkdb.merger.urlfilters, false)) { +filters = new URLFilters(job); + } } public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { + if (filters != null) { +try { + if (filters.filter(((UTF8)key).toString()) == null) +return; +} catch (Exception e) { + LOG.fine(Can't filter + key + : + e); +} + }