Dear Wiki user, You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change notification.
The "JavaDemoApplication" page has been changed by Cristian Vulpe. http://wiki.apache.org/nutch/JavaDemoApplication?action=diff&rev1=15&rev2=16 -------------------------------------------------- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> - <property> + <property> - <name>plugin.folders</name> + <name>plugin.folders</name> - <value>${nutch.site.plugin.folders} + <value>${nutch.site.plugin.folders} - </value> - <description /> - </property> + </value> + <description /> + </property> - <property> - <name>searcher.dir</name> + <property> + <name>searcher.dir</name> - <value>${nutch.site.searcher.dir}</value> + <value>${nutch.site.searcher.dir}</value> - <description /> - </property> + <description /> + </property> </configuration> }}} + and run the java application using the appropriate parameters: - and run the java application using the appropriate parameters: {{{ -Dnutch.site.plugin.folders="c:\tools\crawlers\apache-nutch-1.1-bin\plugins" -Dnutch.site.searcher.dir="c:\tools\crawlers\apache-nutch-1.1-bin\crawl" }}} - === CLASSPATH Configuration === - You also need to make sure that the following jars are placed in WEB-INF/lib (this assumes usage of Nutch 0.9): {{{ @@ -70, +68 @@ lucene-misc-2.2.0.jar nutch-0.9.jar }}} - For a standalone application, one might want to use Apache maven (this configuration assumes Nutch 1.1). At the moment of writing this note, Nutch does not publish its artifacts to maven. However we (members of community) hope that maven support will be added soon. In the meantime, just install the nutch-1.1.jar to your maven repository. Here is a snippet that will manage the dependencies that you need to run this example (note that the 1.1-XXX version of Nutch marks the fact that the artifact cannot be found in any public repository yet): {{{ <dependency> - <groupId>org.apache.nutch</groupId> + <groupId>org.apache.nutch</groupId> - <artifactId>nutch</artifactId> + <artifactId>nutch</artifactId> - <version>1.1-XXX</version> + <version>1.1-XXX</version> </dependency> <dependency> - <groupId>org.apache.hadoop</groupId> + <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> + <artifactId>hadoop-core</artifactId> - <version>0.20.2</version> + <version>0.20.2</version> </dependency> <dependency> - <groupId>org.apache.lucene</groupId> + <groupId>org.apache.lucene</groupId> - <artifactId>lucene-core</artifactId> + <artifactId>lucene-core</artifactId> - <version>3.0.1</version> + <version>3.0.1</version> - <scope>runtime</scope> + <scope>runtime</scope> </dependency> <dependency> - <groupId>org.apache.lucene</groupId> + <groupId>org.apache.lucene</groupId> - <artifactId>lucene-misc</artifactId> + <artifactId>lucene-misc</artifactId> - <version>3.0.1</version> + <version>3.0.1</version> - <scope>runtime</scope> + <scope>runtime</scope> </dependency> <dependency> - <groupId>commons-lang</groupId> + <groupId>commons-lang</groupId> - <artifactId>commons-lang</artifactId> + <artifactId>commons-lang</artifactId> - <version>2.1</version> + <version>2.1</version> - <scope>runtime</scope> + <scope>runtime</scope> </dependency> }}} - == Sample code == With that, all is ready and we can now write some simple code to search. A quick example in Java to search the crawl index and return the number of hits found is: {{{ - // necessary imports import org.apache.hadoop.conf.Configuration; import org.apache.nutch.searcher.Hit; @@ -124, +119 @@ import java.util.Date; public class Search { - public static void main(String[] args) { + public static void main(String[] args) { - try { - // define a keyword for the search - String nutchSearchString = "smart"; + try { + // define a keyword for the search + String nutchSearchString = "smart"; - // configure nutch + // configure nutch - Configuration nutchConf = NutchConfiguration.create(); + Configuration nutchConf = NutchConfiguration.create(); - NutchBean nutchBean = new NutchBean(nutchConf); + NutchBean nutchBean = new NutchBean(nutchConf); - // build the query + // build the query - Query nutchQuery = Query.parse(nutchSearchString, nutchConf); + Query nutchQuery = Query.parse(nutchSearchString, nutchConf); - // optionally specify the maximum number of hits (default is 10) + // optionally specify the maximum number of hits (default is 10) - // nutchQuery.getParams().setNumHits(100); + // nutchQuery.getParams().setNumHits(100); - // nutchQuery.getParams().setMaxHitsPerDup(100); + // nutchQuery.getParams().setMaxHitsPerDup(100); - Hits nutchHits = nutchBean.search(nutchQuery); + Hits nutchHits = nutchBean.search(nutchQuery); - // display the number of hits + // display the number of hits - System.out.println("Found " + nutchHits.getLength() + " hits.\n"); + System.out.println("Found " + nutchHits.getLength() + " hits.\n"); - // get the details about each hit (includes title, URL, a summary + // get the details about each hit (includes title, URL, a summary - // and the date when this was fetched) + // and the date when this was fetched) - for (int i = 0; i < nutchHits.getLength(); i++) { + for (int i = 0; i < nutchHits.getLength(); i++) { - Hit hit = nutchHits.getHit(i); - HitDetails details = nutchBean.getDetails(hit); - String title = details.getValue("title"); - String url = details.getValue("url"); + Hit hit = nutchHits.getHit(i); + HitDetails details = nutchBean.getDetails(hit); + String title = details.getValue("title"); + String url = details.getValue("url"); - String summary = nutchBean.getSummary(details, nutchQuery) + String summary = nutchBean.getSummary(details, nutchQuery) - .toString(); - System.out.println("Title is: " + title); - System.out.println("(" + url + ")"); + .toString(); + System.out.println("Title is: " + title); + System.out.println("(" + url + ")"); - Date date = new Date(nutchBean.getFetchDate(details)); + Date date = new Date(nutchBean.getFetchDate(details)); - System.out.println("Date Fetched: " + date); - System.out.println(summary + "\n"); + System.out.println("Date Fetched: " + date); + System.out.println(summary + "\n"); - System.out.println("----------------------------------------"); + System.out.println("----------------------------------------"); - } + } - // as usually, don't forget to close the resources + // as usually, don't forget to close the resources - nutchBean.close(); - } catch (Throwable e) { - e.printStackTrace(); - } - } + nutchBean.close(); + } catch (Throwable e) { + e.printStackTrace(); + } + } } }}} + Extra information about developing a standalone application that does the search can be obtained by inspecting the main method in org.apache.nutch.searcher.NutchBean. + + === Authors === + Chaz Hickman (Jan 2008) - Cristi Vulpe (Aug 2010)

