Dear Wiki user, You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change notification.
The "JavaDemoApplication" page has been changed by Cristian Vulpe. http://wiki.apache.org/nutch/JavaDemoApplication?action=diff&rev1=10&rev2=11 -------------------------------------------------- With that, all is ready and we can now write some simple code to search. A quick example in Java to search the crawl index and return the number of hits found is: {{{ + package com.siemens.scr.sgcm.service; - import org.apache.nutch.searcher.*; - import org.apache.hadoop.conf.*; - import org.apache.nutch.util.*; - String nutchSearchString = "query_string"; - int maxHits = 1000; - Configuration nutchConf = NutchConfiguration.create(); - NutchBean nutchBean = new NutchBean(nutchConf); - Query nutchQuery = Query.parse(nutchSearchString, nutchConf); - Hits nutchHits = nutchBean.search(nutchQuery, maxHits); - out.println("Found " + nutchHits.getLength() + " hits\n"); - }}} - Obviously this is not the most useful application, but it provides the basics for querying the Nutch index. Once a Hits object is returned, we can inspect each Hit object within that structure and glean more information from it: - {{{ + import java.util.Date; + + // necessary imports + import org.apache.hadoop.conf.Configuration; + import org.apache.nutch.searcher.Hit; + import org.apache.nutch.searcher.HitDetails; + import org.apache.nutch.searcher.Hits; + import org.apache.nutch.searcher.NutchBean; + import org.apache.nutch.searcher.Query; + import org.apache.nutch.util.NutchConfiguration; + + public class Search { + public static void main(String[] args) { + try { + // define a keyword for the search + String nutchSearchString = "smart"; + + // configure nutch + Configuration nutchConf = NutchConfiguration.create(); + NutchBean nutchBean = new NutchBean(nutchConf); + // build the query + Query nutchQuery = Query.parse(nutchSearchString, nutchConf); + // optionally specify the maximum number of hits (default is 10) + // nutchQuery.getParams().setNumHits(100); + // 
nutchQuery.getParams().setMaxHitsPerDup(100); + Hits nutchHits = nutchBean.search(nutchQuery); + + // display the number of hits + System.out.println("Found " + nutchHits.getLength() + " hits.\n"); + + // get the details about each hit (includes title, URL, a summary + // and the date when this was fetched) - for (int i = 0; i < nutchHits.getLength(); i++) { + for (int i = 0; i < nutchHits.getLength(); i++) { - Hit hit = nutchHits.getHit(i); + Hit hit = nutchHits.getHit(i); - HitDetails details = nutchBean.getDetails(hit); + HitDetails details = nutchBean.getDetails(hit); - String title = details.getValue("title"); + String title = details.getValue("title"); - String url = details.getValue("url"); + String url = details.getValue("url"); - String summary = bean.getSummary(details, query).toString(); + String summary = nutchBean.getSummary(details, nutchQuery) + .toString(); - System.out.println("Title is: " + title); + System.out.println("Title is: " + title); - System.out.println("(" + url + ")"); + System.out.println("(" + url + ")"); - java.util.Date date = new java.util.Date(nutchBean.getFetchDate(details)); + Date date = new Date(nutchBean.getFetchDate(details)); - System.out.println("Date Fetched: " + date); + System.out.println("Date Fetched: " + date); - System.out.println(summary + "\n"); + System.out.println(summary + "\n"); - System.out.println("----------------------------------------"); + System.out.println("----------------------------------------"); + } + + // as usual, don't forget to close the resources + nutchBean.close(); + } catch (Throwable e) { + e.printStackTrace(); + } + } } }}} Chaz Hickman (Jan 2008)

