Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change 
notification.

The "JavaDemoApplication" page has been changed by Cristian Vulpe.
http://wiki.apache.org/nutch/JavaDemoApplication?action=diff&rev1=10&rev2=11

--------------------------------------------------

  With that, all is ready and we can now write some simple code to search. A 
quick example in Java to search the crawl index and return the number of hits 
found is:
  
  {{{
+ package com.siemens.scr.sgcm.service;
- import org.apache.nutch.searcher.*;
- import org.apache.hadoop.conf.*;
- import org.apache.nutch.util.*;
- String nutchSearchString = "query_string";
- int maxHits = 1000;
- Configuration nutchConf = NutchConfiguration.create();
- NutchBean nutchBean = new NutchBean(nutchConf);
- Query nutchQuery = Query.parse(nutchSearchString, nutchConf);
- Hits nutchHits = nutchBean.search(nutchQuery, maxHits);
- out.println("Found " + nutchHits.getLength() + " hits\n");
- }}}
- Obviously this is not the most useful application, but it provides the basics 
for querying the Nutch index. Once a Hits object is returned, we can inspect 
each Hit object within that structure and glean more information from it:
  
- {{{
+ import java.util.Date;
+ 
+ // necessary imports
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.nutch.searcher.Hit;
+ import org.apache.nutch.searcher.HitDetails;
+ import org.apache.nutch.searcher.Hits;
+ import org.apache.nutch.searcher.NutchBean;
+ import org.apache.nutch.searcher.Query;
+ import org.apache.nutch.util.NutchConfiguration;
+ 
+ public class Search {
+       public static void main(String[] args) {
+               try {
+                       // define a keyword for the search
+                       String nutchSearchString = "smart";
+ 
+                       // configure nutch
+                       Configuration nutchConf = NutchConfiguration.create();
+                       NutchBean nutchBean = new NutchBean(nutchConf);
+                       // build the query
+                       Query nutchQuery = Query.parse(nutchSearchString, 
nutchConf);
+                       // optionally specify the maximum number of hits (default is 10)
+                       // nutchQuery.getParams().setNumHits(100);
+                       // nutchQuery.getParams().setMaxHitsPerDup(100);
+                       Hits nutchHits = nutchBean.search(nutchQuery);
+ 
+                       // display the number of hits
+                       System.out.println("Found " + nutchHits.getLength() + " 
hits.\n");
+ 
+                       // get the details about each hit (includes title, URL, a summary
+                       // and the date when this was fetched)
- for (int i = 0; i < nutchHits.getLength(); i++) {
+                       for (int i = 0; i < nutchHits.getLength(); i++) {
-   Hit hit = nutchHits.getHit(i);
+                               Hit hit = nutchHits.getHit(i);
-   HitDetails details = nutchBean.getDetails(hit);
+                               HitDetails details = nutchBean.getDetails(hit);
-   String title = details.getValue("title");
+                               String title = details.getValue("title");
-   String url = details.getValue("url");
+                               String url = details.getValue("url");
-   String summary = bean.getSummary(details, query).toString();
+                               String summary = nutchBean.getSummary(details, 
nutchQuery)
+                                               .toString();
-   System.out.println("Title is: " + title);
+                               System.out.println("Title is: " + title);
-   System.out.println("(" + url + ")");
+                               System.out.println("(" + url + ")");
-   java.util.Date date = new java.util.Date(nutchBean.getFetchDate(details));
+                               Date date = new 
Date(nutchBean.getFetchDate(details));
-   System.out.println("Date Fetched: " + date);
+                               System.out.println("Date Fetched: " + date);
-   System.out.println(summary + "\n");
+                               System.out.println(summary + "\n");
-   System.out.println("----------------------------------------");
+                               
System.out.println("----------------------------------------");
+                       }
+ 
+                       // as usual, don't forget to close the resources
+                       nutchBean.close();
+               } catch (Throwable e) {
+                       e.printStackTrace();
+               }
+       }
  }
  }}}
  Chaz Hickman (Jan 2008)

Reply via email to