Update of /cvsroot/nutch/nutch/src/java/net/nutch/searcher
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6372/src/java/net/nutch/searcher

Modified Files:
        DistributedSearch.java FetchedSegments.java HitContent.java 
        NutchBean.java 
Log Message:
Added code to provide the "View as Plain Text" feature.


Index: FetchedSegments.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/FetchedSegments.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** FetchedSegments.java        20 Aug 2004 20:36:14 -0000      1.8
--- FetchedSegments.java        7 Oct 2004 04:24:53 -0000       1.9
***************
*** 51,60 ****
      }
  
-     public String getText(int docNo) throws IOException {
-       ParseText entry = new ParseText();
-       text.get(docNo, entry);
-       return entry.getText();
-     }
- 
      public ParseData getParseData(int docNo) throws IOException {
          ParseData entry = new ParseData();
--- 51,54 ----
***************
*** 63,66 ****
--- 57,66 ----
        }
  
+     public ParseText getParseText(int docNo) throws IOException {
+         ParseText entry = new ParseText();
+         text.get(docNo, entry);
+         return entry;
+       }
+ 
    }
  
***************
*** 99,106 ****
    }
  
    public String getSummary(HitDetails details, Query query)
      throws IOException {
  
!     String text = getSegment(details).getText(getDocNo(details));
  
      return new Summarizer().getSummary(text, query).toString();
--- 99,110 ----
    }
  
+   public ParseText getParseText(HitDetails details) throws IOException {
+     return getSegment(details).getParseText(getDocNo(details));
+   }
+ 
    public String getSummary(HitDetails details, Query query)
      throws IOException {
  
!     String text = getSegment(details).getParseText(getDocNo(details)).getText();
  
      return new Summarizer().getSummary(text, query).toString();

Index: NutchBean.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/NutchBean.java,v
retrieving revision 1.15
retrieving revision 1.16
diff -C2 -d -r1.15 -r1.16
*** NutchBean.java      27 Aug 2004 20:26:46 -0000      1.15
--- NutchBean.java      7 Oct 2004 04:24:53 -0000       1.16
***************
*** 244,247 ****
--- 244,251 ----
    }
  
+   public ParseText getParseText(HitDetails hit) throws IOException {
+     return content.getParseText(hit);
+   }
+ 
    public String[] getAnchors(HitDetails hit) throws IOException {
      return content.getAnchors(hit);

Index: HitContent.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/HitContent.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** HitContent.java     8 Jul 2004 20:02:30 -0000       1.2
--- HitContent.java     7 Oct 2004 04:24:53 -0000       1.3
***************
*** 7,10 ****
--- 7,11 ----
  
  import net.nutch.parse.ParseData;
+ import net.nutch.parse.ParseText;
  
  /** Service that returns the content of a hit. */
***************
*** 16,19 ****
--- 17,23 ----
    ParseData getParseData(HitDetails details) throws IOException;
  
+   /** Returns the ParseText of a hit document. */
+   ParseText getParseText(HitDetails details) throws IOException;
+ 
    /** Returns the anchors of a hit document. */
    String[] getAnchors(HitDetails details) throws IOException;

Index: DistributedSearch.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/DistributedSearch.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** DistributedSearch.java      3 Aug 2004 20:30:34 -0000       1.8
--- DistributedSearch.java      7 Oct 2004 04:24:53 -0000       1.9
***************
*** 10,13 ****
--- 10,14 ----
  
  import net.nutch.parse.ParseData;
+ import net.nutch.parse.ParseText;
  import net.nutch.util.LogFormatter;
  import net.nutch.io.*;
***************
*** 31,37 ****
    private static final byte OP_ANCHORS = (byte)6;
    private static final byte OP_PARSEDATA = (byte)7;
  
    /** Names of the op codes. */
!   private static final String[] OP_NAMES = new String[8];
    static {
      OP_NAMES[OP_SEGMENTS] = "getSegmentNames";
--- 32,39 ----
    private static final byte OP_ANCHORS = (byte)6;
    private static final byte OP_PARSEDATA = (byte)7;
+   private static final byte OP_PARSETEXT = (byte)8;
  
    /** Names of the op codes. */
!   private static final String[] OP_NAMES = new String[9];
    static {
      OP_NAMES[OP_SEGMENTS] = "getSegmentNames";
***************
*** 43,46 ****
--- 45,49 ----
      OP_NAMES[OP_ANCHORS] = "getAnchors";
      OP_NAMES[OP_PARSEDATA] = "getParseData";
+     OP_NAMES[OP_PARSETEXT] = "getParseText";
    }
  
***************
*** 100,103 ****
--- 103,110 ----
          second = NullWritable.get();
          break;
+       case OP_PARSETEXT:
+         first = new HitDetails();
+         second = NullWritable.get();
+         break;
        default:
          throw new RuntimeException("Unknown op code: " + op);
***************
*** 155,158 ****
--- 162,168 ----
          value = new ParseData();
          break;
+       case OP_PARSETEXT:
+         value = new ParseText();
+         break;
        default:
          throw new RuntimeException("Unknown op code: " + op);
***************
*** 203,206 ****
--- 213,219 ----
          value = bean.getParseData((HitDetails)p.first);
          break;
+       case OP_PARSETEXT:
+         value = bean.getParseText((HitDetails)p.first);
+         break;
        default:
          throw new RuntimeException("Unknown op code: " + p.op);
***************
*** 422,425 ****
--- 435,446 ----
        }
        
+     public ParseText getParseText(HitDetails hit) throws IOException {
+         Param param = new Param(OP_PARSETEXT, hit);
+         InetSocketAddress address =
+           (InetSocketAddress)segmentToAddress.get(hit.getValue("segment"));
+         Result result = (Result)call(param, address);
+         return (ParseText)result.value;
+     }
+       
      public String[] getAnchors(HitDetails hit) throws IOException {
        Param param = new Param(OP_ANCHORS, hit);



-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to