Update of /cvsroot/nutch/nutch/src/test/net/nutch/tools
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8994

Modified Files:
        TestSegmentMergeTool.java 
Log Message:
Add new test case, which checks for proper behaviour of SegmentMergeTool
when dealing with corrupt segments.

Reformat source to use 2-space indent instead of literal tabs.


Index: TestSegmentMergeTool.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/test/net/nutch/tools/TestSegmentMergeTool.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** TestSegmentMergeTool.java   20 Aug 2004 20:36:17 -0000      1.2
--- TestSegmentMergeTool.java   6 Oct 2004 23:05:56 -0000       1.3
***************
*** 23,196 ****
  public class TestSegmentMergeTool extends TestCase {
  
!       private static final int SEGMENT_CNT = 5;
  
!       private static final int PAGE_CNT = 100;
  
!       private File testDir = null;
  
!       public TestSegmentMergeTool(String name) {
!               super(name);
!       }
  
!       /**
!        * Create test directory.
!        * 
!        * @see junit.framework.TestCase#setUp()
!        */
!       protected void setUp() throws Exception {
!               super.setUp();
!               testDir = File.createTempFile(".smttest", "");
!               testDir.delete();
!               testDir.mkdirs();
!       }
  
!       /**
!        * Create test segment data. NOTE: if segment format changes you need to
!        * modify the way segment data is created here (e.g. add more writers). 
!        * 
!        * @param dir
!        *            segment directory
!        * @param unique
!        *            if true, use unique data per segment, otherwise use the same data
!        * @throws Exception
!        */
!       protected void createSegmentData(NutchFileSystem nfs, File dir, boolean unique) throws Exception {
!               // Each segment consists of:
!               // - FetcherOutput
!               // - Content
!               // - ParseData
!               // - ParseText
!               // Create writers for these data files
!               ArrayFile.Writer fetcherWriter = new ArrayFile.Writer(nfs, new File(dir,
!                               FetcherOutput.DIR_NAME).toString(), FetcherOutput.class);
!               ArrayFile.Writer contentWriter = new ArrayFile.Writer(nfs, new File(dir,
!                               Content.DIR_NAME).toString(), Content.class);
!               ArrayFile.Writer parseDataWriter = new ArrayFile.Writer(nfs, new File(dir,
!                               ParseData.DIR_NAME).toString(), ParseData.class);
!               ArrayFile.Writer parseTextWriter = new ArrayFile.Writer(nfs, new File(dir,
!                               ParseText.DIR_NAME).toString(), ParseText.class);
!               for (int i = 0; i < PAGE_CNT; i++) {
!                       String url = "http://www.example.com/page-" + i;
!                       String rnd = "";
!                       if (unique) {
!                               rnd = "/" + System.currentTimeMillis();
!                               url += rnd;
!                       }
!                       url += "/example.html";
!                       FetchListEntry fle = new FetchListEntry(true, new Page(url, 1.0f,
!                                       1.0f), new String[] { "test" + rnd });
!                       FetcherOutput fo = new FetcherOutput(fle, MD5Hash.digest(url),
!                                       FetcherOutput.SUCCESS);
!                       fetcherWriter.append(fo);
!                       String content = "<html><body><h1>Hello from Page " + i + "</h1>";
!                       if (unique) {
!                               content += "<p>Created at epoch time: "
!                                               + System.currentTimeMillis() + "</p>";
!                       }
!                       content += "</body></html>";
!                       Properties meta = new Properties();
!                       meta.setProperty("Content-Type", "text/html");
!                       meta.setProperty("Host", "http://localhost");
!                       meta.setProperty("Connection", "Keep-alive, close");
!                       Content c = new Content(url, "http://www.example.com", content
!                                       .getBytes("UTF-8"), "text/html", meta);
!                       contentWriter.append(c);
!                       ParseData pd = new ParseData("Hello from Page " + i,
!                                       new Outlink[0], meta);
!                       parseDataWriter.append(pd);
!                       String text = "Hello from Page" + i;
!                       if (unique) {
!                               text += "\nCreated at epoch time: "
!                                               + System.currentTimeMillis();
!                       }
!                       ParseText pt = new ParseText(text);
!                       parseTextWriter.append(pt);
!               }
!               fetcherWriter.close();
!               contentWriter.close();
!               parseDataWriter.close();
!               parseTextWriter.close();
!       }
  
!       /**
!        * Remove test directory.
!        * 
!        * @see junit.framework.TestCase#tearDown()
!        */
!       protected void tearDown() throws Exception {
!             NutchFileSystem nfs = new LocalFileSystem();
!             try {
!               //System.out.println(".tearDown() entered.");
!               super.tearDown();
!               try {
!                       FileUtil.fullyDelete(nfs, testDir);
!               } catch (Exception e) {
!                       System.out.println("NON-FATAL: " + e.getMessage());
!               }
!               //System.out.println(".tearDown() finished.");
!             } finally {
!                 nfs.close();
!             }
!       }
  
!       /**
!        * Test merging segments with unique data. The output (merged segment) should
!        * contain the number of pages equal exactly to a product of segment count times
!        * page count per segment.
!        *
!        */
!       public void testUniqueMerge() throws IOException {
!             NutchFileSystem nfs = new LocalFileSystem();
!             try {
!               File dataDir = new File(testDir, "segments");
!               File outSegment = new File(testDir, "output");
!               try {
!                       for (int i = 0; i < SEGMENT_CNT; i++) {
!                               File f = new File(dataDir, "seg" + i);
!                               nfs.mkdirs(f);
!                               createSegmentData(nfs, f, true);
!                       }
!                       SegmentMergeTool.main(new String[] { dataDir.toString(), "-o",
!                                       outSegment.toString(), "-cm" });
!                       SegmentMergeTool.SegmentReader sr = new SegmentMergeTool.SegmentReader(
!                                       outSegment.listFiles()[0]);
!                       assertEquals(sr.size, SEGMENT_CNT * PAGE_CNT);
!                       sr.close();
!               } catch (Exception e) {
!                       fail(e.getMessage());
!               }
!             } finally {
!                 nfs.close();
!             }
!       }
  
!       /**
!        * Test merging segments with the same data. The output (merged segment) should
!        * contain the number of pages equal exactly to the page count of a single segment.
!        *
!        */
!       public void testSameMerge() throws IOException {
!             NutchFileSystem nfs = new LocalFileSystem();
!             try {
!               File dataDir = new File(testDir, "segments");
!               File outSegment = new File(testDir, "output");
!               try {
!                       for (int i = 0; i < SEGMENT_CNT; i++) {
!                               File f = new File(dataDir, "seg" + i);
!                               nfs.mkdirs(f);
!                               createSegmentData(nfs, f, false);
!                       }
!                       SegmentMergeTool.main(new String[] { dataDir.toString(), "-o",
!                                       outSegment.toString(), "-cm" });
!                       SegmentMergeTool.SegmentReader sr = new SegmentMergeTool.SegmentReader(
!                                       outSegment.listFiles()[0]);
!                       assertEquals(sr.size, PAGE_CNT);
!                       sr.close();
!               } catch (Exception e) {
!                       fail(e.getMessage());
!               }
!             } finally {
!                 nfs.close();
!             }
!       }
! }
--- 23,259 ----
  public class TestSegmentMergeTool extends TestCase {
  
!   private static final int SEGMENT_CNT = 5;
  
!   private static final int PAGE_CNT = 200;
  
!   private File testDir = null;
  
!   public TestSegmentMergeTool(String name) {
!     super(name);
!   }
  
!   /**
!    * Create test directory.
!    * 
!    * @see junit.framework.TestCase#setUp()
!    */
!   protected void setUp() throws Exception {
!     super.setUp();
!     testDir = File.createTempFile(".smttest", "");
!     testDir.delete();
!     testDir.mkdirs();
!   }
  
!   /**
!    * Create test segment data. NOTE: if segment format changes you need to
!    * modify the way segment data is created here (e.g. add more writers).
!    * 
!    * @param dir segment directory
!    * @param unique if true, use unique data per segment, otherwise use the same
!    *        data
!    * @throws Exception
!    */
!   protected void createSegmentData(NutchFileSystem nfs, File dir, boolean unique) throws Exception {
!     // Each segment consists of:
!     // - FetcherOutput
!     // - Content
!     // - ParseData
!     // - ParseText
!     // Create writers for these data files
!     ArrayFile.Writer fetcherWriter = new ArrayFile.Writer(nfs, new File(dir, FetcherOutput.DIR_NAME).toString(),
!             FetcherOutput.class);
!     ArrayFile.Writer contentWriter = new ArrayFile.Writer(nfs, new File(dir, Content.DIR_NAME).toString(),
!             Content.class);
!     ArrayFile.Writer parseDataWriter = new ArrayFile.Writer(nfs, new File(dir, ParseData.DIR_NAME).toString(),
!             ParseData.class);
!     ArrayFile.Writer parseTextWriter = new ArrayFile.Writer(nfs, new File(dir, ParseText.DIR_NAME).toString(),
!             ParseText.class);
!     for (int i = 0; i < PAGE_CNT; i++) {
!       String url = "http://www.example.com/page-" + i;
!       String rnd = "";
!       if (unique) {
!         rnd = "/" + System.currentTimeMillis();
!         url += rnd;
!       }
!       url += "/example.html";
!       FetchListEntry fle = new FetchListEntry(true, new Page(url, 1.0f, 1.0f), new String[] { "test" + rnd });
!       FetcherOutput fo = new FetcherOutput(fle, MD5Hash.digest(url), FetcherOutput.SUCCESS);
!       fetcherWriter.append(fo);
!       String content = "<html><body><h1>Hello from Page " + i + "</h1>";
!       if (unique) {
!         content += "<p>Created at epoch time: " + System.currentTimeMillis() + "</p>";
!       }
!       content += "</body></html>";
!       Properties meta = new Properties();
!       meta.setProperty("Content-Type", "text/html");
!       meta.setProperty("Host", "http://localhost");
!       meta.setProperty("Connection", "Keep-alive, close");
!       Content c = new Content(url, "http://www.example.com", content.getBytes("UTF-8"), "text/html", meta);
!       contentWriter.append(c);
!       ParseData pd = new ParseData("Hello from Page " + i, new Outlink[0], meta);
!       parseDataWriter.append(pd);
!       String text = "Hello from Page" + i;
!       if (unique) {
!         text += "\nCreated at epoch time: " + System.currentTimeMillis();
!       }
!       ParseText pt = new ParseText(text);
!       parseTextWriter.append(pt);
!     }
!     fetcherWriter.close();
!     contentWriter.close();
!     parseDataWriter.close();
!     parseTextWriter.close();
!   }
  
!   /**
!    * Remove test directory.
!    * 
!    * @see junit.framework.TestCase#tearDown()
!    */
!   protected void tearDown() throws Exception {
!     NutchFileSystem nfs = new LocalFileSystem();
!     try {
!       //System.out.println(".tearDown() entered.");
!       super.tearDown();
!       try {
!         FileUtil.fullyDelete(nfs, testDir);
!       } catch (Exception e) {
!         System.out.println("NON-FATAL: " + e.getMessage());
!       }
!       //System.out.println(".tearDown() finished.");
!     } finally {
!       nfs.close();
!     }
!   }
  
!   /**
!    * Test merging segments with unique data. The output (merged segment) should
!    * contain the number of pages equal exactly to a product of segment count
!    * times page count per segment.
!    *  
!    */
!   public void testUniqueMerge() throws IOException {
!     NutchFileSystem nfs = new LocalFileSystem();
!     try {
!       File dataDir = new File(testDir, "segments");
!       File outSegment = new File(testDir, "output");
!       try {
!         for (int i = 0; i < SEGMENT_CNT; i++) {
!           File f = new File(dataDir, "seg" + i);
!           nfs.mkdirs(f);
!           createSegmentData(nfs, f, true);
!         }
!         SegmentMergeTool
!                 .main(new String[] { dataDir.toString(), "-o", outSegment.toString(), "-cm", "-i", "-ds", "-dm" });
!         SegmentMergeTool.SegmentReader sr = new SegmentMergeTool.SegmentReader(outSegment.listFiles()[0]);
!         assertEquals(sr.size, SEGMENT_CNT * PAGE_CNT);
!         sr.close();
!       } catch (Exception e) {
!         e.printStackTrace();
!         fail(e.getMessage());
!       }
!     } finally {
!       nfs.close();
!     }
!   }
  
!   /**
!    * Test merging segments with the same data. The output (merged segment)
!    * should contain the number of pages equal exactly to the page count of a
!    * single segment.
!    *  
!    */
!   public void testSameMerge() throws IOException {
!     NutchFileSystem nfs = new LocalFileSystem();
!     try {
!       File dataDir = new File(testDir, "segments");
!       File outSegment = new File(testDir, "output");
!       try {
!         for (int i = 0; i < SEGMENT_CNT; i++) {
!           File f = new File(dataDir, "seg" + i);
!           nfs.mkdirs(f);
!           createSegmentData(nfs, f, false);
!         }
!         SegmentMergeTool
!                 .main(new String[] { dataDir.toString(), "-o", outSegment.toString(), "-cm", "-i", "-ds", "-dm" });
!         SegmentMergeTool.SegmentReader sr = new SegmentMergeTool.SegmentReader(outSegment.listFiles()[0]);
!         assertEquals(sr.size, PAGE_CNT);
!         sr.close();
!       } catch (Exception e) {
!         e.printStackTrace();
!         fail(e.getMessage());
!       }
!     } catch (Throwable ex) {
!       ex.printStackTrace();
!       fail(ex.getMessage());
!     } finally {
!       nfs.close();
!     }
!   }
! 
!   public void testCorruptSegmentMerge() throws IOException {
!     NutchFileSystem nfs = new LocalFileSystem();
!     try {
!       File dataDir = new File(testDir, "segments");
!       File outSegment = new File(testDir, "output");
!       try {
!         for (int i = 0; i < SEGMENT_CNT; i++) {
!           File f = new File(dataDir, "seg" + i);
!           nfs.mkdirs(f);
!           createSegmentData(nfs, f, true);
!           switch (i) {
!             case 0:
!               // truncate the
!               // fetcherOutput data
!               // file
!               File data = new File(f, FetcherOutput.DIR_NAME);
!               data = new File(data, "data");
!               RandomAccessFile raf = new RandomAccessFile(data, "rws");
!               raf.setLength(raf.length() - raf.length() / 4);
!               raf.close();
!               break;
!             case 1:
!               // truncate the Content
!               // data file
!               data = new File(f, Content.DIR_NAME);
!               data = new File(data, "data");
!               raf = new RandomAccessFile(data, "rws");
!               raf.setLength(raf.length() - raf.length() / 4);
!               raf.close();
!               break;
!             case 2:
!               // trash the whole
!               // content
!               data = new File(f, Content.DIR_NAME);
!               new File(data, "data").delete();
!               new File(data, "index").delete();
!               break;
!             default:
!               // do nothing
!               ;
!           }
!         }
!         SegmentMergeTool
!                 .main(new String[] { dataDir.toString(), "-o", outSegment.toString(), "-cm", "-i", "-ds", "-dm" });
!         SegmentMergeTool.SegmentReader sr = new SegmentMergeTool.SegmentReader(outSegment.listFiles()[0]);
!         // we arrive at this expression as follows:
!         // 1. SEGMENT_CNT - 1 : because we trash one whole segment
!         // 2. 2 * PAGE_CNT / 4: because for two segments
!         // we truncate 1/4 of the data file
!         // 3. + 2: because sometimes truncation falls on
!         // the boundary of the last entry
!         int maxCnt = PAGE_CNT * (SEGMENT_CNT - 1) - 2 * PAGE_CNT / 4 + 2;
!         assertTrue(sr.size < maxCnt);
!         sr.close();
!       } catch (Exception e) {
!         e.printStackTrace();
!         fail(e.getMessage());
!       }
!     } catch (Throwable ex) {
!       ex.printStackTrace();
!       fail(ex.getMessage());
!     } finally {
!       nfs.close();
!     }
!   }
! }
\ No newline at end of file



-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to