Repository: nutch
Updated Branches:
  refs/heads/master 71f0471b8 -> dce7a28c7


Fix compilation errors for CommonCrawlDataDumper


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/dce7a28c
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/dce7a28c
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/dce7a28c

Branch: refs/heads/master
Commit: dce7a28c70297208c0d86a572f4c232120e72018
Parents: 71f0471
Author: Lewis John McGibbney <[email protected]>
Authored: Mon May 9 13:55:53 2016 -0700
Committer: Lewis John McGibbney <[email protected]>
Committed: Mon May 9 13:55:53 2016 -0700

----------------------------------------------------------------------
 .../nutch/tools/CommonCrawlDataDumper.java      |  13 +-
 .../nutch/tools/TestCommonCrawlDataDumper.java  | 150 +++++++++----------
 2 files changed, 87 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/dce7a28c/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java b/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
index 5abd393..b4fc0a7 100644
--- a/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
+++ b/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
@@ -203,7 +203,6 @@ public class CommonCrawlDataDumper extends Configured implements Tool {
    *             the gzip option may be provided.
    * @throws Exception
    */
-  @SuppressWarnings("static-access")
   public static void main(String[] args) throws Exception {
     Configuration conf = NutchConfiguration.create();
     int res = ToolRunner.run(conf, new CommonCrawlDataDumper(), args);
@@ -573,6 +572,7 @@ public class CommonCrawlDataDumper extends Configured implements Tool {
   public int run(String[] args) throws Exception {
     Option helpOpt = new Option("h", "help", false, "show this help message.");
     // argument options
+    @SuppressWarnings("static-access")
     Option outputOpt = OptionBuilder.withArgName("outputDir").hasArg()
         .withDescription(
             "output directory (which will be created) to host the CBOR data.")
@@ -580,41 +580,52 @@ public class CommonCrawlDataDumper extends Configured implements Tool {
     // WARC format
     Option warcOpt = new Option("warc", "export to a WARC file");
 
+    @SuppressWarnings("static-access")
     Option segOpt = OptionBuilder.withArgName("segment").hasArgs()
         .withDescription("the segment or directory containing segments to use").create("segment");
     // create mimetype and gzip options
+    @SuppressWarnings("static-access")
     Option mimeOpt = OptionBuilder.isRequired(false).withArgName("mimetype")
         .hasArgs().withDescription(
             "an optional list of mimetypes to dump, excluding all others. Defaults to all.")
         .create("mimetype");
+    @SuppressWarnings("static-access")
     Option gzipOpt = OptionBuilder.withArgName("gzip").hasArg(false)
         .withDescription(
             "an optional flag indicating whether to additionally gzip the data.")
         .create("gzip");
+    @SuppressWarnings("static-access")
     Option keyPrefixOpt = OptionBuilder.withArgName("keyPrefix").hasArg(true)
         .withDescription("an optional prefix for key in the output format.")
         .create("keyPrefix");
+    @SuppressWarnings("static-access")
     Option simpleDateFormatOpt = OptionBuilder.withArgName("SimpleDateFormat")
         .hasArg(false).withDescription(
             "an optional format for timestamp in GMT epoch milliseconds.")
         .create("SimpleDateFormat");
+    @SuppressWarnings("static-access")
     Option epochFilenameOpt = OptionBuilder.withArgName("epochFilename")
         .hasArg(false)
         .withDescription("an optional format for output filename.")
         .create("epochFilename");
+    @SuppressWarnings("static-access")
     Option jsonArrayOpt = OptionBuilder.withArgName("jsonArray").hasArg(false)
         .withDescription("an optional format for JSON output.")
         .create("jsonArray");
+    @SuppressWarnings("static-access")
     Option reverseKeyOpt = OptionBuilder.withArgName("reverseKey").hasArg(false)
         .withDescription("an optional format for key value in JSON output.")
         .create("reverseKey");
+    @SuppressWarnings("static-access")
     Option extensionOpt = OptionBuilder.withArgName("extension").hasArg(true)
         .withDescription("an optional file extension for output documents.")
         .create("extension");
+    @SuppressWarnings("static-access")
     Option sizeOpt = OptionBuilder.withArgName("warcSize").hasArg(true)
         .withType(Number.class)
         .withDescription("an optional file size in bytes for the WARC file(s)")
         .create("warcSize");
+    @SuppressWarnings("static-access")
     Option linkDbOpt = OptionBuilder.withArgName("linkdb").hasArg(true)
         .withDescription("an optional linkdb parameter to include inlinks in dump files")
         .isRequired(false)

http://git-wip-us.apache.org/repos/asf/nutch/blob/dce7a28c/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java b/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
index f635d67..1429925 100644
--- a/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
+++ b/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
@@ -41,85 +41,85 @@ import org.apache.nutch.tools.CommonCrawlConfig;
  */
 public class TestCommonCrawlDataDumper {
 
-       @Test
-       public void testDump() throws Exception {
-               File sampleSegmentDir = new File(System.getProperty("test.build.data",
-                               "."), "test-segments");
-               File tempDir = Files.createTempDirectory("temp").toFile();
+  @Test
+  public void testDump() throws Exception {
+    File sampleSegmentDir = new File(System.getProperty("test.build.data",
+        "."), "test-segments");
+    File tempDir = Files.createTempDirectory("temp").toFile();
 
-               String[] crawledFiles = {
-                               "c463a4381eb837f9f5d45978cfbde79e_.html",
-                               "a974b8d74f7779ab6c6f90b9b279467e_.html",
-                               "6bc6497314656a3129732efd708e9f96_.html",
-                               "6e88c40abe26cad0a726102997aed048_.html",
-                               "5cafdd88f4e9cf3f0cd4c298c6873358_apachecon-europe.html",
-                               "932dc10a76e894a2baa8ea4086ad72a8_apachecon-north-america.html",
-                               "8540187d75b9cd405b8fa97d665f9f90_.html",
-                               "e501bc976c8693b4d28a55b79c390a32_.html",
-                               "6add662f9f5758b7d75eec5cfa1f340b_.html",
-                               "d4f20df3c37033dc516067ee1f424e4e_.html",
-                               "d7b8fa9a02cdc95546030d04be4a98f3_solr.html",
-                               "3cbe876e3a8e7a397811de3bb6a945cd_.html",
-                               "5b987dde0da79d7f2e3f22b46437f514_bot.html",
-                               "3d742820d9a701a1f02e10d5bf5ae633_credits.html",
-                               "693673f3c73d04a26276effdea69b7ee_downloads.html",
-                               "4f7e3469dafabb4c3b87b00531f81aa4_index.html",
-                               "15c5330675be8a69995aab18ff9859e0_javadoc.html",
-                               "bc624e1b49e29870ef095819bb0e977a_mailing_lists.html",
-                               "a7d66b68754c3665c66e62225255e3fd_version_control.html",
-                               "32fb7fe362e1a0d8a1b15addf2a00bdc_1.9-rel",
-                               "54ab3db10fe7b26415a04e21045125a8_1zE.html",
-                               "1012a41c08092c40340598bd8ee0bfa6_PGa.html",
-                               "c830cfc5c28bed10e69d5b83e9c1bcdc_nutch_2.3",
-                               "687d915dc264a77f35c61ba841936730_oHY.html",
-                               "2bf1afb650010128b4cf4afe677db3c5_1pav9xl.html",
-                               "550cab79e14110bbee61c36c61c830b0_1pbE15n.html",
-                               "664ff07b46520cc1414494ae49da91f6_.html",
-                               "04223714e648a6a43d7c8af8b095f733_.html",
-                               "3c8ccb865cd72cca06635d74c7f2f3c4_.html",
-                               "90fe47b28716a2230c5122c83f0b8562_Becoming_A_Nutch_Developer.html",
-                               "ac0fefe70007d40644e2b8bd5da3c305_FAQ.html",
-                               "bc9bc7f11c1262e8924032ab1c7ce112_NutchPropertiesCompleteList.html",
-                               "78d04611985e7375b441e478fa36f610_.html",
-                               "64adaebadd44e487a8b58894e979dc70_CHANGES.txt",
-                               "a48e9c2659b703fdea3ad332877708d8_.html",
-                               "159d66d679dd4442d2d8ffe6a83b2912_sponsorship.html",
-                               "66f1ce6872c9195c665fc8bdde95f6dc_thanks.html",
-                               "ef7ee7e929a048c4a119af78492095b3_.html",
-                               "e4251896a982c2b2b68678b5c9c57f4d_.html",
-                               "5384764a16fab767ebcbc17d87758a24_.html",
-                               "a6ba75a218ef2a09d189cb7dffcecc0f_.html",
-                               "f2fa63bd7a3aca63841eed4cd10fb519_SolrCloud.html",
-                               "f8de0fbda874e1a140f1b07dcebab374_NUTCH-1047.html",
-                               "9c120e94f52d690e9cfd044c34134649_NUTCH-1591.html",
-                               "7dd70378379aa452279ce9200d0a5fed_NUTCH-841.html",
-                               "ddf78b1fe5c268d59fd62bc745815b92_.html",
-                               "401c9f04887dbbf8d29ad52841b8bdb3_ApacheNutch.html",
-                               "8f984e2d3c2ba68d1695288f1738deaf_Nutch.html",
-                               "c2ef09a95a956207cea073a515172be2_FrontPage.html",
-                               "90d9b76e8eabdab1cbcc29bea437c7ae_NutchRESTAPI.html" };
+    String[] crawledFiles = {
+        "c463a4381eb837f9f5d45978cfbde79e_.html",
+        "a974b8d74f7779ab6c6f90b9b279467e_.html",
+        "6bc6497314656a3129732efd708e9f96_.html",
+        "6e88c40abe26cad0a726102997aed048_.html",
+        "5cafdd88f4e9cf3f0cd4c298c6873358_apachecon-europe.html",
+        "932dc10a76e894a2baa8ea4086ad72a8_apachecon-north-america.html",
+        "8540187d75b9cd405b8fa97d665f9f90_.html",
+        "e501bc976c8693b4d28a55b79c390a32_.html",
+        "6add662f9f5758b7d75eec5cfa1f340b_.html",
+        "d4f20df3c37033dc516067ee1f424e4e_.html",
+        "d7b8fa9a02cdc95546030d04be4a98f3_solr.html",
+        "3cbe876e3a8e7a397811de3bb6a945cd_.html",
+        "5b987dde0da79d7f2e3f22b46437f514_bot.html",
+        "3d742820d9a701a1f02e10d5bf5ae633_credits.html",
+        "693673f3c73d04a26276effdea69b7ee_downloads.html",
+        "4f7e3469dafabb4c3b87b00531f81aa4_index.html",
+        "15c5330675be8a69995aab18ff9859e0_javadoc.html",
+        "bc624e1b49e29870ef095819bb0e977a_mailing_lists.html",
+        "a7d66b68754c3665c66e62225255e3fd_version_control.html",
+        "32fb7fe362e1a0d8a1b15addf2a00bdc_1.9-rel",
+        "54ab3db10fe7b26415a04e21045125a8_1zE.html",
+        "1012a41c08092c40340598bd8ee0bfa6_PGa.html",
+        "c830cfc5c28bed10e69d5b83e9c1bcdc_nutch_2.3",
+        "687d915dc264a77f35c61ba841936730_oHY.html",
+        "2bf1afb650010128b4cf4afe677db3c5_1pav9xl.html",
+        "550cab79e14110bbee61c36c61c830b0_1pbE15n.html",
+        "664ff07b46520cc1414494ae49da91f6_.html",
+        "04223714e648a6a43d7c8af8b095f733_.html",
+        "3c8ccb865cd72cca06635d74c7f2f3c4_.html",
+        "90fe47b28716a2230c5122c83f0b8562_Becoming_A_Nutch_Developer.html",
+        "ac0fefe70007d40644e2b8bd5da3c305_FAQ.html",
+        "bc9bc7f11c1262e8924032ab1c7ce112_NutchPropertiesCompleteList.html",
+        "78d04611985e7375b441e478fa36f610_.html",
+        "64adaebadd44e487a8b58894e979dc70_CHANGES.txt",
+        "a48e9c2659b703fdea3ad332877708d8_.html",
+        "159d66d679dd4442d2d8ffe6a83b2912_sponsorship.html",
+        "66f1ce6872c9195c665fc8bdde95f6dc_thanks.html",
+        "ef7ee7e929a048c4a119af78492095b3_.html",
+        "e4251896a982c2b2b68678b5c9c57f4d_.html",
+        "5384764a16fab767ebcbc17d87758a24_.html",
+        "a6ba75a218ef2a09d189cb7dffcecc0f_.html",
+        "f2fa63bd7a3aca63841eed4cd10fb519_SolrCloud.html",
+        "f8de0fbda874e1a140f1b07dcebab374_NUTCH-1047.html",
+        "9c120e94f52d690e9cfd044c34134649_NUTCH-1591.html",
+        "7dd70378379aa452279ce9200d0a5fed_NUTCH-841.html",
+        "ddf78b1fe5c268d59fd62bc745815b92_.html",
+        "401c9f04887dbbf8d29ad52841b8bdb3_ApacheNutch.html",
+        "8f984e2d3c2ba68d1695288f1738deaf_Nutch.html",
+        "c2ef09a95a956207cea073a515172be2_FrontPage.html",
+    "90d9b76e8eabdab1cbcc29bea437c7ae_NutchRESTAPI.html" };
 
-               CommonCrawlDataDumper dumper = new CommonCrawlDataDumper(
-                               new CommonCrawlConfig());
-               dumper.dump(tempDir, sampleSegmentDir, false, null, false, "", false);
+    CommonCrawlDataDumper dumper = new CommonCrawlDataDumper(
+        new CommonCrawlConfig());
+    dumper.dump(tempDir, sampleSegmentDir, null, false, null, false, "", false);
 
-               Collection<File> tempFiles = FileUtils.listFiles(tempDir,
-                               FileFilterUtils.fileFileFilter(),
-                               FileFilterUtils.directoryFileFilter());
+    Collection<File> tempFiles = FileUtils.listFiles(tempDir,
+        FileFilterUtils.fileFileFilter(),
+        FileFilterUtils.directoryFileFilter());
 
-               for (String expectedFileName : crawledFiles) {
-                 assertTrue("Missed file " + expectedFileName + " in dump", 
-                     hasFile(expectedFileName, tempFiles));
-               }
+    for (String expectedFileName : crawledFiles) {
+      assertTrue("Missed file " + expectedFileName + " in dump", 
+          hasFile(expectedFileName, tempFiles));
+    }
 
-       }
+  }
 
-       private boolean hasFile(String fileName, Collection<File> files) {
-               for (File f : files) {
-                       if (f.getName().equals(fileName)) {
-                               return true;
-                       }
-               }
-               return false;
-       }
+  private boolean hasFile(String fileName, Collection<File> files) {
+    for (File f : files) {
+      if (f.getName().equals(fileName)) {
+        return true;
+      }
+    }
+    return false;
+  }
 }

Reply via email to