This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/2.x by this push:
     new 8eb2e0b  Nutch 2.X GeneratorJob creates NullPointerException when using DataFileAvroStore
8eb2e0b is described below

commit 8eb2e0b521c4321621a1649cf16be8306d8f77d5
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Thu Dec 21 14:49:31 2017 +0000

    Nutch 2.X GeneratorJob creates NullPointerException when using DataFileAvroStore
---
 conf/gora.properties                               | 32 ----------------------
 src/java/org/apache/nutch/crawl/GeneratorJob.java  | 21 ++++++++------
 .../org/apache/nutch/crawl/GeneratorMapper.java    |  8 ++----
 .../org/apache/nutch/crawl/GeneratorReducer.java   |  2 +-
 src/java/org/apache/nutch/util/TableUtil.java      |  8 ++----
 5 files changed, 19 insertions(+), 52 deletions(-)

diff --git a/conf/gora.properties b/conf/gora.properties
index d49ca77..644ec0f 100644
--- a/conf/gora.properties
+++ b/conf/gora.properties
@@ -19,31 +19,6 @@
 #gora.datastore.default=org.apache.gora.mock.store.MockDataStore
 #gora.datastore.autocreateschema=true
 
-###############################
-# Default SqlStore properties #
-###############################
-
-#gora.sqlstore.jdbc.driver=org.hsqldb.jdbc.JDBCDriver
-#gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql://localhost/nutchtest
-#gora.sqlstore.jdbc.user=sa
-#gora.sqlstore.jdbc.password=
-
-################################
-# Default AvroStore properties #
-################################
-
-# gora.avrostore.codec.type=BINARY||JSON
-# gora.avrostore.output.path=file:///tmp/gora.avrostore.test.output
-
-################################
-# DatafileAvroStore properties #
-################################
-# DataFileAvroStore is file based store which uses Avro's 
-# DataFile{Writer,Reader}'s as a backend. This datastore supports 
-# mapreduce.
-
-# gora.datafileavrostore.###=
-
 #########################
 # HBaseStore properties #
 #########################
@@ -68,13 +43,6 @@
 
 # gora.cassandrastore.servers=localhost:9160
 
-#######################
-# MemStore properties #
-#######################
-# This is a memory based {@link DataStore} implementation for tests.
-
-# gora.memstore.###=
-
 ############################
 # AccumuloStore properties #
 ############################
diff --git a/src/java/org/apache/nutch/crawl/GeneratorJob.java b/src/java/org/apache/nutch/crawl/GeneratorJob.java
index c4058c1..aa6fec7 100644
--- a/src/java/org/apache/nutch/crawl/GeneratorJob.java
+++ b/src/java/org/apache/nutch/crawl/GeneratorJob.java
@@ -66,7 +66,7 @@ public class GeneratorJob extends NutchTool implements Tool {
   public static final String BATCH_ID = "generate.batch.id";
   public static final String GENERATE_COUNT = "generate.count";
 
-  private static final Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private static final Set<WebPage.Field> FIELDS = new HashSet<>();
 
   static {
     FIELDS.add(WebPage.Field.FETCH_TIME);
@@ -85,6 +85,7 @@ public class GeneratorJob extends NutchTool implements Tool {
     float score;
 
     public SelectorEntry() {
+      //default constructor
     }
 
     public SelectorEntry(String url, float score) {
@@ -92,11 +93,13 @@ public class GeneratorJob extends NutchTool implements Tool {
       this.score = score;
     }
 
+    @Override
     public void readFields(DataInput in) throws IOException {
       url = Text.readString(in);
       score = in.readFloat();
     }
 
+    @Override
     public void write(DataOutput out) throws IOException {
       Text.writeString(out, url);
       out.writeFloat(score);
@@ -161,7 +164,7 @@ public class GeneratorJob extends NutchTool implements Tool {
   }
 
   public GeneratorJob() {
-
+    //default constructor
   }
 
   public GeneratorJob(Configuration conf) {
@@ -169,7 +172,7 @@ public class GeneratorJob extends NutchTool implements Tool {
   }
 
   public Collection<WebPage.Field> getFields(Job job) {
-    Collection<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
+    Collection<WebPage.Field> fields = new HashSet<>(FIELDS);
     fields.addAll(FetchScheduleFactory.getFetchSchedule(job.getConfiguration())
         .getFields());
     return fields;
@@ -183,8 +186,7 @@ public class GeneratorJob extends NutchTool implements Tool {
   public static String randomBatchId() {
     long curTime = System.currentTimeMillis();
     int randomSeed = Math.abs(new Random().nextInt());
-    String batchId = (curTime / 1000) + "-" + randomSeed;
-    return batchId;
+    return (curTime / 1000) + "-" + randomSeed;
   }
 
   /**
@@ -194,6 +196,7 @@ public class GeneratorJob extends NutchTool implements Tool {
    * @return results
    * @throws Exception
    */
+  @Override
   public Map<String, Object> run(Map<String, Object> args) throws Exception {
     String batchId = (String) args.get(Nutch.ARG_BATCH);
     if (batchId == null) {
@@ -342,8 +345,10 @@ public class GeneratorJob extends NutchTool implements Tool {
       return -1;
     }
 
-    long curTime = System.currentTimeMillis(), topN = Long.MAX_VALUE;
-    boolean filter = true, norm = true;
+    long curTime = System.currentTimeMillis();
+    long topN = Long.MAX_VALUE;
+    boolean filter = true;
+    boolean norm = true;
     boolean sitemap = false;
 
     for (int i = 0; i < args.length; i++) {
@@ -376,7 +381,7 @@ public class GeneratorJob extends NutchTool implements Tool {
     }
   }
 
-  public static void main(String args[]) throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new GeneratorJob(),
         args);
     System.exit(res);
diff --git a/src/java/org/apache/nutch/crawl/GeneratorMapper.java b/src/java/org/apache/nutch/crawl/GeneratorMapper.java
index d07b0b5..cc02c91 100644
--- a/src/java/org/apache/nutch/crawl/GeneratorMapper.java
+++ b/src/java/org/apache/nutch/crawl/GeneratorMapper.java
@@ -78,13 +78,9 @@ GoraMapper<String, WebPage, SelectorEntry, WebPage> {
       if ((sitemap && !URLFilters.isSitemap(page)) || !sitemap && URLFilters
           .isSitemap(page))
         return;
-    } catch (URLFilterException e) {
+    } catch (URLFilterException | MalformedURLException e) {
       GeneratorJob.LOG
-      .warn("Couldn't filter url: {} ({})", url, e.getMessage());
-      return;
-    } catch (MalformedURLException e) {
-      GeneratorJob.LOG
-      .warn("Couldn't filter url: {} ({})", url, e.getMessage());
+      .warn("Couldn't filter url: {} ({})", url, e);
       return;
     }
 
diff --git a/src/java/org/apache/nutch/crawl/GeneratorReducer.java b/src/java/org/apache/nutch/crawl/GeneratorReducer.java
index ed4d1e5..852adaf 100644
--- a/src/java/org/apache/nutch/crawl/GeneratorReducer.java
+++ b/src/java/org/apache/nutch/crawl/GeneratorReducer.java
@@ -45,7 +45,7 @@ public class GeneratorReducer extends
   private long maxCount;
   protected static long count = 0;
   private boolean byDomain = false;
-  private Map<String, Integer> hostCountMap = new HashMap<String, Integer>();
+  private Map<String, Integer> hostCountMap = new HashMap<>();
   private Utf8 batchId;
 
   @Override
diff --git a/src/java/org/apache/nutch/util/TableUtil.java b/src/java/org/apache/nutch/util/TableUtil.java
index e6ccbbc..21718eb 100644
--- a/src/java/org/apache/nutch/util/TableUtil.java
+++ b/src/java/org/apache/nutch/util/TableUtil.java
@@ -92,10 +92,8 @@ public class TableUtil {
       pathBegin = reversedUrl.length();
     String sub = reversedUrl.substring(0, pathBegin);
 
-    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed
-                                                                    // host>,
-                                                                    // <port>,
-                                                                    // <protocol>}
+    // {<reversed host>, <port>, <protocol>}
+    String[] splits = StringUtils.splitPreserveAllTokens(sub, ':');
 
     buf.append(splits[1]); // add protocol
     buf.append("://");
@@ -155,7 +153,7 @@ public class TableUtil {
    * @return string-ifed Utf8 object or null if Utf8 instance is null
    */
   public static String toString(CharSequence utf8) {
-    return (utf8 == null ? null : StringUtil.cleanField(utf8.toString()));
+    return utf8 == null ? null : StringUtil.cleanField(utf8.toString());
   }
 
 }

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to