Repository: nutch
Updated Branches:
  refs/heads/master 9f32fe84a -> d27c351f4


Fix for Nutch-2246: Refactor /seed end point, this closes #137


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/d27c351f
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/d27c351f
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/d27c351f

Branch: refs/heads/master
Commit: d27c351f440f5a5932049232760d492585078a54
Parents: 9f32fe8
Author: Sujen Shah <sujen1...@gmail.com>
Authored: Mon Aug 1 11:46:39 2016 -0400
Committer: Sujen Shah <su...@apache.org>
Committed: Tue Aug 9 14:47:07 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 src/java/org/apache/nutch/crawl/Injector.java   | 33 +++++++----
 src/java/org/apache/nutch/metadata/Nutch.java   |  2 +
 .../org/apache/nutch/service/NutchServer.java   |  7 +++
 .../org/apache/nutch/service/SeedManager.java   | 33 +++++++++++
 .../nutch/service/impl/SeedManagerImpl.java     | 58 ++++++++++++++++++++
 .../nutch/service/model/request/SeedList.java   | 10 ++++
 .../nutch/service/resources/SeedResource.java   | 27 ++++++++-
 8 files changed, 157 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ffcf5ae..2e0e041 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -69,6 +69,7 @@ New Feature
 
 Task
 
+    [NUTCH-2246] - Refactor /seed endpoint for backward compatibility
     [NUTCH-2201] - Remove loops program from webgraph package
     [NUTCH-2211] - Filter and normalizer checkers missing in bin/nutch
     [NUTCH-2220] - Rename db.* options used only by the linkdb to linkdb.*

http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/src/java/org/apache/nutch/crawl/Injector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/crawl/Injector.java b/src/java/org/apache/nutch/crawl/Injector.java
index 383aaf1..6575782 100644
--- a/src/java/org/apache/nutch/crawl/Injector.java
+++ b/src/java/org/apache/nutch/crawl/Injector.java
@@ -41,6 +41,7 @@ import org.apache.nutch.net.URLNormalizers;
 import org.apache.nutch.scoring.ScoringFilterException;
 import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.LockUtil;
+import org.apache.nutch.service.NutchServer;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchTool;
 import org.apache.nutch.util.TimingUtil;
@@ -477,11 +478,28 @@ public class Injector extends NutchTool implements Tool {
    */
   public Map<String, Object> run(Map<String, Object> args, String crawlId)
       throws Exception {
-    if (args.size() < 1) {
-      throw new IllegalArgumentException("Required arguments <url_dir>");
+    if(args.size()<1){
+      throw new IllegalArgumentException("Required arguments <url_dir> or <seedName>");
+    }
+    Path input;
+    Object path = null;
+    if(args.containsKey(Nutch.ARG_SEEDDIR)) {
+      path = args.get(Nutch.ARG_SEEDDIR);
+    }
+    else if(args.containsKey(Nutch.ARG_SEEDNAME)) {
+      path = NutchServer.getInstance().getSeedManager().
+          getSeedList((String)args.get(Nutch.ARG_SEEDNAME)).getSeedFilePath();
+    }
+    else {
+      throw new IllegalArgumentException("Required arguments <url_dir> or <seedName>");
+    }
+    if(path instanceof Path) {
+      input = (Path) path;
+    }
+    else {
+      input = new Path(path.toString());
     }
     Map<String, Object> results = new HashMap<String, Object>();
-
     Path crawlDb;
     if (args.containsKey(Nutch.ARG_CRAWLDB)) {
       Object crawldbPath = args.get(Nutch.ARG_CRAWLDB);
@@ -493,15 +511,6 @@ public class Injector extends NutchTool implements Tool {
     } else {
       crawlDb = new Path(crawlId + "/crawldb");
     }
-
-    Path input;
-    Object path = args.get(Nutch.ARG_SEEDDIR);
-    if (path instanceof Path) {
-      input = (Path) path;
-    } else {
-      input = new Path(path.toString());
-    }
-
     inject(crawlDb, input);
     results.put(Nutch.VAL_RESULT, Integer.toString(0));
     return results;

http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/src/java/org/apache/nutch/metadata/Nutch.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/metadata/Nutch.java b/src/java/org/apache/nutch/metadata/Nutch.java
index de80399..cbc3317 100644
--- a/src/java/org/apache/nutch/metadata/Nutch.java
+++ b/src/java/org/apache/nutch/metadata/Nutch.java
@@ -84,6 +84,8 @@ public interface Nutch {
        public static final String CRAWL_ID_KEY = "storage.crawl.id";
        /** Argument key to specify location of the seed url dir for the REST endpoints **/
        public static final String ARG_SEEDDIR = "url_dir";
+       /** Argument key to specify name of a seed list for the REST endpoints **/
+       public static final String ARG_SEEDNAME = "seedName";
        /** Argument key to specify the location of crawldb for the REST endpoints **/
        public static final String ARG_CRAWLDB = "crawldb";
        /** Argument key to specify the location of linkdb for the REST endpoints **/

http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/src/java/org/apache/nutch/service/NutchServer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/service/NutchServer.java b/src/java/org/apache/nutch/service/NutchServer.java
index e206707..6d531e0 100644
--- a/src/java/org/apache/nutch/service/NutchServer.java
+++ b/src/java/org/apache/nutch/service/NutchServer.java
@@ -41,6 +41,7 @@ import org.apache.nutch.fetcher.FetchNodeDb;
 import org.apache.nutch.service.impl.ConfManagerImpl;
 import org.apache.nutch.service.impl.JobFactory;
 import org.apache.nutch.service.impl.JobManagerImpl;
+import org.apache.nutch.service.impl.SeedManagerImpl;
 import org.apache.nutch.service.impl.NutchServerPoolExecutor;
 import org.apache.nutch.service.model.response.JobInfo;
 import org.apache.nutch.service.model.response.JobInfo.State;
@@ -74,6 +75,7 @@ public class NutchServer {
   private boolean running;
   private ConfManager configManager;
   private JobManager jobManager;
+  private SeedManager seedManager;
   private JAXRSServerFactoryBean sf; 
 
   private static FetchNodeDb fetchNodeDb;
@@ -86,6 +88,7 @@ public class NutchServer {
 
   private NutchServer() {
     configManager = new ConfManagerImpl();
+    seedManager = new SeedManagerImpl();
     BlockingQueue<Runnable> runnables = Queues.newArrayBlockingQueue(JOB_CAPACITY);
     NutchServerPoolExecutor executor = new NutchServerPoolExecutor(10, JOB_CAPACITY, 1, TimeUnit.HOURS, runnables);
     jobManager = new JobManagerImpl(new JobFactory(), configManager, executor);
@@ -149,6 +152,10 @@ public class NutchServer {
   public JobManager getJobManager() {
     return jobManager;
   }
+  
+  public SeedManager getSeedManager() {
+    return seedManager;
+  }
 
   public FetchNodeDb getFetchNodeDb(){
     return fetchNodeDb;

http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/src/java/org/apache/nutch/service/SeedManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/service/SeedManager.java b/src/java/org/apache/nutch/service/SeedManager.java
new file mode 100644
index 0000000..a96c4ac
--- /dev/null
+++ b/src/java/org/apache/nutch/service/SeedManager.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.service;
+
+import java.util.Map;
+
+import org.apache.nutch.service.model.request.SeedList;
+
+public interface SeedManager {
+
+  public SeedList getSeedList(String seedName);
+  
+  public void setSeedList(String seedName, SeedList seedList);
+  
+  public boolean deleteSeedList(String seedName);
+  
+  public Map<String, SeedList> getSeeds();
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java b/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java
new file mode 100644
index 0000000..c7b7607
--- /dev/null
+++ b/src/java/org/apache/nutch/service/impl/SeedManagerImpl.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.service.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.nutch.service.SeedManager;
+import org.apache.nutch.service.model.request.SeedList;
+
+public class SeedManagerImpl implements SeedManager {
+
+  private static Map<String, SeedList> seeds;
+
+  public SeedManagerImpl() {
+    seeds = new HashMap<>();
+  }
+
+  public SeedList getSeedList(String seedName) {
+    if(seeds.containsKey(seedName)) {
+      return seeds.get(seedName);
+    }
+    else
+      return null;
+  }
+
+  public void setSeedList(String seedName, SeedList seedList) {
+    seeds.put(seedName, seedList);
+  }
+
+  public Map<String, SeedList> getSeeds(){
+    return seeds;
+  }
+  
+  public boolean deleteSeedList(String seedName) {
+    if(seeds.containsKey(seedName)) {
+      seeds.remove(seedName);
+      return true;
+    }
+    else
+      return false;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/src/java/org/apache/nutch/service/model/request/SeedList.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/service/model/request/SeedList.java b/src/java/org/apache/nutch/service/model/request/SeedList.java
index bbb3e2a..5ba60da 100644
--- a/src/java/org/apache/nutch/service/model/request/SeedList.java
+++ b/src/java/org/apache/nutch/service/model/request/SeedList.java
@@ -29,6 +29,8 @@ public class SeedList implements Serializable {
   private Long id;
 
   private String name;
+  private String seedFilePath;
+
 
   @JsonManagedReference
   private Collection<SeedUrl> seedUrls;
@@ -57,6 +59,14 @@ public class SeedList implements Serializable {
     this.name = name;
   }
 
+  public String getSeedFilePath() {
+    return seedFilePath;
+  }
+
+  public void setSeedFilePath(String seedFilePath) {
+    this.seedFilePath = seedFilePath;
+  }
+
   @JsonIgnore
   public int getSeedUrlsCount() {
     if (CollectionUtils.isEmpty(seedUrls)) {

http://git-wip-us.apache.org/repos/asf/nutch/blob/d27c351f/src/java/org/apache/nutch/service/resources/SeedResource.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/service/resources/SeedResource.java b/src/java/org/apache/nutch/service/resources/SeedResource.java
index 5261139..638af33 100644
--- a/src/java/org/apache/nutch/service/resources/SeedResource.java
+++ b/src/java/org/apache/nutch/service/resources/SeedResource.java
@@ -24,8 +24,10 @@ import java.io.FileNotFoundException;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Map;
 
 import javax.ws.rs.Consumes;
+import javax.ws.rs.GET;
 import javax.ws.rs.POST;
 import javax.ws.rs.Path;
 import javax.ws.rs.Produces;
@@ -35,6 +37,7 @@ import javax.ws.rs.core.Response;
 import javax.ws.rs.core.Response.Status;
 
 import org.apache.commons.collections.CollectionUtils;
+import org.apache.nutch.service.NutchServer;
 import org.apache.nutch.service.model.request.SeedList;
 import org.apache.nutch.service.model.request.SeedUrl;
 import org.slf4j.Logger;
@@ -48,6 +51,23 @@ public class SeedResource extends AbstractResource {
       .getLogger(AdminResource.class);
 
   /**
+   * Gets the list of seedFiles already created 
+   * @return
+   */
+  @GET
+  @Path("/")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response getSeedLists() {
+    Map<String, SeedList> seeds = NutchServer.getInstance().getSeedManager().getSeeds();
+    if(seeds!=null) {
+      return Response.ok(seeds).build();
+    }
+    else {
+      return Response.ok().build();
+    }
+  }
+  
+  /**
    * Method creates seed list file and returns temporary directory path
    * @param seedList
    * @return
@@ -70,8 +90,11 @@ public class SeedResource extends AbstractResource {
         writeUrl(writer, seedUrl);
       }
     }
-
-    return Response.ok().entity(seedFile.getParent()).build();
+    String seedFilePath = seedFile.getParent();
+    seedList.setSeedFilePath(seedFilePath);
+    NutchServer.getInstance().getSeedManager().
+          setSeedList(seedList.getName(), seedList);
+    return Response.ok().entity(seedFilePath).build();
   }
 
   private void writeUrl(BufferedWriter writer, SeedUrl seedUrl) {

Reply via email to