http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java new file mode 100644 index 0000000..04821e7 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/JobWorker.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.impl; + +import java.text.MessageFormat; + +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.metadata.Nutch; +import org.apache.nutch.service.model.request.JobConfig; +import org.apache.nutch.service.model.response.JobInfo; +import org.apache.nutch.service.model.response.JobInfo.State; +import org.apache.nutch.service.resources.ConfigResource; +import org.apache.nutch.util.NutchTool; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class JobWorker implements Runnable{ + + private JobInfo jobInfo; + private JobConfig jobConfig; + private static final Logger LOG = LoggerFactory.getLogger(JobWorker.class); + private NutchTool tool; + + /** + * To initialize JobWorker thread with the Job Configurations provided by user. + * @param jobConfig + * @param conf + * @param tool - NutchTool to run + */ + public JobWorker(JobConfig jobConfig, Configuration conf, NutchTool tool) { + this.jobConfig = jobConfig; + this.tool = tool; + if (jobConfig.getConfId() == null) { + jobConfig.setConfId(ConfigResource.DEFAULT); + } + + jobInfo = new JobInfo(generateId(), jobConfig, State.IDLE, "idle"); + if (jobConfig.getCrawlId() != null) { + conf.set(Nutch.CRAWL_ID_KEY, jobConfig.getCrawlId()); + } + } + + private String generateId() { + if (jobConfig.getCrawlId() == null) { + return MessageFormat.format("{0}-{1}-{2}", jobConfig.getConfId(), + jobConfig.getType(), String.valueOf(hashCode())); + } + return MessageFormat.format("{0}-{1}-{2}-{3}", jobConfig.getCrawlId(), + jobConfig.getConfId(), jobConfig.getType(), String.valueOf(hashCode())); + } + + @Override + public void run() { + try { + getInfo().setState(State.RUNNING); + getInfo().setMsg("OK"); + getInfo().setResult(tool.run(getInfo().getArgs(), getInfo().getCrawlId())); + getInfo().setState(State.FINISHED); + } catch (Exception e) { + LOG.error("Cannot run job worker!", e); + getInfo().setMsg("ERROR: " + e.toString()); + getInfo().setState(State.FAILED); 
+ } + } + + public JobInfo getInfo() { + return jobInfo; + } + + /** + * To stop the executing job + * @return boolean true/false + */ + public boolean stopJob() { + getInfo().setState(State.STOPPING); + try { + return tool.stopJob(); + } catch (Exception e) { + throw new RuntimeException( + "Cannot stop job with id " + getInfo().getId(), e); + } + } + + public boolean killJob() { + getInfo().setState(State.KILLING); + try { + boolean result = tool.killJob(); + getInfo().setState(State.KILLED); + return result; + } catch (Exception e) { + throw new RuntimeException( + "Cannot kill job with id " + getInfo().getId(), e); + } + } + + public void setInfo(JobInfo jobInfo) { + this.jobInfo = jobInfo; + } + +}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java new file mode 100644 index 0000000..cc88501 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/LinkReader.java @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.impl; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import javax.ws.rs.WebApplicationException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.SequenceFile.Reader; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.nutch.scoring.webgraph.LinkDatum; +import org.apache.nutch.service.NutchReader; + +public class LinkReader implements NutchReader{ + + @Override + public List read(String path) throws FileNotFoundException { + List<HashMap> rows=new ArrayList<HashMap>(); + Path file = new Path(path); + SequenceFile.Reader reader; + try{ + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable) + ReflectionUtils.newInstance(reader.getKeyClass(), conf); + LinkDatum value = new LinkDatum(); + + while(reader.next(key, value)) { + try { + HashMap<String, String> t_row = getLinksRow(key,value); + rows.add(t_row); + } + catch (Exception e) { + } + } + reader.close(); + + }catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + + }catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + + return rows; + } + + @Override + public List head(String path, int nrows) throws FileNotFoundException { + List<HashMap> rows=new ArrayList<HashMap>(); + Path file = new Path(path); + SequenceFile.Reader reader; + try{ + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable) + ReflectionUtils.newInstance(reader.getKeyClass(), conf); + LinkDatum value = new LinkDatum(); + int i = 0; + while(reader.next(key, value) && i<nrows) { + + HashMap<String, 
String> t_row = getLinksRow(key,value); + rows.add(t_row); + + i++; + } + reader.close(); + + }catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + + }catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + + return rows; + } + + @Override + public List slice(String path, int start, int end) + throws FileNotFoundException { + List<HashMap> rows=new ArrayList<HashMap>(); + Path file = new Path(path); + SequenceFile.Reader reader; + try{ + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable) + ReflectionUtils.newInstance(reader.getKeyClass(), conf); + LinkDatum value = new LinkDatum(); + int i = 0; + + for(;i<start && reader.next(key, value);i++){} // increment to read start position + while(reader.next(key, value) && i<end) { + HashMap<String, String> t_row = getLinksRow(key,value); + rows.add(t_row); + + i++; + } + reader.close(); + + }catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + + }catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + + return rows; + } + + @Override + public int count(String path) throws FileNotFoundException { + Path file = new Path(path); + SequenceFile.Reader reader; + int i = 0; + try { + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable)ReflectionUtils.newInstance(reader.getKeyClass(), conf); + Writable value = (Writable)ReflectionUtils.newInstance(reader.getValueClass(), conf); + + while(reader.next(key, value)) { + i++; + } + reader.close(); + } catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + }catch (IOException e) { + // TODO Auto-generated catch block + 
LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + return i; + } + + private HashMap<String, String> getLinksRow(Writable key, LinkDatum value) { + HashMap<String, String> t_row = new HashMap<String, String>(); + t_row.put("key_url", key.toString()); + t_row.put("url", value.getUrl()); + t_row.put("anchor", value.getAnchor()); + t_row.put("score", String.valueOf(value.getScore())); + t_row.put("timestamp", String.valueOf(value.getTimestamp())); + t_row.put("linktype", String.valueOf(value.getLinkType())); + + return t_row; + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java new file mode 100644 index 0000000..2155a16 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/NodeReader.java @@ -0,0 +1,184 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.impl; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import javax.ws.rs.WebApplicationException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.io.SequenceFile.Reader; +import org.apache.nutch.scoring.webgraph.Node; +import org.apache.nutch.service.NutchReader; + +public class NodeReader implements NutchReader { + + @Override + public List read(String path) throws FileNotFoundException { + // TODO Auto-generated method stub + List<HashMap> rows=new ArrayList<HashMap>(); + Path file = new Path(path); + SequenceFile.Reader reader; + try{ + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable) + ReflectionUtils.newInstance(reader.getKeyClass(), conf); + Node value = new Node(); + + while(reader.next(key, value)) { + try { + HashMap<String, String> t_row = getNodeRow(key,value); + rows.add(t_row); + } + catch (Exception e) { + } + } + reader.close(); + + }catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + + }catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + LOG.error("Error occurred while reading file {} : ", file, StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + + return rows; + + } + + @Override + public List head(String path, int nrows) throws FileNotFoundException { + List<HashMap> rows=new ArrayList<HashMap>(); + Path file = new Path(path); + SequenceFile.Reader reader; + try{ + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable) + ReflectionUtils.newInstance(reader.getKeyClass(), conf); + Node value = new Node(); + int i = 0; + while(reader.next(key, value) && i<nrows) { + 
HashMap<String, String> t_row = getNodeRow(key,value); + rows.add(t_row); + + i++; + } + reader.close(); + + }catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + + }catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + LOG.error("Error occurred while reading file {} : ", file, + StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + + return rows; + } + + @Override + public List slice(String path, int start, int end) + throws FileNotFoundException { + List<HashMap> rows=new ArrayList<HashMap>(); + Path file = new Path(path); + SequenceFile.Reader reader; + try{ + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable) + ReflectionUtils.newInstance(reader.getKeyClass(), conf); + Node value = new Node(); + int i = 0; + + for(;i<start && reader.next(key, value);i++){} // increment to read start position + while(reader.next(key, value) && i<end) { + HashMap<String, String> t_row = getNodeRow(key,value); + rows.add(t_row); + + i++; + } + reader.close(); + + }catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + + }catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + LOG.error("Error occurred while reading file {} : ", file, + StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + + return rows; + } + + @Override + public int count(String path) throws FileNotFoundException { + Path file = new Path(path); + SequenceFile.Reader reader; + int i =0; + try{ + reader = new SequenceFile.Reader(conf, Reader.file(file)); + Writable key = (Writable) + ReflectionUtils.newInstance(reader.getKeyClass(), conf); + Node value = new Node(); + + while(reader.next(key, value)) { + i++; + } + reader.close(); + + }catch(FileNotFoundException fne){ + throw new FileNotFoundException(); + + }catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + LOG.error("Error occurred while 
reading file {} : ", file, + StringUtils.stringifyException(e)); + throw new WebApplicationException(); + } + + return i; + } + + private HashMap<String, String> getNodeRow(Writable key, Node value) { + HashMap<String, String> t_row = new HashMap<String, String>(); + t_row.put("key_url", key.toString()); + t_row.put("num_inlinks", String.valueOf(value.getNumInlinks()) ); + t_row.put("num_outlinks", String.valueOf(value.getNumOutlinks()) ); + t_row.put("inlink_score", String.valueOf(value.getInlinkScore())); + t_row.put("outlink_score", String.valueOf(value.getOutlinkScore())); + t_row.put("metadata", value.getMetadata().toString()); + + return t_row; + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java new file mode 100644 index 0000000..3fc5ba3 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.service.impl; + +import java.util.Collection; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.nutch.service.model.response.JobInfo; + +import com.google.common.collect.Lists; +import com.google.common.collect.Queues; + + + +public class NutchServerPoolExecutor extends ThreadPoolExecutor{ + + private Queue<JobWorker> workersHistory; + private Queue<JobWorker> runningWorkers; + + public NutchServerPoolExecutor(int corePoolSize, int maxPoolSize, long keepAliveTime, TimeUnit unit, BlockingQueue<Runnable> workQueue){ + super(corePoolSize, maxPoolSize, keepAliveTime, unit, workQueue); + workersHistory = Queues.newArrayBlockingQueue(maxPoolSize); + runningWorkers = Queues.newArrayBlockingQueue(maxPoolSize); + } + + @Override + protected void beforeExecute(Thread thread, Runnable runnable) { + super.beforeExecute(thread, runnable); + synchronized (runningWorkers) { + runningWorkers.offer(((JobWorker) runnable)); + } + } + @Override + protected void afterExecute(Runnable runnable, Throwable throwable) { + super.afterExecute(runnable, throwable); + synchronized (runningWorkers) { + runningWorkers.remove(((JobWorker) runnable).getInfo()); + } + JobWorker worker = ((JobWorker) runnable); + addStatusToHistory(worker); + } + + private void addStatusToHistory(JobWorker worker) { + synchronized (workersHistory) { + if (!workersHistory.offer(worker)) { + workersHistory.poll(); + workersHistory.add(worker); + } + } + } + + /** + * Find the Job Worker Thread + * @param jobId + * @return + */ + public JobWorker findWorker(String jobId) { + synchronized (runningWorkers) { + for (JobWorker worker : 
runningWorkers) { + if (StringUtils.equals(worker.getInfo().getId(), jobId)) { + return worker; + } + } + } + return null; + } + + /** + * Gives the Job history + * @return + */ + public Collection<JobInfo> getJobHistory() { + return getJobsInfo(workersHistory); + } + + /** + * Gives the list of currently running jobs + * @return + */ + public Collection<JobInfo> getJobRunning() { + return getJobsInfo(runningWorkers); + } + + /** + * Gives all jobs(currently running and completed) + * @return + */ + @SuppressWarnings("unchecked") + public Collection<JobInfo> getAllJobs() { + return CollectionUtils.union(getJobRunning(), getJobHistory()); + } + + private Collection<JobInfo> getJobsInfo(Collection<JobWorker> workers) { + List<JobInfo> jobsInfo = Lists.newLinkedList(); + for (JobWorker worker : workers) { + jobsInfo.add(worker.getInfo()); + } + return jobsInfo; + } + + + public JobInfo getInfo(String jobId) { + for (JobInfo jobInfo : getAllJobs()) { + if (StringUtils.equals(jobId, jobInfo.getId())) { + return jobInfo; + } + } + return null; + } + +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java b/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java new file mode 100644 index 0000000..ce5d120 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/impl/SequenceReader.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.service.impl;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.ws.rs.WebApplicationException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.service.NutchReader;

/**
 * Enables reading a sequence file; the methods provide different ways to
 * read it. Each record becomes a two-element list holding the
 * {@code toString()} of its key and of its value.
 * @author Sujen Shah
 *
 */
public class SequenceReader implements NutchReader {

  /**
   * Reads every record of the file.
   *
   * @param path location of the sequence file
   * @return one [key, value] list per record
   * @throws FileNotFoundException if the file does not exist
   * @throws WebApplicationException on any other I/O failure
   */
  @Override
  public List<List<String>> read(String path) throws FileNotFoundException {
    return readRows(path, 0, Integer.MAX_VALUE);
  }

  /**
   * Reads at most the first {@code nrows} records of the file.
   *
   * @param path location of the sequence file
   * @param nrows maximum number of records to return
   * @return one [key, value] list per record
   * @throws FileNotFoundException if the file does not exist
   * @throws WebApplicationException on any other I/O failure
   */
  @Override
  public List<List<String>> head(String path, int nrows)
      throws FileNotFoundException {
    return readRows(path, 0, nrows);
  }

  /**
   * Reads the records with zero-based positions {@code start} (inclusive) to
   * {@code end} (exclusive).
   *
   * @param path location of the sequence file
   * @param start position of the first record to return
   * @param end position just past the last record to return
   * @return one [key, value] list per record in the window
   * @throws FileNotFoundException if the file does not exist
   * @throws WebApplicationException on any other I/O failure
   */
  @Override
  public List<List<String>> slice(String path, int start, int end)
      throws FileNotFoundException {
    return readRows(path, start, end);
  }

  /**
   * Counts the records in the file.
   *
   * @param path location of the sequence file
   * @return number of records
   * @throws FileNotFoundException if the file does not exist
   * @throws WebApplicationException on any other I/O failure
   */
  @Override
  public int count(String path) throws FileNotFoundException {
    Path file = new Path(path);
    int total = 0;
    // try-with-resources closes the reader even when next() throws.
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, Reader.file(file))) {
      Writable key =
          (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value =
          (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      while (reader.next(key, value)) {
        total++;
      }
    } catch (FileNotFoundException fne) {
      // Rethrow the original so its message (the missing path) is kept.
      throw fne;
    } catch (IOException e) {
      LOG.error("Error occurred while reading file {} : {}", file,
          StringUtils.stringifyException(e));
      throw new WebApplicationException(e);
    }
    return total;
  }

  /**
   * Shared implementation of read/head/slice: returns the rows at positions
   * [start, end) and always closes the underlying reader.
   */
  private List<List<String>> readRows(String path, int start, int end)
      throws FileNotFoundException {
    List<List<String>> rows = new ArrayList<List<String>>();
    Path file = new Path(path);
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, Reader.file(file))) {
      Writable key =
          (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value =
          (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

      int position = 0;
      // Consume the records in front of the requested window.
      while (position < start && reader.next(key, value)) {
        position++;
      }
      // Bound is checked first so no record past the window is read.
      while (position < end && reader.next(key, value)) {
        List<String> row = new ArrayList<String>();
        row.add(key.toString());
        row.add(value.toString());
        rows.add(row);
        position++;
      }
    } catch (FileNotFoundException fne) {
      // Rethrow the original so its message (the missing path) is kept.
      throw fne;
    } catch (IOException e) {
      LOG.error("Error occurred while reading file {} : {}", file,
          StringUtils.stringifyException(e));
      throw new WebApplicationException(e);
    }
    return rows;
  }

}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java new file mode 100644 index 0000000..5d069dc --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/DbQuery.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.model.request; + +import java.util.HashMap; +import java.util.Map; + +/** Mutable request bean with plain getters/setters for confId, type, args and crawlId; args starts out as an empty HashMap, the other fields as null. */ +public class DbQuery { + + private String confId; + private String type; + private Map<String, String> args = new HashMap<String, String>(); + private String crawlId; + + public String getConfId() { + return confId; + } + public void setConfId(String confId) { + this.confId = confId; + } + public Map<String, String> getArgs() { + return args; + } + public void setArgs(Map<String, String> args) { + this.args = args; + } + public String getType() { + return type; + } + public void setType(String type) { + this.type = type; + } + public String getCrawlId() { + return crawlId; + } + public void setCrawlId(String crawlId) { + this.crawlId = crawlId; + } + + + +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java new file mode 100644 index 0000000..af6c945 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/JobConfig.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nutch.service.model.request; + +import java.util.Map; + +import org.apache.nutch.service.JobManager.JobType; + + +/** Mutable request bean with plain getters/setters for crawlId, type (a JobType), confId, jobClassName and args; no field has an initializer, so every getter returns null until its setter has been called. */ +public class JobConfig { + private String crawlId; + private JobType type; + private String confId; + private String jobClassName; + private Map<String, Object> args; + + public String getCrawlId() { + return crawlId; + } + + public void setCrawlId(String crawlId) { + this.crawlId = crawlId; + } + + public JobType getType() { + return type; + } + + public void setType(JobType type) { + this.type = type; + } + + public String getConfId() { + return confId; + } + + public void setConfId(String confId) { + this.confId = confId; + } + + public Map<String, Object> getArgs() { + return args; + } + + public void setArgs(Map<String, Object> args) { + this.args = args; + } + + public String getJobClassName() { + return jobClassName; + } + + public void setJobClassName(String jobClass) { + this.jobClassName = jobClass; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java new file mode 100644 index 0000000..ffa9e3e --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/NutchConfig.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.service.model.request; + +import java.util.Map; + +import java.util.Collections; + +/** Mutable request bean with plain getters/setters for configId, force and params; force starts as false and params as Collections.emptyMap(), which cannot be modified in place and must be replaced through setParams. */ +public class NutchConfig { + private String configId; + private boolean force = false; + private Map<String, String> params = Collections.emptyMap(); + + public Map<String, String> getParams() { + return params; + } + + public void setParams(Map<String, String> params) { + this.params = params; + } + + public String getConfigId() { + return configId; + } + + public void setConfigId(String configId) { + this.configId = configId; + } + + public boolean isForce() { + return force; + } + + public void setForce(boolean force) { + this.force = force; + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java new file mode 100644 index 0000000..81d7440 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/ReaderConfig.java @@ -0,0 +1,30 
/**
 * Request payload holding a single {@code path} string.
 *
 * <p>Plain mutable bean; the value is stored and returned as given, with no
 * validation or normalisation.
 */
public class ReaderConfig {

  private String path;

  /**
   * @param path the path value to store; may be {@code null}
   */
  public void setPath(final String path) {
    this.path = path;
  }

  /**
   * @return the stored path, or {@code null} if none was set
   */
  public String getPath() {
    return this.path;
  }
}
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.apache.nutch.service.model.request; + +import java.io.Serializable; +import java.util.Collection; + +import org.apache.commons.collections4.CollectionUtils; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonManagedReference; + +public class SeedList implements Serializable { + + private Long id; + + private String name; + + @JsonManagedReference + private Collection<SeedUrl> seedUrls; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public Collection<SeedUrl> getSeedUrls() { + return seedUrls; + } + + public void setSeedUrls(Collection<SeedUrl> seedUrls) { + this.seedUrls = seedUrls; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + @JsonIgnore + public int getSeedUrlsCount() { + if (CollectionUtils.isEmpty(seedUrls)) { + return 0; + } + return seedUrls.size(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((id == null) ? 
0 : id.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SeedList other = (SeedList) obj; + if (id == null) { + if (other.id != null) + return false; + } else if (!id.equals(other.id)) + return false; + return true; + } + +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java b/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java new file mode 100644 index 0000000..b1c93a8 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/request/SeedUrl.java @@ -0,0 +1,89 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.apache.nutch.service.model.request; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonBackReference; +import com.fasterxml.jackson.annotation.JsonIgnore; + +public class SeedUrl implements Serializable { + + private Long id; + + @JsonBackReference + private SeedList seedList; + + private String url; + + public SeedUrl() {} + + public SeedUrl(String url) { + this.url = url; + } + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + @JsonIgnore + public SeedList getSeedList() { + return seedList; + } + + @JsonIgnore + public void setSeedList(SeedList seedList) { + this.seedList = seedList; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((id == null) ? 0 : id.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SeedUrl other = (SeedUrl) obj; + if (id == null) { + if (other.id != null) + return false; + } else if (!id.equals(other.id)) + return false; + return true; + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java b/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java new file mode 100644 index 0000000..267b50b --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/response/FetchNodeDbInfo.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.service.model.response; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.nutch.parse.Outlink; + +public class FetchNodeDbInfo { + + private String url; + private int status; + private int numOfOutlinks; + private List<ChildNode> children = new ArrayList<ChildNode>(); + + + public String getUrl() { + return url; + } + + + public void setUrl(String url) { + this.url = url; + } + + + public int getStatus() { + return status; + } + + + public void setStatus(int status) { + this.status = status; + } + + + public int getNumOfOutlinks() { + return numOfOutlinks; + } + + + public void setNumOfOutlinks(int numOfOutlinks) { + this.numOfOutlinks = numOfOutlinks; + } + + public void setChildNodes(Outlink[] links){ + ChildNode childNode; + for(Outlink outlink: links){ + childNode = new ChildNode(outlink.getToUrl(), outlink.getAnchor()); + children.add(childNode); + } + } + + + private class ChildNode{ + private String childUrl; + private String anchorText; + + public ChildNode(String childUrl, String anchorText){ + this.childUrl = childUrl; + this.anchorText = anchorText; + } + + public String getAnchorText() { + return anchorText; + } + public void 
setAnchorText(String anchorText) { + this.anchorText = anchorText; + } + public String getChildUrl() { + return childUrl; + } + public void setChildUrl(String childUrl) { + this.childUrl = childUrl; + } + } + + + public List<ChildNode> getChildren() { + return children; + } + + + public void setChildren(List<ChildNode> children) { + this.children = children; + } + +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java b/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java new file mode 100644 index 0000000..c2e185d --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/response/JobInfo.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.model.response; + +import java.util.Map; + +import org.apache.nutch.service.JobManager.JobType; +import org.apache.nutch.service.model.request.JobConfig; + +/** + * This is the response object containing Job information + * + * + */ +public class JobInfo { + + public static enum State { + IDLE, RUNNING, FINISHED, FAILED, KILLED, STOPPING, KILLING, ANY + }; + + private String id; + private JobType type; + private String confId; + private Map<String, Object> args; + private Map<String, Object> result; + private State state; + private String msg; + private String crawlId; + + public JobInfo(String generateId, JobConfig jobConfig, State state, + String msg) { + this.id = generateId; + this.type = jobConfig.getType(); + this.confId = jobConfig.getConfId(); + this.crawlId = jobConfig.getCrawlId(); + this.args = jobConfig.getArgs(); + this.msg = msg; + this.state = state; + } + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } + public JobType getType() { + return type; + } + public void setType(JobType type) { + this.type = type; + } + public String getConfId() { + return confId; + } + public void setConfId(String confId) { + this.confId = confId; + } + public Map<String, Object> getArgs() { + return args; + } + public void setArgs(Map<String, Object> args) { + this.args = args; + } + public Map<String, Object> getResult() { + return result; + } + public void setResult(Map<String, Object> result) { + this.result = result; + } + public State getState() { + return state; + } + public void setState(State state) { + this.state = state; + } + public String getMsg() { + return msg; + } + public void setMsg(String msg) { + this.msg = msg; + } + public String getCrawlId() { + return crawlId; + } + public void setCrawlId(String crawlId) { + this.crawlId = crawlId; + } +} 
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java b/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java new file mode 100644 index 0000000..f8867e6 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/model/response/NutchServerInfo.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.model.response; + +import java.util.Collection; +import java.util.Date; +import java.util.Set; + +public class NutchServerInfo { + + private Date startDate; + private Set<String> configuration; + private Collection<JobInfo> jobs; + private Collection<JobInfo> runningJobs; + public Date getStartDate() { + return startDate; + } + public void setStartDate(Date startDate) { + this.startDate = startDate; + } + public Set<String> getConfiguration() { + return configuration; + } + public void setConfiguration(Set<String> configuration) { + this.configuration = configuration; + } + public Collection<JobInfo> getJobs() { + return jobs; + } + public void setJobs(Collection<JobInfo> jobs) { + this.jobs = jobs; + } + public Collection<JobInfo> getRunningJobs() { + return runningJobs; + } + public void setRunningJobs(Collection<JobInfo> runningJobs) { + this.runningJobs = runningJobs; + } + + +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java new file mode 100644 index 0000000..ebe4138 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/AbstractResource.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.service.resources; + +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.Status; + +import org.apache.nutch.service.ConfManager; +import org.apache.nutch.service.JobManager; +import org.apache.nutch.service.NutchServer; + +@Produces(MediaType.APPLICATION_JSON) +public abstract class AbstractResource { + + protected JobManager jobManager; + protected ConfManager configManager; + protected NutchServer server; + + public AbstractResource() { + server = NutchServer.getInstance(); + configManager = NutchServer.getInstance().getConfManager(); + jobManager = NutchServer.getInstance().getJobManager(); + } + + protected void throwBadRequestException(String message) { + throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).build()); + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java new file mode 100644 index 0000000..3f0189e --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/AdminResource.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.service.resources; + +import java.util.Date; + +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.QueryParam; + +import org.apache.nutch.service.model.response.JobInfo.State; +import org.apache.nutch.service.model.response.NutchServerInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Path(value="/admin") +public class AdminResource extends AbstractResource{ + + private final int DELAY_SEC = 1; + private static final Logger LOG = LoggerFactory + .getLogger(AdminResource.class); + + /** + * To get the status of the Nutch Server + * @return + */ + @GET + @Path(value="/") + public NutchServerInfo getServerStatus(){ + NutchServerInfo serverInfo = new NutchServerInfo(); + serverInfo.setConfiguration(configManager.list()); + serverInfo.setStartDate(new Date(server.getStarted())); + serverInfo.setJobs(jobManager.list(null, State.ANY)); + serverInfo.setRunningJobs(jobManager.list(null, State.RUNNING)); + return serverInfo; + } + + /** + * Stop the Nutch server + * @param force If set to true, it will kill any running jobs + * @return + */ + @GET + @Path(value="/stop") + public String stopServer(@QueryParam("force") boolean force){ + if(!server.canStop(force)){ + return "Jobs still running -- Cannot stop server 
now" ; + } + scheduleServerStop(); + return "Stopping in server on port " + server.getPort(); + } + + private void scheduleServerStop() { + LOG.info("Shutting down server in {} sec", DELAY_SEC); + Thread thread = new Thread() { + public void run() { + try { + Thread.sleep(DELAY_SEC*1000); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + server.stop(); + LOG.info("Service stopped."); + } + }; + thread.setDaemon(true); + thread.start(); + LOG.info("Service shutting down..."); + } + +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java new file mode 100644 index 0000000..6afd621 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/ConfigResource.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nutch.service.resources; + + +import java.util.Map; +import java.util.Set; + +import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.PUT; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.Status; + +import org.apache.nutch.service.model.request.NutchConfig; +import com.fasterxml.jackson.jaxrs.annotation.JacksonFeatures; +import com.fasterxml.jackson.databind.SerializationFeature; + +@Path("/config") +public class ConfigResource extends AbstractResource{ + + public static final String DEFAULT = "default"; + + /** + * Returns a list of all configurations created. + * @return List of configurations + */ + @GET + @Path("/") + @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT }) + public Set<String> getConfigs() { + return configManager.list(); + } + + /** + * Get configuration properties + * @param configId The configuration ID to fetch + * @return HashMap of the properties set within the given configId + */ + @GET + @Path("/{configId}") + @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT }) + public Map<String, String> getConfig(@PathParam("configId") String configId) { + return configManager.getAsMap(configId); + } + + /** + * Get property + * @param configId The ID of the configuration + * @param propertyId The name(key) of the property + * @return value of the specified property in the provided configId. 
+ */ + @GET + @Path("/{configId}/{propertyId}") + @Produces(MediaType.TEXT_PLAIN) + @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT }) + public String getProperty(@PathParam("configId") String configId, + @PathParam("propertyId") String propertyId) { + return configManager.getAsMap(configId).get(propertyId); + } + + /** + * Removes the configuration from the list of known configurations. + * @param configId The ID of the configuration to delete + */ + @DELETE + @Path("/{configId}") + public void deleteConfig(@PathParam("configId") String configId) { + configManager.delete(configId); + } + + /** + * Create new configuration. + * @param newConfig + * @return The name of the new configuration created + */ + @POST + @Path("/create") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.TEXT_PLAIN) + public Response createConfig(NutchConfig newConfig) { + if (newConfig == null) { + return Response.status(400) + .entity("Nutch configuration cannot be empty!").build(); + } + try{ + configManager.create(newConfig); + }catch(Exception e){ + return Response.status(400) + .entity(e.getMessage()).build(); + } + return Response.ok(newConfig.getConfigId()).build(); + } + + /** + * Adds/Updates a particular property value in the configuration + * @param confId Configuration ID whose property needs to be updated. Make sure that the given + * confId exists to prevent errors. 
+ * @param propertyKey Name of the property + * @param value Value as a simple text + * @return Success code + */ + @PUT + @Path("/{configId}/{propertyId}") + @Consumes(MediaType.TEXT_PLAIN) + public Response updateProperty(@PathParam("configId")String confId, + @PathParam("propertyId")String propertyKey, String value) { + try{ + configManager.setProperty(confId, propertyKey, value); + }catch(Exception e) { + return Response.status(400).entity(e.getMessage()).build(); + } + return Response.ok().build(); + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java new file mode 100644 index 0000000..2672fcc --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/DbResource.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.resources; + + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import javax.ws.rs.Consumes; +import javax.ws.rs.DefaultValue; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.Status; + +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.crawl.CrawlDbReader; +import org.apache.nutch.fetcher.FetchNode; +import org.apache.nutch.fetcher.FetchNodeDb; +import org.apache.nutch.service.model.request.DbQuery; +import org.apache.nutch.service.model.response.FetchNodeDbInfo; + +@Path(value = "/db") +public class DbResource extends AbstractResource { + + @POST + @Path(value = "/crawldb") + @Consumes(MediaType.APPLICATION_JSON) + public Response readdb(DbQuery dbQuery){ + if(dbQuery == null) + return Response.status(Status.BAD_REQUEST).build(); + + Configuration conf = configManager.get(dbQuery.getConfId()); + if(conf == null){ + conf = configManager.get(ConfigResource.DEFAULT); + } + if(dbQuery.getCrawlId() == null || dbQuery.getType() == null){ + return Response.status(Status.BAD_REQUEST).build(); + } + String type = dbQuery.getType(); + + if(type.equalsIgnoreCase("stats")){ + return crawlDbStats(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); + } + if(type.equalsIgnoreCase("dump")){ + return crawlDbDump(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); + } + if(type.equalsIgnoreCase("topN")){ + return crawlDbTopN(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); + } + if(type.equalsIgnoreCase("url")){ + return crawlDbUrl(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); + } + return null; + + } + + @GET + @Path(value="/fetchdb") + public List<FetchNodeDbInfo> fetchDb(@DefaultValue("0")@QueryParam("to")int to, @DefaultValue("0")@QueryParam("from")int from){ + List<FetchNodeDbInfo> listOfFetchedNodes = 
new ArrayList<FetchNodeDbInfo>(); + Map<Integer, FetchNode> fetchNodedbMap = FetchNodeDb.getInstance().getFetchNodeDb(); + + if(to ==0 || to>fetchNodedbMap.size()){ + to = fetchNodedbMap.size(); + } + for(int i=from;i<=to;i++){ + if(!fetchNodedbMap.containsKey(i)){ + continue; + } + FetchNode node = fetchNodedbMap.get(i); + FetchNodeDbInfo fdbInfo = new FetchNodeDbInfo(); + fdbInfo.setUrl(node.getUrl().toString()); + fdbInfo.setStatus(node.getStatus()); + fdbInfo.setNumOfOutlinks(node.getOutlinks().length); + fdbInfo.setChildNodes(node.getOutlinks()); + listOfFetchedNodes.add(fdbInfo); + } + + return listOfFetchedNodes; + } + @SuppressWarnings("resource") + private Response crawlDbStats(Configuration conf, Map<String, String> args, String crawlId){ + CrawlDbReader dbr = new CrawlDbReader(); + try{ + return Response.ok(dbr.query(args, conf, "stats", crawlId)).build(); + }catch(Exception e){ + e.printStackTrace(); + return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); + } + } + + @Produces(MediaType.APPLICATION_OCTET_STREAM) + private Response crawlDbDump(Configuration conf, Map<String, String> args, String crawlId){ + CrawlDbReader dbr = new CrawlDbReader(); + try{ + return Response.ok(dbr.query(args, conf, "dump", crawlId), MediaType.APPLICATION_OCTET_STREAM).build(); + }catch(Exception e){ + e.printStackTrace(); + return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); + } + } + + @Produces(MediaType.APPLICATION_OCTET_STREAM) + private Response crawlDbTopN(Configuration conf, Map<String, String> args, String crawlId) { + CrawlDbReader dbr = new CrawlDbReader(); + try{ + return Response.ok(dbr.query(args, conf, "topN", crawlId), MediaType.APPLICATION_OCTET_STREAM).build(); + }catch(Exception e){ + e.printStackTrace(); + return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); + } + } + + private Response crawlDbUrl(Configuration conf, Map<String, String> args, 
String crawlId){ + CrawlDbReader dbr = new CrawlDbReader(); + try{ + return Response.ok(dbr.query(args, conf, "url", crawlId)).build(); + }catch(Exception e){ + e.printStackTrace(); + return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); + } + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java new file mode 100644 index 0000000..b142d73 --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/JobResource.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.service.resources; + +import java.util.Collection; + +import javax.ws.rs.Consumes; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; + +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.jaxrs.annotation.JacksonFeatures; +import org.apache.nutch.service.model.request.JobConfig; +import org.apache.nutch.service.model.response.JobInfo; +import org.apache.nutch.service.model.response.JobInfo.State; + +@Path(value = "/job") +public class JobResource extends AbstractResource { + + /** + * Get job history + * @param crawlId + * @return A nested JSON object of all the jobs created + */ + @GET + @Path(value = "/") + @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT }) + public Collection<JobInfo> getJobs(@QueryParam("crawlId") String crawlId) { + return jobManager.list(crawlId, State.ANY); + } + + /** + * Get job info + * @param id Job ID + * @param crawlId Crawl ID + * @return A JSON object of job parameters + */ + @GET + @Path(value = "/{id}") + @JacksonFeatures(serializationEnable = { SerializationFeature.INDENT_OUTPUT }) + public JobInfo getInfo(@PathParam("id") String id, + @QueryParam("crawlId") String crawlId) { + return jobManager.get(crawlId, id); + } + + /** + * Stop Job + * @param id Job ID + * @param crawlId + * @return + */ + @GET + @Path(value = "/{id}/stop") + public boolean stop(@PathParam("id") String id, + @QueryParam("crawlId") String crawlId) { + return jobManager.stop(crawlId, id); + } + + + @GET + @Path(value = "/{id}/abort") + public boolean abort(@PathParam("id") String id, + @QueryParam("crawlId") String crawlId) { + return jobManager.abort(crawlId, id); + } + + /** + * Create a new job + * @param config The parameters of the job to create + * @return A JSON object of the job created with its details + */ + @POST + @Path(value = 
"/create") + @Consumes(MediaType.APPLICATION_JSON) + public JobInfo create(JobConfig config) { + if (config == null) { + throwBadRequestException("Job configuration is required!"); + } + return jobManager.create(config); + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java new file mode 100644 index 0000000..030999e --- /dev/null +++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/ReaderResouce.java @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/
package org.apache.nutch.service.resources;

import java.util.HashMap;

import javax.ws.rs.Consumes;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;

import org.apache.nutch.service.NutchReader;
import org.apache.nutch.service.impl.LinkReader;
import org.apache.nutch.service.impl.NodeReader;
import org.apache.nutch.service.impl.SequenceReader;
import org.apache.nutch.service.model.request.ReaderConfig;

/**
 * The Reader endpoint enables a user to read sequence files,
 * nodes and links from the Nutch webgraph.
 * @author Sujen Shah
 *
 */
@Path("/reader")
public class ReaderResouce {

  /**
   * Read a sequence file.
   * @param readerConf request body naming the path to read; a missing body
   *        is answered with 400 Bad Request
   * @param nrows Number of rows to read. If not specified all rows will be read
   * @param start Specify a starting line number to read the file from
   * @param end The line number to read the file till
   * @param count Boolean value. If true, this endpoint will return the
   *        number of lines in the file
   * @return Appropriate HTTP response based on the query
   */
  @Path("/sequence/read")
  @POST
  @Consumes(MediaType.APPLICATION_JSON)
  @Produces(MediaType.APPLICATION_JSON)
  public Response seqRead(ReaderConfig readerConf,
      @DefaultValue("-1")@QueryParam("nrows") int nrows,
      @DefaultValue("-1")@QueryParam("start") int start,
      @QueryParam("end")int end, @QueryParam("count") boolean count) {
    return performRead(new SequenceReader(), readerConf, nrows, start, end, count);
  }

  /**
   * Get Link Reader response schema
   * @return JSON object specifying the schema of the responses returned by the Link Reader
   */
  @Path("/link")
  @GET
  @Produces(MediaType.APPLICATION_JSON)
  public Response linkRead() {
    HashMap<String, String> schema = new HashMap<>();
    schema.put("key_url","string");
    schema.put("timestamp", "int");
    schema.put("score","float");
    schema.put("anchor","string");
    schema.put("linktype","string");
    schema.put("url","string");
    return Response.ok(schema).type(MediaType.APPLICATION_JSON).build();
  }

  /**
   * Read link objects using a {@link LinkReader}.
   * @param readerConf request body naming the path to read; a missing body
   *        is answered with 400 Bad Request
   * @param nrows number of rows to read; -1 (the default) means no limit
   * @param start first row of the slice to read; -1 (the default) disables slicing
   * @param end last row of the slice to read; slicing needs a value above 0
   * @param count if true, only the count reported by the reader is returned
   * @return Appropriate HTTP response based on the query
   */
  @Path("/link/read")
  @POST
  @Consumes(MediaType.APPLICATION_JSON)
  @Produces(MediaType.APPLICATION_JSON)
  public Response linkRead(ReaderConfig readerConf,
      @DefaultValue("-1")@QueryParam("nrows") int nrows,
      @DefaultValue("-1")@QueryParam("start") int start,
      @QueryParam("end") int end, @QueryParam("count") boolean count) {
    return performRead(new LinkReader(), readerConf, nrows, start, end, count);
  }

  /**
   * Get schema of the Node object
   * @return JSON object mapping each field of a node response to its type name
   */
  @Path("/node")
  @GET
  @Produces(MediaType.APPLICATION_JSON)
  public Response nodeRead() {
    HashMap<String, String> schema = new HashMap<>();
    schema.put("key_url","string");
    schema.put("num_inlinks", "int");
    schema.put("num_outlinks","int");
    schema.put("inlink_score","float");
    schema.put("outlink_score","float");
    schema.put("metadata","string");
    return Response.ok(schema).type(MediaType.APPLICATION_JSON).build();
  }

  /**
   * Read Node object as stored in the Nutch Webgraph
   * @param readerConf request body naming the path to read; a missing body
   *        is answered with 400 Bad Request
   * @param nrows number of rows to read; -1 (the default) means no limit
   * @param start first row of the slice to read; -1 (the default) disables slicing
   * @param end last row of the slice to read; slicing needs a value above 0
   * @param count if true, only the count reported by the reader is returned
   * @return Appropriate HTTP response based on the query
   */
  @Path("/node/read")
  @POST
  @Consumes(MediaType.APPLICATION_JSON)
  @Produces(MediaType.APPLICATION_JSON)
  public Response nodeRead(ReaderConfig readerConf,
      @DefaultValue("-1")@QueryParam("nrows") int nrows,
      @DefaultValue("-1")@QueryParam("start") int start,
      @QueryParam("end") int end, @QueryParam("count") boolean count) {
    return performRead(new NodeReader(), readerConf, nrows, start, end, count);
  }

  /**
   * Shared implementation of the three read endpoints.
   *
   * The options are evaluated in this order: {@code count}, then a slice
   * ({@code start > -1} and {@code end > 0}), then a head of {@code nrows}
   * rows ({@code nrows > -1}), and finally a full read.
   *
   * @param reader reader that performs the actual access
   * @param readerConf request body; may be {@code null} when the client sent none
   * @param nrows row limit, -1 for none
   * @param start first row of a slice, -1 for none
   * @param end last row of a slice
   * @param count whether only the count is wanted
   * @return 200 with the result (plain text for a count, JSON otherwise),
   *         or 400 when the body is missing or the reader throws
   */
  private Response performRead(NutchReader reader, ReaderConfig readerConf,
      int nrows, int start, int end, boolean count) {
    // Without this guard a request lacking a JSON body fails with a
    // NullPointerException on getPath() instead of a client error.
    if (readerConf == null) {
      return Response.status(Status.BAD_REQUEST)
          .entity("Reader configuration is required!").build();
    }
    String path = readerConf.getPath();
    Object result;
    try {
      if (count) {
        result = reader.count(path);
        return Response.ok(result).type(MediaType.TEXT_PLAIN).build();
      }
      if (start > -1 && end > 0) {
        result = reader.slice(path, start, end);
      } else if (nrows > -1) {
        result = reader.head(path, nrows);
      } else {
        result = reader.read(path);
      }
      return Response.ok(result).type(MediaType.APPLICATION_JSON).build();
    } catch (Exception e) {
      // NOTE(review): every reader failure is reported as "File not found";
      // kept as-is because clients may rely on this response.
      return Response.status(Status.BAD_REQUEST).entity("File not found").build();
    }
  }

}
http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java b/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java
new file mode 100644
index 0000000..5261139
---
/dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/service/resources/SeedResource.java
@@ -0,0 +1,111 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.service.resources;

import static javax.ws.rs.core.Response.status;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Collection;

import javax.ws.rs.Consumes;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;

import org.apache.commons.collections.CollectionUtils;
import org.apache.nutch.service.model.request.SeedList;
import org.apache.nutch.service.model.request.SeedUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.io.Files;

/**
 * REST endpoint under {@code /seed} that writes a posted seed list to a
 * temporary file.
 */
@Path("/seed")
public class SeedResource extends AbstractResource {
  // Logger is bound to this class so log entries are attributed correctly.
  private static final Logger log = LoggerFactory
      .getLogger(SeedResource.class);

  /**
   * Method creates seed list file and returns temporary directory path
   * @param seedList the seed URLs to write, one per line
   * @return 200 with the path of the directory holding the seed file,
   *         or 400 when no seed list was supplied
   * @throws WebApplicationException (500) when the file cannot be created
   *         or written
   */
  @POST
  @Path("/create")
  @Consumes(MediaType.APPLICATION_JSON)
  @Produces(MediaType.TEXT_PLAIN)
  public Response createSeedFile(SeedList seedList) {
    if (seedList == null) {
      return Response.status(Status.BAD_REQUEST)
          .entity("Seed list cannot be empty!").build();
    }
    File seedFile = createSeedFile();
    writeSeedUrls(seedFile, seedList.getSeedUrls());

    return Response.ok().entity(seedFile.getParent()).build();
  }

  /**
   * Writes every seed URL to the file and always closes the writer, so the
   * file handle is released even when a write fails part-way through.
   */
  private void writeSeedUrls(File seedFile, Collection<SeedUrl> seedUrls) {
    try (BufferedWriter writer = getWriter(seedFile)) {
      if (CollectionUtils.isNotEmpty(seedUrls)) {
        for (SeedUrl seedUrl : seedUrls) {
          writeUrl(writer, seedUrl);
        }
      }
    } catch (IOException e) {
      throw handleException(e);
    }
  }

  /** Writes one seed URL followed by a line separator. */
  private void writeUrl(BufferedWriter writer, SeedUrl seedUrl)
      throws IOException {
    writer.write(seedUrl.getUrl());
    writer.newLine();
  }

  /** Opens a buffered writer on the seed file. */
  private BufferedWriter getWriter(File seedFile) throws IOException {
    return new BufferedWriter(new FileWriter(seedFile));
  }

  /** Creates an empty {@code seed*.txt} file inside a new temporary directory. */
  private File createSeedFile() {
    try {
      return File.createTempFile("seed", ".txt", Files.createTempDir());
    } catch (IOException e) {
      throw handleException(e);
    }
  }

  /**
   * Logs the failure and converts it into a 500 response exception that
   * keeps the original exception as its cause.
   */
  private RuntimeException handleException(Exception e) {
    log.error("Cannot create seed file!", e);
    return new WebApplicationException(e, status(Status.INTERNAL_SERVER_ERROR)
        .entity("Cannot create seed file!").build());
  }

}
