Author: markus
Date: Mon Feb 10 11:45:30 2014
New Revision: 1566586
URL: http://svn.apache.org/r1566586
Log:
NUTCH-1707 DummyIndexingWriter
Added:
nutch/trunk/src/plugin/indexer-dummy/
nutch/trunk/src/plugin/indexer-dummy/build.xml
nutch/trunk/src/plugin/indexer-dummy/ivy.xml
nutch/trunk/src/plugin/indexer-dummy/plugin.xml
nutch/trunk/src/plugin/indexer-dummy/src/
nutch/trunk/src/plugin/indexer-dummy/src/java/
nutch/trunk/src/plugin/indexer-dummy/src/java/org/
nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/
nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/
nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/
nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/
nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
Modified:
nutch/trunk/src/plugin/build.xml
Modified: nutch/trunk/src/plugin/build.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=1566586&r1=1566585&r2=1566586&view=diff
==============================================================================
--- nutch/trunk/src/plugin/build.xml (original)
+++ nutch/trunk/src/plugin/build.xml Mon Feb 10 11:45:30 2014
@@ -34,6 +34,7 @@
<ant dir="index-more" target="deploy"/>
<ant dir="index-static" target="deploy"/>
<ant dir="index-metadata" target="deploy"/>
+ <ant dir="indexer-dummy" target="deploy"/>
<ant dir="indexer-elastic" target="deploy"/>
<ant dir="indexer-solr" target="deploy"/>
<ant dir="language-identifier" target="deploy"/>
@@ -123,6 +124,7 @@
<ant dir="index-more" target="clean"/>
<ant dir="index-static" target="clean"/>
<ant dir="index-metadata" target="clean"/>
+ <ant dir="indexer-dummy" target="clean"/>
<ant dir="indexer-elastic" target="clean"/>
<ant dir="indexer-solr" target="clean"/>
<ant dir="language-identifier" target="clean"/>
Added: nutch/trunk/src/plugin/indexer-dummy/build.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-dummy/build.xml?rev=1566586&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-dummy/build.xml (added)
+++ nutch/trunk/src/plugin/indexer-dummy/build.xml Mon Feb 10 11:45:30 2014
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="indexer-dummy" default="jar-core">
+
+ <import file="../build-plugin.xml" />
+
+</project>
Added: nutch/trunk/src/plugin/indexer-dummy/ivy.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-dummy/ivy.xml?rev=1566586&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-dummy/ivy.xml (added)
+++ nutch/trunk/src/plugin/indexer-dummy/ivy.xml Mon Feb 10 11:45:30 2014
@@ -0,0 +1,41 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<ivy-module version="1.0">
+ <info organisation="org.apache.nutch" module="${ant.project.name}">
+ <license name="Apache 2.0"/>
+ <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+ <description>
+ Apache Nutch
+ </description>
+ </info>
+
+ <configurations>
+ <include file="../../..//ivy/ivy-configurations.xml"/>
+ </configurations>
+
+ <publications>
+ <!--get the artifact from our module name-->
+ <artifact conf="master"/>
+ </publications>
+
+ <dependencies>
+ </dependencies>
+
+</ivy-module>
Added: nutch/trunk/src/plugin/indexer-dummy/plugin.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-dummy/plugin.xml?rev=1566586&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-dummy/plugin.xml (added)
+++ nutch/trunk/src/plugin/indexer-dummy/plugin.xml Mon Feb 10 11:45:30 2014
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin id="indexer-dummy" name="DummyIndexWriter" version="1.0.0"
+ provider-name="nutch.apache.org">
+
+ <runtime>
+ <library name="indexer-dummy.jar">
+ <export name="*" />
+ </library>
+ </runtime>
+
+ <requires>
+ <import plugin="nutch-extensionpoints" />
+ </requires>
+
+ <extension id="org.apache.nutch.indexer.dummy"
+ name="Dummy Index Writer"
+ point="org.apache.nutch.indexer.IndexWriter">
+ <implementation id="DummyIndexWriter"
+ class="org.apache.nutch.indexwriter.dummy.DummyIndexWriter" />
+ </extension>
+
+</plugin>
Added: nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java?rev=1566586&view=auto
==============================================================================
--- nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java (added)
+++ nutch/trunk/src/plugin/indexer-dummy/src/java/org/apache/nutch/indexwriter/dummy/DummyIndexWriter.java Mon Feb 10 11:45:30 2014
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.dummy;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.FileWriter;
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexerMapReduce;
+import org.apache.nutch.indexer.NutchDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * DummyIndexWriter. This pluggable indexer writes
+ * {@code <action>\t<url>\n} lines to a plain text file for debugging
+ * purposes. The per-document actions are delete, update and add;
+ * {@link #commit()} additionally writes a bare {@code commit} line.
+ * <p>
+ * The output file is named by the {@code dummy.path} configuration property
+ * and is opened as soon as {@link #setConf(Configuration)} is called.
+ */
+public class DummyIndexWriter implements IndexWriter {
+  public static final Logger LOG =
+      LoggerFactory.getLogger(DummyIndexWriter.class);
+
+  /** Configuration handed in through {@link #setConf(Configuration)}. */
+  private Configuration config;
+
+  /** Destination of the action lines; opened in {@link #setConf(Configuration)}. */
+  private Writer writer;
+
+  /** Whether delete actions are recorded; read from the job in {@link #open}. */
+  private boolean delete = false;
+
+  /**
+   * Reads the {@code IndexerMapReduce.INDEXER_DELETE} flag from the job
+   * (default {@code false}). The output file itself is not opened here.
+   *
+   * @param job  job configuration to read the delete flag from
+   * @param name unused by this writer
+   * @throws IOException never thrown by this implementation
+   */
+  public void open(JobConf job, String name) throws IOException {
+    delete = job.getBoolean(IndexerMapReduce.INDEXER_DELETE, false);
+  }
+
+  /**
+   * Writes a {@code delete\t<key>\n} line, but only when deletes were enabled
+   * in {@link #open(JobConf, String)}.
+   *
+   * @param key key of the document to delete
+   * @throws IOException if the line cannot be written
+   */
+  @Override
+  public void delete(String key) throws IOException {
+    if (delete) {
+      writer.write("delete\t" + key + "\n");
+    }
+  }
+
+  /**
+   * Writes an {@code update\t<url>\n} line for the document.
+   *
+   * @param doc document whose {@code url} field value is logged
+   * @throws IOException if the line cannot be written
+   */
+  @Override
+  public void update(NutchDocument doc) throws IOException {
+    writer.write("update\t" + doc.getFieldValue("url") + "\n");
+  }
+
+  /**
+   * Writes an {@code add\t<url>\n} line for the document.
+   *
+   * @param doc document whose {@code url} field value is logged
+   * @throws IOException if the line cannot be written
+   */
+  @Override
+  public void write(NutchDocument doc) throws IOException {
+    writer.write("add\t" + doc.getFieldValue("url") + "\n");
+  }
+
+  /**
+   * Flushes and closes the output file. Does nothing when no file was opened.
+   *
+   * @throws IOException if flushing or closing the file fails
+   */
+  public void close() throws IOException {
+    if (writer == null) {
+      return;
+    }
+    // Writer.close() flushes before closing, so a separate flush() call is
+    // not needed and cannot prevent the close from being attempted.
+    writer.close();
+  }
+
+  /**
+   * Writes a {@code commit\n} line.
+   *
+   * @throws IOException if the line cannot be written
+   */
+  @Override
+  public void commit() throws IOException {
+    writer.write("commit\n");
+  }
+
+  @Override
+  public Configuration getConf() {
+    return config;
+  }
+
+  /**
+   * Stores the configuration and opens the file named by {@code dummy.path}
+   * for writing.
+   *
+   * @param conf configuration that must define {@code dummy.path}
+   * @throws RuntimeException if {@code dummy.path} is not set, or if the file
+   *           cannot be opened (the {@link IOException} is kept as the cause)
+   */
+  @Override
+  public void setConf(Configuration conf) {
+    config = conf;
+    String path = conf.get("dummy.path");
+    if (path == null) {
+      String message = "Missing path. Should be set via -Ddummy.path";
+      message += "\n" + describe();
+      LOG.error(message);
+      throw new RuntimeException(message);
+    }
+
+    try {
+      writer = new BufferedWriter(new FileWriter(path));
+    } catch (IOException e) {
+      // Fail here with the real cause instead of leaving writer null and
+      // hitting a NullPointerException on the first write.
+      String message = "Cannot open " + path + " for writing";
+      LOG.error(message, e);
+      throw new RuntimeException(message, e);
+    }
+  }
+
+  /**
+   * Describes this writer and its configuration properties.
+   *
+   * @return the writer name followed by one line per supported property
+   */
+  public String describe() {
+    StringBuilder sb = new StringBuilder("DummyIndexWriter\n");
+    sb.append("\t").append("dummy.path : Path of the file to write to (mandatory)\n");
+    return sb.toString();
+  }
+}