Author: lewismc
Date: Sun Apr 15 19:00:19 2012
New Revision: 1326400
URL: http://svn.apache.org/viewvc?rev=1326400&view=rev
Log:
commit to address NUTCH-1333 and update to CHANGES.txt
Added:
nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
nutch/branches/nutchgora/conf/gora-sql-mapping.xml
nutch/branches/nutchgora/conf/gora.properties
nutch/branches/nutchgora/conf/log4j.properties
nutch/branches/nutchgora/conf/nutch-default.xml
nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template
nutch/branches/nutchgora/ivy/ivy.xml
Modified: nutch/branches/nutchgora/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Sun Apr 15 19:00:19 2012
@@ -2,6 +2,8 @@ Nutch Change Log
Release nutchgora - Current Development
+* NUTCH-1333 Introduce AvroStore, DataFileAvroStore and Accumulo Datastore implementations (lewismc)
+
* NUTCH-1312 Nutchgora to send HTTP-accept header (ferdy)
* NUTCH-1311 Add response headers to datastore for the protocol-httpclient
plugin (Dan Rosher via ferdy)
Added: nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml?rev=1326400&view=auto
==============================================================================
--- nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml Sun Apr 15 19:00:19
2012
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<gora-orm>
+
+ <table name="webpage">
+ <family name="p" maxVersions="1"/> <!-- This can also have params like compression, bloom filters -->
+ <family name="f" maxVersions="1"/>
+ <family name="s" maxVersions="1"/>
+ <family name="il" maxVersions="1"/>
+ <family name="ol" maxVersions="1"/>
+ <family name="h" maxVersions="1"/>
+ <family name="mtdt" maxVersions="1"/>
+ <family name="mk" maxVersions="1"/>
+ </table>
+ <class table="webpage" keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">
+
+ <!-- fetch fields -->
+ <field name="baseUrl" family="f" qualifier="bas"/>
+ <field name="status" family="f" qualifier="st"/>
+ <field name="prevFetchTime" family="f" qualifier="pts"/>
+ <field name="fetchTime" family="f" qualifier="ts"/>
+ <field name="fetchInterval" family="f" qualifier="fi"/>
+ <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
+ <field name="reprUrl" family="f" qualifier="rpr"/>
+ <field name="content" family="f" qualifier="cnt"/>
+ <field name="contentType" family="f" qualifier="typ"/>
+ <field name="protocolStatus" family="f" qualifier="prot"/>
+ <field name="modifiedTime" family="f" qualifier="mod"/>
+
+ <!-- parse fields -->
+ <field name="title" family="p" qualifier="t"/>
+ <field name="text" family="p" qualifier="c"/>
+ <field name="parseStatus" family="p" qualifier="st"/>
+ <field name="signature" family="p" qualifier="sig"/>
+ <field name="prevSignature" family="p" qualifier="psig"/>
+
+ <!-- score fields -->
+ <field name="score" family="s" qualifier="s"/>
+ <field name="headers" family="h"/>
+ <field name="inlinks" family="il"/>
+ <field name="outlinks" family="ol"/>
+ <field name="metadata" family="mtdt"/>
+ <field name="markers" family="mk"/>
+ </class>
+
+</gora-orm>
Modified: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml Sun Apr 15
19:00:19 2012
@@ -1,5 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
-
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
<gora-orm>
<keyspace name="webpage" cluster="Test Cluster" host="localhost">
@@ -40,4 +55,4 @@
<field name="protocolStatus" family="sc" qualifier="prs"/>
</class>
-</gora-orm>
\ No newline at end of file
+</gora-orm>
Modified: nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-hbase-mapping.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-hbase-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Sun Apr 15 19:00:19
2012
@@ -1,5 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
-
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
<gora-orm>
<table name="webpage">
@@ -43,4 +58,4 @@
<field name="markers" family="mk"/>
</class>
-</gora-orm>
\ No newline at end of file
+</gora-orm>
Modified: nutch/branches/nutchgora/conf/gora-sql-mapping.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-sql-mapping.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-sql-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-sql-mapping.xml Sun Apr 15 19:00:19 2012
@@ -1,5 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
-
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
<gora-orm>
<class name="org.apache.nutch.storage.WebPage" keyClass="java.lang.String"
table="webpage">
Modified: nutch/branches/nutchgora/conf/gora.properties
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora.properties?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora.properties (original)
+++ nutch/branches/nutchgora/conf/gora.properties Sun Apr 15 19:00:19 2012
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
#gora.datastore.default=org.apache.gora.mock.store.MockDataStore
#gora.datastore.autocreateschema=true
@@ -7,8 +22,8 @@
gora.sqlstore.jdbc.driver=org.hsqldb.jdbcDriver
gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql://localhost/nutchtest
-# gora.sqlstore.jdbc.user=
-# gora.sqlstore.jdbc.password=
+gora.sqlstore.jdbc.user=SA
+gora.sqlstore.jdbc.password=
################################
# Default AvroStore properties #
@@ -50,3 +65,13 @@ gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql:
# gora.memstore.###=
+############################
+# AccumuloStore properties #
+############################
+#gora.datastore.default=org.apache.gora.accumulo.store.AccumuloStore
+#gora.datastore.accumulo.mock=true
+#gora.datastore.accumulo.instance=a14
+#gora.datastore.accumulo.zookeepers=localhost
+#gora.datastore.accumulo.user=root
+#gora.datastore.accumulo.password=secret
+
Modified: nutch/branches/nutchgora/conf/log4j.properties
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/log4j.properties?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/log4j.properties (original)
+++ nutch/branches/nutchgora/conf/log4j.properties Sun Apr 15 19:00:19 2012
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Define some default values that can be overridden by system properties
hadoop.log.dir=.
hadoop.log.file=hadoop.log
Modified: nutch/branches/nutchgora/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/nutch-default.xml (original)
+++ nutch/branches/nutchgora/conf/nutch-default.xml Sun Apr 15 19:00:19 2012
@@ -1110,6 +1110,41 @@
</property>
-->
+<!--
+<property>
+ <name>storage.data.store.class</name>
+ <value>org.apache.gora.accumulo.store.AccumuloStore</value>
+ <description>Gora class for storing data in Apache Accumulo</description>
+</property>
+-->
+
+<!--
+<property>
+ <name>storage.data.store.class</name>
+ <value>org.apache.gora.avro.store.AvroStore</value>
+ <description>Gora class for storing data in Apache Avro</description>
+</property>
+-->
+
+<!--
+<property>
+ <name>storage.data.store.class</name>
+ <value>org.apache.gora.avro.store.DataFileAvroStore</value>
+ <description>Gora class for storing data in Apache Avro. DataFileAvroStore is
+ a file-based store which uses Avro's DataFileWriter and DataFileReader as a
+ backend. This datastore supports MapReduce.</description>
+</property>
+-->
+
+<!--
+<property>
+ <name>storage.data.store.class</name>
+ <value>org.apache.gora.memory.store.MemStore</value>
+ <description>Gora class for storing data in a memory-based DataStore
+ implementation, intended for tests.</description>
+</property>
+-->
+
<property>
<name>storage.schema</name>
<value>webpage</value>
Modified: nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template (original)
+++ nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template Sun Apr 15
19:00:19 2012
@@ -1,3 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
# config file for urlfilter-suffix plugin
# case-insensitive, allow unknown suffixes
Modified: nutch/branches/nutchgora/ivy/ivy.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/nutchgora/ivy/ivy.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/ivy/ivy.xml (original)
+++ nutch/branches/nutchgora/ivy/ivy.xml Sun Apr 15 19:00:19 2012
@@ -110,15 +110,17 @@
<exclude org="hsqldb" name="hsqldb" />
</dependency>
-->
-
+<!--
+ Uncomment this to use Accumulo as Gora backend.
+-->
+<!--
+ <dependency org="org.apache.gora" name="gora-accumulo" rev="0.1.1-incubating" conf="*->default" />
+-->
<!--
Uncomment this to use Cassandra as Gora backend.
-->
<!--
- <dependency org="org.apache.gora" name="gora-cassandra" rev="0.1.1-incubating" conf="*->compile">
- </dependency>
- // Should be another dependency here???
- </dependency>
+ <dependency org="org.apache.gora" name="gora-cassandra" rev="0.1.1-incubating" conf="*->default" />
-->
<!--global exclusion-->