Author: lewismc
Date: Sun Apr 15 19:00:19 2012
New Revision: 1326400

URL: http://svn.apache.org/viewvc?rev=1326400&view=rev
Log:
commit to address NUTCH-1333 and update to CHANGES.txt

Added:
    nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml
Modified:
    nutch/branches/nutchgora/CHANGES.txt
    nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
    nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
    nutch/branches/nutchgora/conf/gora-sql-mapping.xml
    nutch/branches/nutchgora/conf/gora.properties
    nutch/branches/nutchgora/conf/log4j.properties
    nutch/branches/nutchgora/conf/nutch-default.xml
    nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template
    nutch/branches/nutchgora/ivy/ivy.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Sun Apr 15 19:00:19 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1333 Introduce AvroStore, DataFileAvroStore and Accumulo Datastore implementations (lewismc)
+
 * NUTCH-1312 Nutchgora to send HTTP-accept header (ferdy)
 
 * NUTCH-1311 Add response headers to datastore for the protocol-httpclient plugin (Dan Rosher via ferdy)

Added: nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml?rev=1326400&view=auto
==============================================================================
--- nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-accumulo-mapping.xml Sun Apr 15 19:00:19 2012
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<gora-orm>
+    
+    <table name="webpage">
+        <family name="p" maxVersions="1"/> <!-- This can also have params like compression, bloom filters -->
+        <family name="f" maxVersions="1"/>
+        <family name="s" maxVersions="1"/>
+        <family name="il" maxVersions="1"/>
+        <family name="ol" maxVersions="1"/>
+        <family name="h" maxVersions="1"/>
+        <family name="mtdt" maxVersions="1"/>
+        <family name="mk" maxVersions="1"/>
+    </table>
+    <class table="webpage" keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">
+        
+        <!-- fetch fields                                       -->
+        <field name="baseUrl" family="f" qualifier="bas"/>
+        <field name="status" family="f" qualifier="st"/>
+        <field name="prevFetchTime" family="f" qualifier="pts"/>
+        <field name="fetchTime" family="f" qualifier="ts"/>
+        <field name="fetchInterval" family="f" qualifier="fi"/>
+        <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
+        <field name="reprUrl" family="f" qualifier="rpr"/>
+        <field name="content" family="f" qualifier="cnt"/>
+        <field name="contentType" family="f" qualifier="typ"/>
+        <field name="protocolStatus" family="f" qualifier="prot"/>
+        <field name="modifiedTime" family="f" qualifier="mod"/>
+        
+        <!-- parse fields                                       -->
+        <field name="title" family="p" qualifier="t"/>
+        <field name="text" family="p" qualifier="c"/>
+        <field name="parseStatus" family="p" qualifier="st"/>
+        <field name="signature" family="p" qualifier="sig"/>
+        <field name="prevSignature" family="p" qualifier="psig"/>
+        
+        <!-- score fields                                       -->
+        <field name="score" family="s" qualifier="s"/>
+        <field name="headers" family="h"/>
+        <field name="inlinks" family="il"/>
+        <field name="outlinks" family="ol"/>
+        <field name="metadata" family="mtdt"/>
+        <field name="markers" family="mk"/>
+    </class>
+    
+</gora-orm>

Modified: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml Sun Apr 15 19:00:19 2012
@@ -1,5 +1,20 @@
 <?xml version="1.0" encoding="UTF-8"?>
-
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
 <gora-orm>
     
     <keyspace name="webpage" cluster="Test Cluster" host="localhost">
@@ -40,4 +55,4 @@
         <field name="protocolStatus" family="sc" qualifier="prs"/>
     </class>
     
-</gora-orm>
\ No newline at end of file
+</gora-orm>

Modified: nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-hbase-mapping.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-hbase-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Sun Apr 15 19:00:19 2012
@@ -1,5 +1,20 @@
 <?xml version="1.0" encoding="UTF-8"?>
-
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
 <gora-orm>
     
     <table name="webpage">
@@ -43,4 +58,4 @@
         <field name="markers" family="mk"/>
     </class>
     
-</gora-orm>
\ No newline at end of file
+</gora-orm>

Modified: nutch/branches/nutchgora/conf/gora-sql-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-sql-mapping.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-sql-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-sql-mapping.xml Sun Apr 15 19:00:19 2012
@@ -1,5 +1,20 @@
 <?xml version="1.0" encoding="UTF-8"?>
-
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
 <gora-orm>
 
 <class name="org.apache.nutch.storage.WebPage" keyClass="java.lang.String" table="webpage">

Modified: nutch/branches/nutchgora/conf/gora.properties
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora.properties?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora.properties (original)
+++ nutch/branches/nutchgora/conf/gora.properties Sun Apr 15 19:00:19 2012
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 #gora.datastore.default=org.apache.gora.mock.store.MockDataStore
 #gora.datastore.autocreateschema=true
 
@@ -7,8 +22,8 @@
 
 gora.sqlstore.jdbc.driver=org.hsqldb.jdbcDriver
 gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql://localhost/nutchtest
-# gora.sqlstore.jdbc.user=
-# gora.sqlstore.jdbc.password=
+gora.sqlstore.jdbc.user=SA
+gora.sqlstore.jdbc.password=
 
 ################################
 # Default AvroStore properties #
@@ -50,3 +65,13 @@ gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql:
 
 # gora.memstore.###=
 
+############################
+# AccumuloStore properties #
+############################
+#gora.datastore.default=org.apache.gora.accumulo.store.AccumuloStore
+#gora.datastore.accumulo.mock=true
+#gora.datastore.accumulo.instance=a14
+#gora.datastore.accumulo.zookeepers=localhost
+#gora.datastore.accumulo.user=root
+#gora.datastore.accumulo.password=secret
+

Modified: nutch/branches/nutchgora/conf/log4j.properties
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/log4j.properties?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/log4j.properties (original)
+++ nutch/branches/nutchgora/conf/log4j.properties Sun Apr 15 19:00:19 2012
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Define some default values that can be overridden by system properties
 hadoop.log.dir=.
 hadoop.log.file=hadoop.log

Modified: nutch/branches/nutchgora/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/nutch-default.xml (original)
+++ nutch/branches/nutchgora/conf/nutch-default.xml Sun Apr 15 19:00:19 2012
@@ -1110,6 +1110,41 @@
 </property>
 -->
 
+<!--
+<property>
+  <name>storage.data.store.class</name>
+  <value>org.apache.gora.accumulo.store.AccumuloStore</value>
+  <description>Gora class for storing data in Apache Accumulo</description>
+</property>
+-->
+
+<!--
+<property>
+  <name>storage.data.store.class</name>
+  <value>org.apache.gora.avro.store.AvroStore</value>
+  <description>Gora class for storing data in Apache Avro</description>
+</property>
+-->
+
+<!--
+<property>
+  <name>storage.data.store.class</name>
+  <value>org.apache.gora.avro.store.DataFileAvroStore</value>
+  <description>Gora class for storing data in Apache Avro. DataFileAvroStore is
+  a file based store which uses Avro's DataFileWriter and DataFileReader as a
+  backend. This datastore supports mapreduce.</description>
+</property>
+-->
+
+<!--
+<property>
+  <name>storage.data.store.class</name>
+  <value>org.apache.gora.memory.store.MemStore</value>
+  <description>Gora class for storing data in a memory based DataStore
+  implementation for tests.</description>
+</property>
+-->
+
 <property>
   <name>storage.schema</name>
   <value>webpage</value>

Modified: nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template (original)
+++ nutch/branches/nutchgora/conf/suffix-urlfilter.txt.template Sun Apr 15 19:00:19 2012
@@ -1,3 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 # config file for urlfilter-suffix plugin
 
 # case-insensitive, allow unknown suffixes

Modified: nutch/branches/nutchgora/ivy/ivy.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/ivy/ivy.xml?rev=1326400&r1=1326399&r2=1326400&view=diff
==============================================================================
--- nutch/branches/nutchgora/ivy/ivy.xml (original)
+++ nutch/branches/nutchgora/ivy/ivy.xml Sun Apr 15 19:00:19 2012
@@ -110,15 +110,17 @@
           <exclude org="hsqldb" name="hsqldb" />
        </dependency>
 -->
-
+<!--
+       Uncomment this to use Accumulo as Gora backend.
+-->
+<!--
+       <dependency org="org.apache.gora" name="gora-accumulo" rev="0.1.1-incubating" conf="*->default" />
+-->
 <!--
        Uncomment this to use Cassandra as Gora backend. 
 -->
 <!--
-               <dependency org="org.apache.gora" name="gora-cassandra" rev="0.1.1-incubating" conf="*->compile">
-               </dependency>
-               // Should be another dependency here???
-               </dependency>
+               <dependency org="org.apache.gora" name="gora-cassandra" rev="0.1.1-incubating" conf="*->default" />
 -->
 
              <!--global exclusion-->


Reply via email to