Author: lewismc Date: Mon Oct 31 10:49:17 2011 New Revision: 1195403 URL: http://svn.apache.org/viewvc?rev=1195403&view=rev Log: Commit to address NUTCH-902 and update CHANGES.txt
Added: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Modified: nutch/branches/nutchgora/CHANGES.txt nutch/branches/nutchgora/build.xml nutch/branches/nutchgora/conf/gora-sql-mapping.xml nutch/branches/nutchgora/conf/nutch-default.xml nutch/branches/nutchgora/ivy/ivy.xml Modified: nutch/branches/nutchgora/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1195403&r1=1195402&r2=1195403&view=diff ============================================================================== --- nutch/branches/nutchgora/CHANGES.txt (original) +++ nutch/branches/nutchgora/CHANGES.txt Mon Oct 31 10:49:17 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release nutchgora - Current Development +* NUTCH-902 Add all necessary files and configuration so that nutch can be used with different backends out-of-the-box (lewismc) + * NUTCH-1081 & 1135 ant tests fail & Fix TestGoraStorage for Nutchgora (Ferdy via lewismc) * NUTCH-1156 building errors with gora-hbase as a backend; update ivy.xml to use correct dependancies (Ferdy via lewismc) Modified: nutch/branches/nutchgora/build.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1195403&r1=1195402&r2=1195403&view=diff ============================================================================== --- nutch/branches/nutchgora/build.xml (original) +++ nutch/branches/nutchgora/build.xml Mon Oct 31 10:49:17 2011 @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -<project name="Nutch" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant"> +<project name="Nutchgora" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant"> <!-- Load all the default properties, and any the user wants --> <!-- to contribute (without having to type -D or edit this file --> Added: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml?rev=1195403&view=auto ============================================================================== --- nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml (added) +++ nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml Mon Oct 31 10:49:17 2011 @@ -0,0 +1,43 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<gora-orm> + + <keyspace name="webpage" cluster="Test Cluster" host="localhost"> + <family name="p"/> + <family name="f"/> + <family name="sc" type="super"/> + </keyspace> + <class keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage"> + + <!-- fetch fields --> + <field name="baseUrl" family="f" qualifier="bas"/> + <field name="status" family="f" qualifier="st"/> + <field name="prevFetchTime" family="f" qualifier="pts"/> + <field name="fetchTime" family="f" qualifier="ts"/> + <field name="fetchInterval" family="f" qualifier="fi"/> + <field name="retriesSinceFetch" family="f" qualifier="rsf"/> + <field name="reprUrl" family="f" qualifier="rpr"/> + <field name="content" family="f" qualifier="cnt"/> + <field name="contentType" family="f" qualifier="typ"/> + <field name="modifiedTime" family="f" qualifier="mod"/> + + <!-- parse fields --> + <field name="title" family="p" qualifier="t"/> + <field name="text" family="p" qualifier="c"/> + <field name="signature" family="p" qualifier="sig"/> + <field name="prevSignature" family="p" qualifier="psig"/> + + <!-- score fields --> + <field 
name="score" family="f" qualifier="s"/> + + <!-- super columns --> + <field name="markers" family="sc" qualifier="mk"/> + <field name="inlinks" family="sc" qualifier="il"/> + <field name="outlinks" family="sc" qualifier="ol"/> + <field name="metadata" family="sc" qualifier="mtdt"/> + <field name="headers" family="sc" qualifier="h"/> + <field name="parseStatus" family="sc" qualifier="pas"/> + <field name="protocolStatus" family="sc" qualifier="prs"/> + </class> + +</gora-orm> \ No newline at end of file Added: nutch/branches/nutchgora/conf/gora-hbase-mapping.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-hbase-mapping.xml?rev=1195403&view=auto ============================================================================== --- nutch/branches/nutchgora/conf/gora-hbase-mapping.xml (added) +++ nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Mon Oct 31 10:49:17 2011 @@ -0,0 +1,46 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<gora-orm> + + <table name="webpage"> + <family name="p"/> <!-- This can also have params like compression, bloom filters --> + <family name="f"/> + <family name="s"/> + <family name="il"/> + <family name="ol"/> + <family name="h"/> + <family name="mtdt"/> + <family name="mk"/> + </table> + <class table="webpage" keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage"> + + <!-- fetch fields --> + <field name="baseUrl" family="f" qualifier="bas"/> + <field name="status" family="f" qualifier="st"/> + <field name="prevFetchTime" family="f" qualifier="pts"/> + <field name="fetchTime" family="f" qualifier="ts"/> + <field name="fetchInterval" family="f" qualifier="fi"/> + <field name="retriesSinceFetch" family="f" qualifier="rsf"/> + <field name="reprUrl" family="f" qualifier="rpr"/> + <field name="content" family="f" qualifier="cnt"/> + <field name="contentType" family="f" qualifier="typ"/> + <field name="protocolStatus" family="f" qualifier="prot"/> + <field name="modifiedTime" family="f" qualifier="mod"/> + + 
<!-- parse fields --> + <field name="title" family="p" qualifier="t"/> + <field name="text" family="p" qualifier="c"/> + <field name="parseStatus" family="p" qualifier="st"/> + <field name="signature" family="p" qualifier="sig"/> + <field name="prevSignature" family="p" qualifier="psig"/> + + <!-- score fields --> + <field name="score" family="s" qualifier="s"/> + <field name="headers" family="h"/> + <field name="inlinks" family="il"/> + <field name="outlinks" family="ol"/> + <field name="metadata" family="mtdt"/> + <field name="markers" family="mk"/> + </class> + +</gora-orm> \ No newline at end of file Modified: nutch/branches/nutchgora/conf/gora-sql-mapping.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-sql-mapping.xml?rev=1195403&r1=1195402&r2=1195403&view=diff ============================================================================== --- nutch/branches/nutchgora/conf/gora-sql-mapping.xml (original) +++ nutch/branches/nutchgora/conf/gora-sql-mapping.xml Mon Oct 31 10:49:17 2011 @@ -25,15 +25,10 @@ <!-- score fields --> <field name="score" column="score"/> - <field name="headers" column="headers"/> - <field name="inlinks" column="inlinks"/> - <field name="outlinks" column="outlinks"/> - <field name="metadata" column="metadata"/> - <field name="markers" column="markers"/> </class> Modified: nutch/branches/nutchgora/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1195403&r1=1195402&r2=1195403&view=diff ============================================================================== --- nutch/branches/nutchgora/conf/nutch-default.xml (original) +++ nutch/branches/nutchgora/conf/nutch-default.xml Mon Oct 31 10:49:17 2011 @@ -1065,14 +1065,16 @@ <property> <name>storage.data.store.class</name> <value>org.apache.gora.sql.store.SqlStore</value> - <description>Default class for storing data</description> + <description>Default Gora class for storing data in the SQL Store. 
+ A DataStore implementation for RDBMS with a SQL interface. SqlStore + uses JDBC drivers to communicate with the DB.</description> </property> <!-- <property> <name>storage.data.store.class</name> <value>org.apache.gora.cassandra.store.CassandraStore</value> - <description>Class for storing data in Apache Cassandra</description> + <description>Gora class for storing data in Apache Cassandra</description> </property> --> @@ -1080,7 +1082,7 @@ <property> <name>storage.data.store.class</name> <value>org.apache.gora.hbase.store.HBaseStore</value> - <description>Class for storing data in Apache HBase</description> + <description>Gora class for storing data in Apache HBase</description> </property> --> Modified: nutch/branches/nutchgora/ivy/ivy.xml URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/ivy/ivy.xml?rev=1195403&r1=1195402&r2=1195403&view=diff ============================================================================== --- nutch/branches/nutchgora/ivy/ivy.xml (original) +++ nutch/branches/nutchgora/ivy/ivy.xml Mon Oct 31 10:49:17 2011 @@ -114,6 +114,16 @@ </dependency> --> +<!-- + Uncomment this to use Cassandra as Gora backend. + NOTE: additional dependencies may be required here; + this has not yet been confirmed. +--> +<!-- + <dependency org="org.apache.gora" name="gora-cassandra" rev="0.1.1-incubating" conf="*->compile"> + </dependency> +--> + <!--global exclusion--> <exclude module="ant" /> <exclude module="jmxtools" />