Author: snagel
Date: Mon Nov 16 20:29:33 2015
New Revision: 1714655
URL: http://svn.apache.org/viewvc?rev=1714655&view=rev
Log:
NUTCH-2130 copyField rawcontent creates error within schema.xml
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/schema.xml
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1714655&r1=1714654&r2=1714655&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Nov 16 20:29:33 2015
@@ -3,6 +3,8 @@ Nutch Change Log
Nutch 2.3.1 Release 22092015 (ddmmyyyy)
Release Report - http://s.apache.org/nutch_2.3.1
+* NUTCH-2130 copyField rawcontent creates error within schema.xml (Sherban
Drulea, lewismc, snagel)
+
* NUTCH-2018 Ensure that the Docker containers for Nutch 2.X are part of the
Release Management Documentation (lewismc)
* NUTCH-2105 Update Nutch Cassandra Dockerfile to work with Gora Nutch 2.3.1
(lewismc)
Modified: nutch/branches/2.x/conf/schema.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema.xml?rev=1714655&r1=1714654&r2=1714655&view=diff
==============================================================================
--- nutch/branches/2.x/conf/schema.xml (original)
+++ nutch/branches/2.x/conf/schema.xml Mon Nov 16 20:29:33 2015
@@ -32,6 +32,7 @@
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true"
omitNorms="true"/>
+ <fieldType name="binary" class="solr.BinaryField"/>
<!--
Default numeric field types. For faster range queries, consider the
tint/tfloat/tlong/tdouble types.
@@ -357,6 +358,12 @@
<!-- fields for tld plugin -->
<field name="tld" type="string" stored="false" indexed="false"/>
+
+ <!-- fields for index-html plugin.
+ Note: although the raw document content may be binary,
+ index-html adds it to the index as a String, so the field type is "string". -->
+ <field name="rawcontent" type="string" stored="true" indexed="false"/>
+
</fields>
<uniqueKey>id</uniqueKey>
<defaultSearchField>text</defaultSearchField>
@@ -367,7 +374,6 @@
or to add multiple fields to the same field for easier/faster
searching. -->
<copyField source="content" dest="text"/>
- <copyField source="rawcontent" dest="text"/>
<copyField source="url" dest="text"/>
<copyField source="title" dest="text"/>
<copyField source="anchor" dest="text"/>