After I run the crawl commands I can verify the data is in HBase. However, when 
I run "bin/nutch index elasticsearch -all", no data is indexed into ES.

Nutch Version: 2.3.1
ES Version: 1.4.2 (I have also tested the latest ES version and v1.7.5)
Java JDK v7

I have also updated the ES version in my 
$NUTCH_HOME/src/plugin/indexer-elastic/ivy.xml & 
$NUTCH_HOME/src/plugin/indexer-elastic/plugin.xml

My nutch-site.xml:
<configuration>
  <!-- Storage backend: Gora's HBase store class. -->
  <property>
    <name>storage.data.store.class</name>
    <value>org.apache.gora.hbase.store.HBaseStore</value>
    <description>Default class for storing data</description>
  </property>

  <!-- Agent name set for the crawler's HTTP requests. -->
  <property>
    <name>http.agent.name</name>
    <value>My Crawler</value>
  </property>

  <!-- Pattern naming the plugins to enable; it includes indexer-elastic.
       NOTE(review): parse-html and index-(basic|more) each appear twice in
       this pattern; the repetition looks redundant, confirm before changing.
       The value is kept on one line because line breaks inside it would
       become part of the text content. -->
  <property>
    <name>plugin.includes</name>
    <value>protocol-(http|httpclient)|urlfilter-regex|index-(basic|more)|query-(basic|site|url|lang)|indexer-elastic|nutch-extensionpoints|parse-(text|html|msexcel|msword|mspowerpoint|pdf)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)|parse-(html|tika|metatags)|index-(basic|anchor|more|metadata)</value>
  </property>

  <!-- Elasticsearch connection settings: host, port, cluster name and
       target index.
       NOTE(review): presumably the cluster name must match the name the
       running ES node reports; verify against the ES configuration. -->
  <property>
    <name>elastic.host</name>
    <value>localhost</value>
  </property>
  <property>
    <name>elastic.port</name>
    <value>9300</value>
  </property>
  <property>
    <name>elastic.cluster</name>
    <value>searchcluster</value>
  </property>
  <property>
    <name>elastic.index</name>
    <value>websearch</value>
  </property>

  <!-- Default character encoding setting for the parser. -->
  <property>
    <name>parser.character.encoding.default</name>
    <value>utf-8</value>
  </property>

  <!-- HTTP content limit (units are not stated here; presumably bytes, TODO confirm). -->
  <property>
    <name>http.content.limit</name>
    <value>6553600</value>
  </property>
</configuration>

Thanks in advance for any suggestions.



Reply via email to