I'm using Solr 4.3, which I downloaded today, and I am using only the jars that 
came with it. I have enabled the data importer and it runs without error, but 
the fields "path" (included in schema.xml) and "text" (file content) aren't 
indexed. What am I doing wrong?

solr-path: C:\ColdFusion10\cfusion\jetty-new
collection-path: C:\ColdFusion10\cfusion\jetty-new\solr\collection1
pdf-doc-path: C:\web\development\tkb\internet\public


data-config.xml:

<dataConfig>
        <!-- Binary file source; used by the nested "tika" entity to read documents from disk. -->
        <dataSource type="BinFileDataSource" name="data"/>
        <!-- Binary URL source; not referenced by any entity in this configuration. -->
        <dataSource type="BinURLDataSource" name="dataUrl"/>
        <!-- Source for the XML metadata feed; used by the "rec" entity, whose url is relative to baseUrl. -->
        <dataSource type="URLDataSource"
                    baseUrl="http://127.0.0.1/tkb/internet/"
                    name="main"/>
        <document>
                <!-- Outer entity: one row per /albums/album element of docImportUrl.xml.
                     Disabled attribute kept for reference: transformer="script:GenerateId" -->
                <entity name="rec"
                        processor="XPathEntityProcessor"
                        url="docImportUrl.xml"
                        forEach="/albums/album"
                        dataSource="main">
                        <field column="title" xpath="//title" />
                        <!-- The file name doubles as the document id. -->
                        <field column="id" xpath="//file" />
                        <field column="path" xpath="//path" />
                        <!-- NOTE(review): Solr field names are case-sensitive; confirm that schema.xml
                             declares "Author" with this exact capitalisation. -->
                        <field column="Author" xpath="//author" />

                        <!-- <field column="tstamp">2013-07-05T14:59:46.889Z</field> -->

                        <!-- Inner entity: runs once per "rec" row and extracts the content of the file
                             located at <path>/<id> below the public document folder.
                             NOTE(review): the url is a relative path, presumably resolved against the
                             server's working directory; confirm, or switch to an absolute path. -->
                        <entity name="tika"
                                processor="TikaEntityProcessor"
                                url="../../../../../web/development/tkb/internet/public/${rec.path}/${rec.id}"
                                dataSource="data">
                                <!-- Extracted body text.
                                     NOTE(review): it only shows up in query results if schema.xml declares
                                     the "text" field as stored; confirm. -->
                                <field column="text" />
                        </entity>
                </entity>
        </document>
</dataConfig>


docImportUrl.xml:

<?xml version="1.0" encoding="utf-8"?>
<!-- Sample metadata feed: one <album> element per document to import.
     <file> is the document's file name and <path> its folder. -->
<albums>
        <album>
                <author>Peter Z.</author>
                <title>Beratungsseminar kundenbrief</title>
                <description>wie kommuniziert man</description>
                <file>0226520141_e-banking_Checkliste_CLX.Sentinel.pdf</file>
                <path>download/online</path>
        </album>
        <album>
                <author>Marcel X.</author>
                <title>kuchen backen</title>
                <description>torten, kuchen, gebäck ...</description>
                <file>Kundenbrief.pdf</file>
                <path>download/online</path>
        </album>
</albums>

Reply via email to