I'm trying to index an HTML page and only use the div with id="content". 
Unfortunately, nothing inside the tika entity is working; only the standard text 
field (content) is populated. 

        Do I have to use copyField for text_test to get the data? 
        Or is there a problem with the entity hierarchy?
        Or is the XPath wrong? I have also tried it without the XPath, just using the text column.
        Or should I use the update extractor (/update/extract) instead?

data-config.xml:

<dataConfig>
        <!--
          Data sources, referenced by name from the entities below.
          "main" serves the XML import list, "dataUrl" serves the binary page
          content handed to Tika; "data" is declared but not referenced by any
          entity in this configuration.
        -->
        <dataSource type="BinFileDataSource" name="data"/>
        <dataSource type="BinURLDataSource" name="dataUrl"/>
        <dataSource type="URLDataSource"
                    baseUrl="http://127.0.0.1/tkb/internet/"
                    name="main"/>
        <document>
                <!--
                  Outer entity: one row per /docs/doc element of the import
                  list, read through the "main" data source.
                  NOTE(review): the sample list is introduced as
                  "docImporterUrl.xml" while the url below says
                  "docImportUrl.xml"; confirm which file name is correct.
                -->
                <entity name="rec"
                        processor="XPathEntityProcessor"
                        url="docImportUrl.xml"
                        forEach="/docs/doc"
                        dataSource="main">
                        <field column="title" xpath="//title"/>
                        <field column="id" xpath="//id"/>
                        <field column="file" xpath="//file"/>
                        <!-- NOTE(review): the sample list contains no path element
                             (only path2), so rec.path is presumably empty; verify. -->
                        <field column="path" xpath="//path"/>
                        <field column="url" xpath="//url"/>
                        <field column="Author" xpath="//author"/>

                        <!--
                          Inner entity: runs once per "rec" row and feeds the
                          resource at ${rec.path}${rec.file}, fetched through
                          "dataUrl", to Tika.
                        -->
                        <entity name="tika"
                                processor="TikaEntityProcessor"
                                url="${rec.path}${rec.file}"
                                dataSource="dataUrl">
                                <!-- NOTE(review): copyField is normally a schema
                                     directive, not a DIH element; kept disabled. -->
                                <!-- <copyField source="text" dest="text_test" /> -->
                                <!-- NOTE(review): xpath is an XPathEntityProcessor
                                     field attribute; confirm whether
                                     TikaEntityProcessor honours it. Its documented
                                     body column is "text" (optionally with
                                     format="xml" or "html"). -->
                                <field column="text_test" xpath="//div[@id='content']"/>
                        </entity>
                </entity>
        </document>
</dataConfig>

docImporterUrl.xml:

<?xml version="1.0" encoding="utf-8"?>
<!--
  Import list: each doc element carries the metadata (id, author, title,
  description) and the file / url locations of one page.
  The description text is left exactly as wrapped, because whitespace inside
  text content is data.
-->
<docs>
        <doc>
                <id>5</id>
                <author>tkb</author>
                <title>Startseite</title>
                <description>blabla ...</description>
                <file>http://localhost/tkb/internet/index.cfm</file>
                <url>http://localhost/tkb/internet/index.cfm/url</url>
                <path2>http\specialConf</path2>
        </doc>
        <doc>
                <id>6</id>
                <author>tkb</author>
                <title>Eigenheim</title>
                <description>Machen Sie sich erste Gedanken über den Erwerb von 
Wohneigentum? Oder haben Sie bereits konkrete Pläne oder gar ein spruchreifes 
Projekt? Wir beraten Sie gerne in allen Fragen rund um den Erwerb oder Bau von 
Wohneigentum, damit Ihr Vorhaben auch in finanzieller Hinsicht 
gelingt.</description>
                <file>http://127.0.0.1/tkb/internet/private/beratung/eigenheim.htm</file>
                <url>http://127.0.0.1/tkb/internet/private/beratung/eigenheim.htm/url</url>
        </doc>
</docs>

Reply via email to