why does stripHTML="false" have no effect in dih? the html is strippedin text and text_nohtml when i do display the index with select?q=*
i'm trying to get a field without html and one with it so i can also index the links on the page. data-config.xml <entity name="rec" processor="XPathEntityProcessor" url="file:///C:\ColdFusion10\cfusion\solr\solr\tkbintranet\docImportUrl.xml" forEach="/docs/doc" dataSource="main"> <!-- transformer="script:GenerateId"--> <field column="title" xpath="//title" /> <field column="id" xpath="//id" /> <field column="file" xpath="//file" /> <field column="url" xpath="//url" /> <field column="urlParse" xpath="//urlParse" /> <field column="last_modified" xpath="//last_modified" /> <field column="Author" xpath="//author" /> <entity name="tika" processor="TikaEntityProcessor" url="${rec.urlParse}" dataSource="dataUrl" onError="skip" htmlMapper="identity" format="html" transformer="HTMLStripTransformer"> <field column="text" name="text" stripHTML="false" /> <field column="text" name="text_nohtml" stripHTML="true" /> <!-- transformer="RegexTransformer" <field column="text_html_b" regex="(?s)^.*<div.*id=.*>(.*)</div>.*$" replaceWith="$1" sourceColName="text" /> <field column="text_html_b" regex="(?s)^.*<!-body->(.*)<!-/body->.*$" replaceWith="$1" sourceColName="text" /> --> </entity> </entity>