Hello all, I have the following DIH data-config.xml file. Adding HTMLStripTransformer and the associated stripHTML on the para tag seems to have broken things. I am using a nightly build from 12-jan-2009.
The /record/sect1/para contains HTML sub-tags which need to be discarded. Is my use of stripHTML correct? <dataConfig> <dataSource name="myfilereader" type="FileDataSource"/> <document> <entity name="jcurrent" processor="FileListEntityProcessor" fileName=".*xml" newerThan="'NOW-1000DAYS'" recursive="true" rootEntity="false" dataSource="null" baseDir="/Volumes/spare/ts/jxml/data/news/groups"> <entity name="x" dataSource="myfilereader" processor="XPathEntityProcessor" url="${jcurrent.fileAbsolutePath}" stream="false" forEach="/record" transformer="DateFormatTransformer,TemplateTransformer,RegexTransformer,HTMLStripTransformer"> <field column="fileAbsPath" template="${jcurrent.fileAbsolutePath}" /> <field column="fileWebPath" regex="/Volumes/spare/ts/(.*)" replaceWith="$1" sourceColName="fileAbsePath"/> <field column="title" xpath="/record/title" /> <field column="para" xpath="/record/sect1/para" stripHTML="true" /> <field column="subject" xpath="/record/metadata/subject[@qualifier='fullTitle']" /> <field column="pubname" xpath="/record/metadata/subject[@qualifier='publication']" /> <field column="pubdate" xpath="/record/metadata/date[@qualifier='pubDate']" dateTimeFormat="yyyyMMdd" /> </entity> </entity> </document> </dataConfig> -- =============================================================== Fergus McMenemie Email:fer...@twig.me.uk Techmore Ltd Phone:(UK) 07721 376021 Unix/Mac/Intranets Analyst Programmer ===============================================================