All,

I am indexing some RSS feeds that are bound to specific namespaces. See
below...

<dataConfig>
<!-- The only data source declared in this config: type HttpDataSource, UTF-8
     encoding, connectionTimeout and readTimeout both set to 500000.
     NOTE(review): the timeout unit is not stated here; presumably
     milliseconds, confirm against the DataImportHandler documentation. -->
<dataSource type="HttpDataSource"
            encoding="UTF-8"
            connectionTimeout="500000"
            readTimeout="500000"/>
  <document>
    <entity name="filedatasource"
            processor="FileListEntityProcessor"

 baseDir="C:/Apache/Solr-Nightly/solr/example/solr/conf/dataimporthandler"
            fileName="^.*xml$"
            recursive="true"
            rootEntity="false"
            dataSource="null">

      <!-- Entity "CBP": declares an XPathEntityProcessor over the CBP RSS feed
           as converted to GeoRSS by the geonames rssToGeoRSS service (see url).
           forEach names /rss/channel and /rss/channel/item as the record
           paths, and pk is the "link" column.
           NOTE(review): the attribute is spelled "datasource" here but
           "dataSource" on the enclosing entity; XML attribute names are
           case-sensitive, so confirm which spelling DIH reads and whether an
           entity name is a valid value for it.
           NOTE(review): the url value wraps across lines, so the parsed
           attribute value has whitespace on both sides of the URL; confirm
           this is only mail wrapping and not present in the real file. -->
      <entity name="CBP"
        pk="link"
        datasource="filedatasource"
        url="
http://ws.geonames.org/rssToGeoRSS?geoRSS=simple&amp;feedUrl=http://www.cbp.gov/xp/cgov/admin/rss/?rssUrl=/home.xml
"
        processor="XPathEntityProcessor"
        forEach="/rss/channel | /rss/channel/item"
        transformer="DateFormatTransformer,HTMLStripTransformer">

        <!-- Channel-level columns, each flagged commonField="true". -->
        <field column="source"       xpath="/rss/channel/title"
commonField="true" />
        <field column="source-link"  xpath="/rss/channel/link"
 commonField="true" />
        <field column="subject"      xpath="/rss/channel/description"
commonField="true" />
        <!-- Item-level columns. stripHTML on "description" is presumably
             consumed by the HTMLStripTransformer listed on the entity. -->
        <field column="title"        xpath="/rss/channel/item/title" />
        <field column="link"         xpath="/rss/channel/item/link" />
        <field column="description"  xpath="/rss/channel/item/description"
stripHTML="true" />
        <!-- NOTE(review): "creator", "dcdate" and "store" use xpaths with a
             namespace prefix (dc:, georss:). The DataImportHandler wiki's
             Slashdot example addresses dc:creator as /RDF/item/creator, i.e.
             by local name without the prefix; confirm whether these paths
             need the prefix dropped to match. -->
        <field column="creator"      xpath="/rss/channel/item/dc:creator" />
        <field column="item-subject" xpath="/rss/channel/item/subject" />
        <field column="author"       xpath="/rss/channel/item/author" />
        <field column="comments"     xpath="/rss/channel/item/comments" />
        <!-- NOTE(review): both date columns use the pattern
             yyyy-MM-dd'T'HH:mm:ss'Z'. RSS 2.0 pubDate is normally an RFC 822
             date, so confirm the pattern matches what this feed emits. -->
        <field column="pubdate"      xpath="/rss/channel/item/pubDate"
dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss'Z'" />
        <field column="dcdate"       xpath="/rss/channel/item/dc:date"
dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss'Z'" />
        <field column="store"        xpath="/rss/channel/item/georss:point"
/>
      </entity>

The import completely skips any XPath that contains a colon (i.e., a
namespace prefix), e.g. /rss/channel/item/georss:point.  Any ideas how to get
around this using the DataImportHandler (DIH)?

Thanks to Chris Mattmann for the heads up on the geocoding services.

Adam

Reply via email to