I am using Solr 6.5.1 and working on importing XML files using the
DataImportHandler (DIH). I want to get the files from a remote server, and I
am dealing with multiple XML files in multiple folders. I am using a nested
entity in my dataConfig. Below is an example of how I have my dataConfig set
up; I got most of it from an online reference. In this example I am getting
the XML files from a folder on the Solr server, but as mentioned above, I want
to get the files from a remote server. I have looked at the different entity
processors for the DIH, but have not found one that seems to work for this. Is
there a way to configure the code below to let me do this?
<dataConfig>
  <!-- Named data source that reads file contents; the "xml" entity below refers to it by name. -->
  <dataSource name="hbk" encoding="UTF-8" type="FileDataSource" />
  <document name="hbk">
    <!--
      pickupdir: lists every file under baseDir (recursively) whose name
      matches the fileName regex and passes each one to the nested entity,
      which parses the file contents. rootEntity="false" means the rows of
      this entity do not become Solr documents themselves.
      NOTE(review): baseDir is a path on the Solr host. Reading from a remote
      server would presumably need a URL-based data source and another way to
      enumerate the files; confirm against the DIH documentation.
    -->
    <entity
        name="pickupdir"
        processor="FileListEntityProcessor"
        rootEntity="false"
        dataSource="null"
        fileName="^[\w\d-]+\.xml$"
        baseDir="/var/solr/data/hbk/data/xml/"
        recursive="true">
      <!--
        xml: reads the file found by pickupdir, applies the XSL stylesheet
        given in xsl, and emits one row per match of the forEach expressions
        (each /eflow/section and each /eflow/section/item).
        The attribute name is dataSource (camel case) and its value must be
        the name of a declared <dataSource>, here "hbk".
      -->
      <entity
          name="xml"
          pk="itemId"
          processor="XPathEntityProcessor"
          transformer="RegexTransformer,TemplateTransformer"
          dataSource="hbk"
          stream="true"
          xsl="/var/solr/data/hbk/data/xsl/solr_timdex.xsl"
          url="${pickupdir.fileAbsolutePath}"
          forEach="/eflow/section | /eflow/section/item">
        <!-- Section-level attributes; commonField="true" carries each value onto the item rows. -->
        <field column="sectionId" xpath="/eflow/section/@id" commonField="true" />
        <field column="sectionTitle" xpath="/eflow/section/@title" commonField="true" />
        <field column="sectionNo" xpath="/eflow/section/@secno" commonField="true" />
        <field column="hbkNo" xpath="/eflow/section/@hbkno" commonField="true" />
        <field column="volumeNo" xpath="/eflow/section/@volno" commonField="true" />
        <!-- Item-level attributes. -->
        <field column="itemId" xpath="/eflow/section/item/@id" />
        <field column="itemTitle" xpath="/eflow/section/item/@title" />
        <field column="itemNo" xpath="/eflow/section/item/@mit" />
        <field column="itemFile" xpath="/eflow/section/item/@file" />
        <field column="itemType" xpath="/eflow/section/item/@type" />
      </entity>
    </entity>
  </document>
</dataConfig>
~~~~~~~~~~~~~~~~~~~~~~~
William Kevin Miller
[ecsLogo]
ECS Federal, Inc.
USPS/MTSC
(405) 573-2158