Hi - you need a custom indexing filter that adds the fields from parsemeta to 
the document.

Cheers,

 
 
-----Original message-----
> From:Eyeris Rodriguez Rueda <[email protected]>
> Sent: Wed 24-Oct-2012 14:59
> To: [email protected]
> Subject: problems with image dynamic fields in nutch 1.4 
> 
> Hi all.
> I have a problem when I try to crawl images, specifically with the dynamic
> fields of those images.
> When I do a crawl, Nutch ignores these dynamic fields.
> When I manually upload some images directly to the Solr index, Solr's Tika is
> able to extract some metadata into dynamic fields such as width, height, and
> content-type, but with a Nutch crawl those fields are ignored.
> I have tried declaring them as static fields in the Solr and Nutch schemas,
> but still without results. Here are my schema and solrindex-mapping; I'm using
> Nutch 1.4 and Solr 3.6. Any help or advice will be appreciated.
> ************************************************
> Schema.xml
> 
> <fields>  
> <field name="segment" type="string" stored="true" indexed="false"/>
> <field name="digest" type="string" stored="true" indexed="false"/>
> <field name="boost" type="float" stored="true" indexed="false"/>
> 
> 
> <!-- fields for index-basic plugin -->
>         <field name="host" type="url" stored="true" indexed="true"/>
>         <field name="site" type="string" stored="true" indexed="true"/>
>         <field name="url" type="url" stored="true" indexed="true"/>
>         <field name="content" type="text" stored="true" indexed="false"/>
>         <field name="title" type="text" stored="true" indexed="true"/>
>         <field name="cache" type="string" stored="true" indexed="false"/>
>         <field name="tstamp" type="date" stored="true" indexed="true"
> default="NOW"/>
> 
> <!-- fields for index-more plugin -->
> <field name="date" type="date" stored="true" indexed="true"/>
> <field name="contentLength" type="long" stored="true" indexed="true"/>
> <field name="lastModified" type="date" stored="true" indexed="true"/>
> 
> 
> <!-- fields for languageidentifier plugin -->
>         <field name="lang" type="string" stored="true" indexed="true"/>
> 
>   <!-- general -->
>   <field name="id" type="string" indexed="true"  stored="true"
> multiValued="false"/>
>   <field name="type" type="string" indexed="true"  stored="true"
> multiValued="true" /> 
>   <field name="name" type="string" indexed="true"  stored="true"
> multiValued="false" /> 
>   <field name="thumbnail" type="string" stored="true" indexed="true"/>
>   <field name="core0" type="string" indexed="true"  stored="true"
> multiValued="false" />
> <!-- campos estáticos -->
> <!-- para los png -->
> <field name="content_type" type="string" indexed="true"  stored="true"
> multiValued="false"/>
> <field name="width" type="string" indexed="true"  stored="true"
> multiValued="false"/>
> <field name="height" type="string" indexed="true"  stored="true"
> multiValued="false"/>
> <field name="stream_name" type="string" indexed="true" stored="true"
> multiValued="false"/>
> <field name="stream_size" type="string" indexed="true" stored="true"
> multiValued="false"/>
> 
> 
>   <dynamicField name="attr_*" type="binary"/>
>  </fields>
> 
>  <!-- field to use to determine and enforce document uniqueness. -->
>  <uniqueKey>id</uniqueKey>
> 
>  <!-- field for the QueryParser to use when an explicit fieldname is absent
> -->
>  <defaultSearchField>name</defaultSearchField>
> 
>  <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
>  <solrQueryParser defaultOperator="OR"/>
> </schema>
> 
> ***********************************************************************
> Solrindex-mapping
> 
> <fields>
> <field dest="id" source="url"/>
> <field dest="host" source="host"/>
> <field dest="site" source="site"/>
> <field dest="title" source="title"/>
> <field dest="tstamp" source="tstamp"/>
> 
> <field dest="type" source="type"/>
> 
> <field dest="date" source="date"/>
> <field dest="lang" source="lang"/>
> <field dest="contentLength" source="contentLength"/>
> <field dest="lastModified" source="lastModified"/>
> 
> 
> <!--Para los tipos de imágenes específicos -->
> <field dest="content_type" source="content_type"/>
> <field dest="width" source="width"/>
> <field dest="height" source="height"/>
> <field dest="stream_name" source="stream_name"/>
> <field dest="stream_size" source="stream_size"/>
> 
> 
> <field dest="thumbnail" source="thumbnail"/>
>       </fields>
>       <uniqueKey>id</uniqueKey>
> </mapping>
> 
> 
> 
> 
> _____________________________________________________________________
> Ing. Eyeris Rodriguez Rueda
> Teléfono:837-3370
> Universidad de las Ciencias Informáticas
> _____________________________________________________________________
> 
> 
> 10mo. ANIVERSARIO DE LA CREACION DE LA UNIVERSIDAD DE LAS CIENCIAS 
> INFORMATICAS...
> CONECTADOS AL FUTURO, CONECTADOS A LA REVOLUCION
> 
> http://www.uci.cu
> http://www.facebook.com/universidad.uci
> http://www.flickr.com/photos/universidad_uci
> 

Reply via email to