Hello all, I’m working on a project with Solr. I had 1.4.1 working OK using ExtractingRequestHandler except that it was crashing on some PDFs. I noticed that Tika bundled with 1.4.1 was 0.4, which was kind of old. I decided to try updating to 0.7 as per the directions here: http://wiki.apache.org/solr/ExtractingRequestHandler but it was giving me errors (I forget what they were specifically).
Then I tried downloading Solr 3.1 from the source repository, which I noticed came with Tika 0.7. I figured this would be an easier route to get working. Now I’m testing with 3.1 and 0.7 and I’m noticing my documents are going into Solr OK, but they all have blank content (no document text stored in Solr). I did see that the default “text” field is not stored. Changing that to stored=true didn’t help. Changing to fmap.content=attr_content&uprefix=attr_content didn’t help either. I have attached all relevant info here. Please let me know if someone sees something I don’t (it’s entirely possible as I’m relatively new to Solr). Schema.xml: <?xml version="1.0" encoding="UTF-8" ?> <schema name="example" version="1.3"> <types> <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/> <fieldtype name="binary" class="solr.BinaryField"/> <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/> <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/> <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/> <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/> <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/> <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/> <fieldType name="pint" class="solr.IntField" omitNorms="true"/> <fieldType name="plong" class="solr.LongField" omitNorms="true"/> <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/> <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/> <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/> <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/> <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/> <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/> <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/> <fieldType name="random" class="solr.RandomSortField" indexed="true" /> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory"/> </analyzer> </fieldType> <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <analyzer type="index"> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.PorterStemFilterFactory"/> </analyzer> <analyzer type="query"> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.PorterStemFilterFactory"/> </analyzer> </fieldType> <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" > <analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.PorterStemFilterFactory"/> <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> </analyzer> </fieldType> <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> <analyzer type="query"> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType> <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/> <filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> </analyzer> <analyzer type="query"> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/> <filter class="solr.LowerCaseFilterFactory"/> </analyzer> </fieldType> <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true"> <analyzer> <tokenizer class="solr.KeywordTokenizerFactory"/> <filter class="solr.LowerCaseFilterFactory" /> <filter class="solr.TrimFilterFactory" /> <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement="" replace="all" /> </analyzer> </fieldType> <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" > <analyzer> <tokenizer class="solr.StandardTokenizerFactory"/> <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> </analyzer> </fieldtype> <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" > <analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> </analyzer> </fieldtype> <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.KeywordTokenizerFactory"/> <filter class="solr.LowerCaseFilterFactory" /> </analyzer> </fieldType> <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> <fieldType name="location" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> <fieldtype name="geohash" class="solr.GeoHashField"/> <fieldType name="tile" class="solr.SpatialTileField" start="4" end="15" subFieldSuffix="_tiled"/> </types> <fields> <field name="id" type="string" indexed="true" stored="true" required="true" /> <field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/> <field name="name" type="textgen" indexed="true" stored="true"/> <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/> <field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/> <field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" /> <field name="features" type="text" indexed="true" stored="true" multiValued="true"/> <field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> <field name="weight" type="float" indexed="true" stored="true"/> <field name="price" type="float" indexed="true" stored="true"/> <field name="popularity" type="int" indexed="true" stored="true" /> <field name="inStock" type="boolean" indexed="true" stored="true" /> <field name="store" type="location" indexed="true" stored="true"/> <field name="store_hash" type="geohash" indexed="true" stored="false"/> <field name="store_tiles" type="tile" indexed="true" stored="false"/> <field name="title" type="text" indexed="true" stored="true" multiValued="true"/> <field name="subject" type="text" indexed="true" stored="true"/> <field name="description" type="text" indexed="true" stored="true"/> <field name="comments" type="text" indexed="true" stored="true"/> <field name="author" type="textgen" indexed="true" stored="true"/> <field name="keywords" type="textgen" indexed="true" stored="true"/> <field name="category" type="textgen" indexed="true" stored="true"/> <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> <field name="last_modified" type="date" indexed="true" stored="true"/> <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> <field name="text" type="text" indexed="true" stored="false" multiValued="true"/> <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/> <field name="manu_exact" type="string" indexed="true" stored="false"/> <field name="payloads" type="payloads" indexed="true" stored="true"/> <dynamicField name="*_i" type="int" indexed="true" stored="true"/> <dynamicField name="*_s" type="string" indexed="true" stored="true"/> <dynamicField name="*_l" type="long" indexed="true" stored="true"/> <dynamicField name="*_t" type="text" indexed="true" stored="true"/> <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> <dynamicField name="*_f" type="float" indexed="true" stored="true"/> <dynamicField name="*_d" type="double" indexed="true" stored="true"/> <dynamicField name="*_tiled" type="double" indexed="true" stored="false"/> <dynamicField name="*_dt" type="date" indexed="true" stored="true"/> <dynamicField name="*_p" type="location" indexed="true" stored="true"/> <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/> <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/> <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/> <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/> <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/> <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/> <dynamicField name="ignored_*" type="ignored" multiValued="true"/> <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/> <dynamicField name="random_*" type="random" /> </fields> <uniqueKey>id</uniqueKey> <defaultSearchField>text</defaultSearchField> <solrQueryParser defaultOperator="OR"/> <copyField source="cat" dest="text"/> <copyField source="store" dest="store_hash"/> <copyField source="store" dest="store_tiles"/> <copyField source="name" dest="text"/> <copyField source="manu" dest="text"/> <copyField source="features" dest="text"/> <copyField source="includes" dest="text"/> <copyField source="manu" dest="manu_exact"/> </schema> Solrconfig.xml: <?xml version="1.0" encoding="UTF-8" ?> <config> <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError> <luceneMatchVersion>LUCENE_31</luceneMatchVersion> <lib dir="./contrib/extraction/lib" /> <lib dir="./lib"/> <lib dir="./contrib/clustering/lib" /> <dataDir>C:/Program Files/Apache Software Foundation/solr-3.1/data</dataDir> <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/> <indexDefaults> <useCompoundFile>false</useCompoundFile> <mergeFactor>10</mergeFactor> <ramBufferSizeMB>32</ramBufferSizeMB> <maxFieldLength>10000</maxFieldLength> <writeLockTimeout>1000</writeLockTimeout> <commitLockTimeout>10000</commitLockTimeout> <lockType>native</lockType> </indexDefaults> <mainIndex> <useCompoundFile>false</useCompoundFile> <ramBufferSizeMB>32</ramBufferSizeMB> <mergeFactor>10</mergeFactor> <unlockOnStartup>false</unlockOnStartup> <reopenReaders>true</reopenReaders> <deletionPolicy class="solr.SolrDeletionPolicy"> <str name="maxCommitsToKeep">1</str> <str name="maxOptimizedCommitsToKeep">0</str> </deletionPolicy> <infoStream file="INFOSTREAM.txt">false</infoStream> </mainIndex> <jmx /> <updateHandler class="solr.DirectUpdateHandler2"> </updateHandler> <query> <maxBooleanClauses>1024</maxBooleanClauses> <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/> <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/> <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/> <enableLazyFieldLoading>true</enableLazyFieldLoading> <queryResultWindowSize>20</queryResultWindowSize> <queryResultMaxDocsCached>200</queryResultMaxDocsCached> <listener event="newSearcher" class="solr.QuerySenderListener"> <arr name="queries"> </arr> </listener> <listener event="firstSearcher" class="solr.QuerySenderListener"> <arr name="queries"> <lst> <str name="q">solr rocks</str><str name="start">0</str><str name="rows">10</str></lst> <lst><str name="q">static firstSearcher warming query from solrconfig.xml</str></lst> </arr> </listener> <useColdSearcher>false</useColdSearcher> <maxWarmingSearchers>2</maxWarmingSearchers> </query> <requestDispatcher handleSelect="true" > <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048000" /> <httpCaching lastModifiedFrom="openTime" etagSeed="Solr"> </httpCaching> </requestDispatcher> <requestHandler name="standard" class="solr.SearchHandler" default="true"> <!-- default values for query parameters --> <lst name="defaults"> <str name="echoParams">explicit</str> <!-- <int name="rows">10</int> <str name="fl">*</str> <str name="version">2.1</str> --> </lst> </requestHandler> <requestHandler name="/browse" class="solr.SearchHandler"> <lst name="defaults"> <str name="wt">velocity</str> <str name="v.template">browse</str> <str name="v.layout">layout</str> <str name="title">Solritas</str> <str name="defType">dismax</str> <str name="q.alt">*:*</str> <str name="rows">10</str> <str name="fl">*,score</str> <str name="facet">on</str> <str name="facet.field">cat</str> <str name="facet.field">manu_exact</str> <str name="facet.mincount">1</str> <str name="qf"> text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 </str> <str name="hl">on</str> <str name="hl.fl">text features name</str> <str name="f.name.hl.fragsize">0</str> <str name="f.name.hl.alternateField">name</str> </lst> </requestHandler> <requestHandler name="dismax" class="solr.SearchHandler" > <lst name="defaults"> <str name="defType">dismax</str> <str name="echoParams">explicit</str> <float name="tie">0.01</float> <str name="qf"> text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 </str> <str name="pf"> text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 </str> <str name="bf"> popularity^0.5 recip(price,1,1000,1000)^0.3 </str> <str name="fl"> id,name,price,score </str> <str name="mm"> 2<-1 5<-2 6<90% </str> <int name="ps">100</int> <str name="q.alt">*:*</str> <!-- example highlighter config, enable per-query with hl=true --> <str name="hl.fl">text features name</str> <!-- for this field, we want no fragmenting, just highlighting --> <str name="f.name.hl.fragsize">0</str> <!-- instructs Solr to return the field itself if no query terms are found --> <str name="f.name.hl.alternateField">name</str> <str name="f.text.hl.fragmenter">regex</str> <!-- defined below --> </lst> </requestHandler> <requestHandler name="partitioned" class="solr.SearchHandler" > <lst name="defaults"> <str name="defType">dismax</str> <str name="echoParams">explicit</str> <str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str> <str name="mm">2<-1 5<-2 6<90%</str> <!-- This is an example of using Date Math to specify a constantly moving date range in a config... --> <str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str> </lst> <lst name="appends"> <str name="fq">inStock:true</str> </lst> <lst name="invariants"> <str name="facet.field">cat</str> <str name="facet.field">manu_exact</str> <str name="facet.query">price:[* TO 500]</str> <str name="facet.query">price:[500 TO *]</str> </lst> </requestHandler> <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> <str name="queryAnalyzerFieldType">textSpell</str> <lst name="spellchecker"> <str name="name">default</str> <str name="field">name</str> <str name="spellcheckIndexDir">./spellchecker</str> </lst> </searchComponent> <requestHandler name="/spell" class="solr.SearchHandler" lazy="true"> <lst name="defaults"> <str name="spellcheck.onlyMorePopular">false</str> <str name="spellcheck.extendedResults">false</str> <str name="spellcheck.count">1</str> </lst> <arr name="last-components"> <str>spellcheck</str> </arr> </requestHandler> <searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/> <requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler"> <lst name="defaults"> <bool name="tv">true</bool> </lst> <arr name="last-components"> <str>tvComponent</str> </arr> </requestHandler> <searchComponent name="clusteringComponent" enable="${solr.clustering.enabled:false}" class="org.apache.solr.handler.clustering.ClusteringComponent" > <lst name="engine"> <!-- The name, only one can be named "default" --> <str name="name">default</str> <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str> <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str> </lst> <lst name="engine"> <str name="name">stc</str> <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str> </lst> </searchComponent> <requestHandler name="/clustering" enable="${solr.clustering.enabled:false}" class="solr.SearchHandler"> <lst name="defaults"> <bool name="clustering">true</bool> <str name="clustering.engine">default</str> <bool name="clustering.results">true</bool> <!-- The title field --> <str name="carrot.title">name</str> <str name="carrot.url">id</str> <!-- The field to cluster on --> <str name="carrot.snippet">features</str> <!-- produce summaries --> <bool name="carrot.produceSummary">true</bool> <!-- the maximum number of labels per cluster --> <!--<int name="carrot.numDescriptions">5</int>--> <!-- produce sub clusters --> <bool name="carrot.outputSubClusters">false</bool> </lst> <arr name="last-components"> <str>clusteringComponent</str> </arr> </requestHandler> <requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler" startup="lazy"> <lst name="defaults"> <str name="fmap.content">text</str> <str name="lowernames">true</str> <str name="uprefix">ignored_</str> <!-- capture link hrefs but ignore div attributes --> <str name="captureAttr">true</str> <str name="fmap.a">links</str> <str name="fmap.div">ignored_</str> </lst> </requestHandler> <searchComponent name="termsComponent" class="org.apache.solr.handler.component.TermsComponent"/> <requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler"> <lst name="defaults"> <bool name="terms">true</bool> </lst> <arr name="components"> <str>termsComponent</str> </arr> </requestHandler> <searchComponent name="elevator" class="solr.QueryElevationComponent" > <!-- pick a fieldType to analyze queries --> <str name="queryFieldType">string</str> <str name="config-file">elevate.xml</str> </searchComponent> <!-- a request handler utilizing the elevator component --> <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> <lst name="defaults"> <str name="echoParams">explicit</str> </lst> <arr name="last-components"> <str>elevator</str> </arr> </requestHandler> <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" /> <requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler" /> <requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" /> <requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" /> <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" /> <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> <requestHandler name="/admin/ping" class="PingRequestHandler"> <lst name="defaults"> <str name="qt">standard</str> <str name="q">solrpingquery</str> <str name="echoParams">all</str> </lst> </requestHandler> <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > <lst name="defaults"> <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' --> <str name="echoHandler">true</str> </lst> </requestHandler> <searchComponent class="solr.HighlightComponent" name="highlight"> <highlighting> <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> <lst name="defaults"> <int name="hl.fragsize">100</int> </lst> </fragmenter> <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> <lst name="defaults"> <!-- slightly smaller fragsizes work better because of slop --> <int name="hl.fragsize">70</int> <!-- allow 50% slop on fragment sizes --> <float name="hl.regex.slop">0.5</float> <!-- a basic sentence pattern --> <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> </lst> </fragmenter> <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> <lst name="defaults"> <str name="hl.simple.pre"><![CDATA[<em>]]></str> <str name="hl.simple.post"><![CDATA[</em>]]></str> </lst> </formatter> <fragListBuilder name="simple" class="org.apache.solr.highlight.SimpleFragListBuilder" default="true"/> <fragListBuilder name="single" class="org.apache.solr.highlight.SingleFragListBuilder"/> <fragmentsBuilder name="colored" class="org.apache.solr.highlight.MultiColoredScoreOrderFragmentsBuilder" default="true"/> </highlighting> </searchComponent> <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> <int name="xsltCacheLifetimeSeconds">5</int> </queryResponseWriter> <admin> <defaultQuery>solr</defaultQuery> </admin> </config> Test1.txt document: Asdf Asdf Asdf Adsf Upload command: curl "http://localhost:8080/solr/update/extract?literal.id=123&uprefix=attr_&fmap.content=attr_content&commit=true" -F "myfi...@test1.txt” RESULTS from an id:[* TO *] query: <response> − <lst name="responseHeader"> <int name="status">0</int> <int name="QTime">91</int> − <lst name="params"> <str name="explainOther"/> <str name="fl">*,score</str> <str name="indent">on</str> <str name="start">0</str> <str name="q">id:[* TO *]</str> <str name="hl.fl"/> <str name="qt">standard</str> <str name="wt">standard</str> <str name="fq"/> <str name="rows">10</str> <str name="version">2.2</str> </lst> </lst> − <result name="response" numFound="1" start="0" maxScore="1.0"> − <doc> <float name="score">1.0</float> − <arr name="attr_content"> <str> </str> </arr> − <arr name="attr_stream_content_type"> <str>text/plain</str> </arr> − <arr name="attr_stream_name"> <str>test1.txt</str> </arr> − <arr name="attr_stream_size"> <str>24</str> </arr> − <arr name="attr_stream_source_info"> <str>myfile</str> </arr> − <arr name="content_type"> <str>text/plain</str> </arr> <str name="id">123</str> </doc> </result> </response> Note that the attr_content section of the response is blank. Any help & hints would be GREATLY appreciated…=) Best, Dave