Hi, I created a *token concat filter* to concatenate all the tokens from the token stream. It creates the concatenated token as expected.
But when I post an XML file containing more than 30,000 documents, only the first document ends up with the data in that field.

*Schema:*

<field name="titlex" type="text" indexed="true" stored="false" required="false" omitNorms="false" multiValued="false" />

<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ShingleFilterFactory" maxShingleSize="3" outputUnigrams="true" tokenSeparator=""/>
    <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
    <filter class="com.xyz.analysis.concat.ConcatenateWordsFilterFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="stemmed_synonyms_text_prime_ex_index.txt" ignoreCase="true" expand="true"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_text_prime_search.txt" enablePositionIncrements="true"/>
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
    <filter class="com.xyz.analysis.concat.ConcatenateWordsFilterFactory"/>
  </analyzer>
</fieldType>

Please help me. The code for the filter is below, please take a look.
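For context, the documents are posted using the standard Solr XML update format, roughly like this (the field values and titles here are only placeholders; the real file has 30,000+ <doc> entries and more fields):

<add>
  <!-- placeholder documents: only titlex is shown, other fields omitted -->
  <doc>
    <field name="titlex">First sample title</field>
  </doc>
  <doc>
    <field name="titlex">Second sample title</field>
  </doc>
</add>

Every document has a value for titlex, but after indexing only the first one gets the concatenated token.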
Here is a picture of what the filter is doing: <http://i.imgur.com/THCsYtG.png?1>

The code of the concat filter is:

package com.xyz.analysis.concat;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public class ConcatenateWordsFilter extends TokenFilter {

  private CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
  private OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
  PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
  TypeAttribute typeAtrr = addAttribute(TypeAttribute.class);

  private StringBuilder stringBuilder = new StringBuilder();
  private boolean exhausted = false;

  /**
   * Creates a new ConcatenateWordsFilter
   * @param input TokenStream that will be filtered
   */
  public ConcatenateWordsFilter(TokenStream input) {
    super(input);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public final boolean incrementToken() throws IOException {
    while (!exhausted && input.incrementToken()) {
      char terms[] = charTermAttribute.buffer();
      int termLength = charTermAttribute.length();
      if (typeAtrr.type().equals("<ALPHANUM>")) {
        stringBuilder.append(terms, 0, termLength);
      }
      charTermAttribute.copyBuffer(terms, 0, termLength);
      return true;
    }

    if (!exhausted) {
      exhausted = true;
      String sb = stringBuilder.toString();
      System.err.println("The Data got is " + sb);
      int sbLength = sb.length();
      //posIncr.setPositionIncrement(0);
      charTermAttribute.copyBuffer(sb.toCharArray(), 0, sbLength);
      offsetAttribute.setOffset(offsetAttribute.startOffset(), offsetAttribute.startOffset() + sbLength);
      stringBuilder.setLength(0);
      //typeAtrr.setType("CONCATENATED");
      return true;
    }
    return false;
  }
}

With Regards
Aman Tandon