Hi, in your incrementToken method, the two implementations differ: the original one uses setTermBuffer() to write the converted output back into the term, whereas the new one only resizes the buffer and never changes its contents.
Uwe ----- Uwe Schindler H.-H.-Meier-Allee 63, D-28213 Bremen http://www.thetaphi.de eMail: u...@thetaphi.de > -----Original Message----- > From: Paul Taylor [mailto:paul_t...@fastmail.fm] > Sent: Thursday, September 15, 2011 11:39 AM > To: 'java-user@lucene.apache.org' > Subject: Converting from TermAttribute to CharTermAttribute > > Have updated from Lucene 3.0 to lucene 3.1 an dnow getting various > deprecations that Im trying to move > > I change this filter class and now my test are failing, anybody able to see what > Im missing please > > Paul > > package org.musicbrainz.search.analysis; > > import org.apache.lucene.analysis.TokenFilter; > import org.apache.lucene.analysis.TokenStream; > import org.apache.lucene.analysis.tokenattributes.TermAttribute; > > import java.io.IOException; > > /** > * A filter that replaces accented characters by their unaccented equivalents. > */ > public class AccentFilter extends TokenFilter { > > private char[] output = new char[256]; > private int outputPos; > > private TermAttribute termAttr; > > public AccentFilter(TokenStream input) { > super(input); > termAttr = (TermAttribute) addAttribute(TermAttribute.class); > } > > @Override > public boolean incrementToken() throws IOException { > if (!input.incrementToken()) > return false; > > final char[] buffer = termAttr.termBuffer(); > final int length = termAttr.termLength(); > if (removeAccents(buffer, length)) { > termAttr.setTermBuffer(output, 0, outputPos); > } > return true; > } > > protected final boolean removeAccents(char[] input, int length) { > final int maxSizeNeeded = 2 * length; > int size = output.length; > while (size < maxSizeNeeded) > size *= 2; > > int inputPos = 0; > outputPos = 0; > > for (int i = 0; i < length; i++) { > int c = (int) input[i]; > > int block = UnaccentIndexes.indexes[c >> UnaccentData.BLOCK_SHIFT]; > int position = c & UnaccentData.BLOCK_MASK; > > short[] positions = UnaccentPositions.positions[block]; > int unacPosition = positions[position]; 
> int unacLength = positions[position + 1] - unacPosition; > > if (unacLength > 0) { > // allocate a new char array, if necessary > if (size != output.length) > output = new char[size]; > // copy front of the input > if (inputPos < i) { > System.arraycopy(input, inputPos, output, outputPos, i - inputPos); > outputPos += i - inputPos; > } > // copy unaccented data > System.arraycopy(UnaccentData.data[block], unacPosition, > output, outputPos, unacLength); > outputPos += unacLength; > inputPos = i + 1; > } > } > > // no conversion needed... > if (inputPos == 0) > return false; > > // copy rest of the input > int copyLength = length - inputPos; > if (copyLength > 0) { > System.arraycopy(input, inputPos, output, outputPos, copyLength); > outputPos += copyLength; > } > > return true; > } > > } > > to > > package org.musicbrainz.search.analysis; > > import org.apache.lucene.analysis.TokenFilter; > import org.apache.lucene.analysis.TokenStream; > import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; > > import java.io.IOException; > > /** > * A filter that replaces accented characters by their unaccented equivalents. 
> */ > public class AccentFilter extends TokenFilter { > > private char[] output = new char[256]; > private int outputPos; > > private CharTermAttribute termAttr; > > public AccentFilter(TokenStream input) { > super(input); > termAttr = (CharTermAttribute) addAttribute(CharTermAttribute.class); > } > > @Override > public boolean incrementToken() throws IOException { > if (!input.incrementToken()) > return false; > > final char[] buffer = termAttr.buffer(); > final int length = termAttr.length(); > if (removeAccents(buffer, length)) { > termAttr.resizeBuffer(outputPos); > } > return true; > } > > protected final boolean removeAccents(char[] input, int length) { > final int maxSizeNeeded = 2 * length; > int size = output.length; > while (size < maxSizeNeeded) > size *= 2; > > int inputPos = 0; > outputPos = 0; > > for (int i = 0; i < length; i++) { > int c = (int) input[i]; > > int block = UnaccentIndexes.indexes[c >> UnaccentData.BLOCK_SHIFT]; > int position = c & UnaccentData.BLOCK_MASK; > > short[] positions = UnaccentPositions.positions[block]; > int unacPosition = positions[position]; > int unacLength = positions[position + 1] - unacPosition; > > if (unacLength > 0) { > // allocate a new char array, if necessary > if (size != output.length) > output = new char[size]; > // copy front of the input > if (inputPos < i) { > System.arraycopy(input, inputPos, output, outputPos, i - inputPos); > outputPos += i - inputPos; > } > // copy unaccented data > System.arraycopy(UnaccentData.data[block], unacPosition, > output, outputPos, unacLength); > outputPos += unacLength; > inputPos = i + 1; > } > } > > // no conversion needed... 
> if (inputPos == 0) > return false; > > // copy rest of the input > int copyLength = length - inputPos; > if (copyLength > 0) { > System.arraycopy(input, inputPos, output, outputPos, copyLength); > outputPos += copyLength; > } > > return true; > } > > } > > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org > For additional commands, e-mail: java-user-h...@lucene.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org