I can't find any issue with the final custom analyzer declared in this code snippet (the one that uses a PatternTokenizer and is assigned to sa), but it never seems to be hit when I run my indexing code, despite being in the map. The fields do get indexed eventually, but I assume they're just falling back to the StandardAnalyzer I've declared as the default in the PerFieldAnalyzerWrapper. The other anonymous Analyzer declarations work fine, and I can see them hit a breakpoint. There are no errors on standard out or in my logs. Is there something obviously wrong with the createComponents() method initialization?
Map<String, Analyzer> analyzerPerField = new HashMap<>();
analyzerPerField.put(LITERAL_PROPERTY_VALUE_FIELD, literalAnalyzer);
analyzerPerField.put(LITERAL_AND_REVERSE_PROPERTY_VALUE_FIELD, literalAnalyzer);

if (doubleMetaphoneEnabled_) {
    // Standard tokenization, lowercased, stopped, then double-metaphone encoded.
    Analyzer temp = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final StandardTokenizer source =
                    new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
            source.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
            TokenStream filter = new StandardFilter(source);
            filter = new LowerCaseFilter(filter);
            filter = new StopFilter(filter, StandardAnalyzer.STOP_WORDS_SET);
            filter = new DoubleMetaphoneFilter(filter, 4, true);
            return new TokenStreamComponents(source, filter);
        }
    };
    analyzerPerField.put(DOUBLE_METAPHONE_PROPERTY_VALUE_FIELD, temp);
}

if (normEnabled_) {
    try {
        Analyzer temp = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        analyzerPerField.put(NORM_PROPERTY_VALUE_FIELD, temp);
    } catch (NoClassDefFoundError e) {
        // intentionally ignored
    }
}

if (stemmingEnabled_) {
    // Same pipeline as above, but Snowball-stemmed instead of metaphone-encoded.
    Analyzer temp = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final StandardTokenizer source =
                    new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
            source.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
            TokenStream filter = new StandardFilter(source);
            filter = new LowerCaseFilter(filter);
            filter = new StopFilter(filter, StandardAnalyzer.STOP_WORDS_SET);
            filter = new SnowballFilter(filter, "English");
            return new TokenStreamComponents(source, filter);
        }
    };
    analyzerPerField.put(STEMMING_PROPERTY_VALUE_FIELD, temp);
}

// The analyzer in question: splits input on the STRING_TOKEINZER_TOKEN pattern
// (group -1 means the pattern is treated as the delimiter between tokens).
Analyzer sa = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Pattern pattern = Pattern.compile(STRING_TOKEINZER_TOKEN);
        final PatternTokenizer source =
                new PatternTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, pattern, -1);
        TokenStream filter = new StandardFilter(source);
        System.out.println("In pattern matching analyzer");
        return new TokenStreamComponents(source, filter);
    }
};
analyzerPerField.put("sources", sa);
analyzerPerField.put("usageContexts", sa);
analyzerPerField.put("qualifiers", sa);

PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
        new StandardAnalyzer(CharArraySet.EMPTY_SET), analyzerPerField);
return analyzer;
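
For reference, this is the kind of standalone check I'd use to see which analyzer actually handles a given field, bypassing my indexing code entirely. It's just a sketch: the "a|b|c" sample input and the helper name are placeholders of mine, not part of the real code, and I'm assuming the standard Lucene tokenattributes imports.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Feed a sample value through the wrapper for one field and print the
// tokens it actually produces.
static void printTokens(Analyzer analyzer, String field, String text) throws IOException {
    try (TokenStream ts = analyzer.tokenStream(field, text)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(field + " -> " + term.toString());
        }
        ts.end();
    }
}

// e.g. printTokens(analyzer, "sources", "a|b|c");

If the output comes back split on my pattern, sa is wired in; if it looks like StandardAnalyzer output, the wrapper is falling through to the default analyzer for that field name.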