[ https://issues.apache.org/jira/browse/SOLR-5469?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alexandre Rafalovitch closed SOLR-5469. --------------------------------------- Resolution: Cannot Reproduce > The Analysis Page on the Solr Admin Page does not work with Custom Analyzers > ---------------------------------------------------------------------------- > > Key: SOLR-5469 > URL: https://issues.apache.org/jira/browse/SOLR-5469 > Project: Solr > Issue Type: Bug > Components: Schema and Analysis > Affects Versions: 4.0 > Environment: Windows, Tomcat, Java 1.7 > Reporter: Swami Rajamohan > Priority: Minor > Labels: Admin, AnalysisPage, Solr > > The Analysis Page on the Solr Admin Page does not work with Custom Analyzers. > To be specific, the Analysis page does not display all of the tokens output by > the custom analyzer if the tokens themselves don't have a KeywordAttribute > added. It does not matter that the tokens are not keyword tokens; it is just > that the tokens need to have the KeywordAttribute (even if it evaluates to > false). > I'm attaching the JSON output for a text_en field (solr.TextField > fieldType) and the JSON output for a field (mapped to a custom > fieldType). > The JSON generated for the custom fieldType (using a custom analyzer), while > similar in all aspects to the JSON generated for the text_en fieldType, does not > have KeywordAttribute set for the tokens (which seems valid). 
> JSON From the analysis page for the custom fieldType (custom analyzer) > { > "responseHeader":{ > "status":0, > "QTime":0}, > "analysis":{ > "field_types":{}, > "field_names":{ > "title":{ > "index":[ > > "org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter",[{ > "text":"concoct", > "raw_bytes":"[63 6f 6e 63 6f 63 74]", > "start":5, > "end":15, > "type":"word", > "position":1, > "positionHistory":[1]}, > { > "text":"solut", > "raw_bytes":"[73 6f 6c 75 74]", > "match":true, > "start":22, > "end":30, > "type":"word", > "position":4, > "positionHistory":[4]}, > { > "text":"trick", > "raw_bytes":"[74 72 69 63 6b]", > "match":true, > "start":31, > "end":37, > "type":"word", > "position":5, > "positionHistory":[5]}]], > "query":[ > > "org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter",[{ > "text":"solut", > "raw_bytes":"[73 6f 6c 75 74]", > "start":7, > "end":15, > "type":"word", > "position":2, > "positionHistory":[2]}, > { > "text":"trick", > "raw_bytes":"[74 72 69 63 6b]", > "start":16, > "end":22, > "type":"word", > "position":3, > "positionHistory":[3]}]]}}}} > JSON from the standard field (using the solr.TextField) which uses the > analyzer from the following built-in tokenizers/tokenfilters in the following > order. 
> solr.HTMLStripCharFilterFactory > solr.WhitespaceTokenizerFactory > solr.StopFilterFactory > solr.WordDelimiterFilterFactory > solr.ICUFoldingFilterFactory > solr.EnglishPossessiveFilterFactory > solr.PorterStemFilterFactory > { > "responseHeader":{ > "status":0, > "QTime":47}, > "analysis":{ > "field_types":{ > "text_en":{ > "index":[ > > "org.apache.lucene.analysis.charfilter.HTMLStripCharFilter","Concocting the a > Solution Tricks", > "org.apache.lucene.analysis.core.WhitespaceTokenizer",[{ > "text":"Concocting", > "raw_bytes":"[43 6f 6e 63 6f 63 74 69 6e 67]", > "start":0, > "end":10, > "position":1, > "positionHistory":[1], > "type":"word"}, > { > "text":"the", > "raw_bytes":"[74 68 65]", > "start":11, > "end":14, > "position":2, > "positionHistory":[2], > "type":"word"}, > { > "text":"a", > "raw_bytes":"[61]", > "start":15, > "end":16, > "position":3, > "positionHistory":[3], > "type":"word"}, > { > "text":"Solution", > "raw_bytes":"[53 6f 6c 75 74 69 6f 6e]", > "start":17, > "end":25, > "position":4, > "positionHistory":[4], > "type":"word"}, > { > "text":"Tricks", > "raw_bytes":"[54 72 69 63 6b 73]", > "start":26, > "end":32, > "position":5, > "positionHistory":[5], > "type":"word"}], > "org.apache.lucene.analysis.core.StopFilter",[{ > "text":"Concocting", > "raw_bytes":"[43 6f 6e 63 6f 63 74 69 6e 67]", > "position":1, > "positionHistory":[1, > 1], > "start":0, > "end":10, > "type":"word"}, > { > "text":"Solution", > "raw_bytes":"[53 6f 6c 75 74 69 6f 6e]", > "position":4, > "positionHistory":[4, > 4], > "start":17, > "end":25, > "type":"word"}, > { > "text":"Tricks", > "raw_bytes":"[54 72 69 63 6b 73]", > "position":5, > "positionHistory":[5, > 5], > "start":26, > "end":32, > "type":"word"}], > "org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter",[{ > "text":"Concocting", > "raw_bytes":"[43 6f 6e 63 6f 63 74 69 6e 67]", > "start":0, > "end":10, > "type":"word", > "position":1, > "positionHistory":[1, > 1, > 1]}, > { > "text":"Solution", > 
"raw_bytes":"[53 6f 6c 75 74 69 6f 6e]", > "start":17, > "end":25, > "type":"word", > "position":4, > "positionHistory":[4, > 4, > 4]}, > { > "text":"Tricks", > "raw_bytes":"[54 72 69 63 6b 73]", > "start":26, > "end":32, > "type":"word", > "position":5, > "positionHistory":[5, > 5, > 5]}], > "org.apache.lucene.analysis.icu.ICUFoldingFilter",[{ > "text":"concocting", > "raw_bytes":"[63 6f 6e 63 6f 63 74 69 6e 67]", > "position":1, > "positionHistory":[1, > 1, > 1, > 1], > "start":0, > "end":10, > "type":"word"}, > { > "text":"solution", > "raw_bytes":"[73 6f 6c 75 74 69 6f 6e]", > "position":4, > "positionHistory":[4, > 4, > 4, > 4], > "start":17, > "end":25, > "type":"word"}, > { > "text":"tricks", > "raw_bytes":"[74 72 69 63 6b 73]", > "position":5, > "positionHistory":[5, > 5, > 5, > 5], > "start":26, > "end":32, > "type":"word"}], > "org.apache.lucene.analysis.en.EnglishPossessiveFilter",[{ > "text":"concocting", > "raw_bytes":"[63 6f 6e 63 6f 63 74 69 6e 67]", > "position":1, > "positionHistory":[1, > 1, > 1, > 1, > 1], > "start":0, > "end":10, > "type":"word"}, > { > "text":"solution", > "raw_bytes":"[73 6f 6c 75 74 69 6f 6e]", > "position":4, > "positionHistory":[4, > 4, > 4, > 4, > 4], > "start":17, > "end":25, > "type":"word"}, > { > "text":"tricks", > "raw_bytes":"[74 72 69 63 6b 73]", > "position":5, > "positionHistory":[5, > 5, > 5, > 5, > 5], > "start":26, > "end":32, > "type":"word"}], > "org.apache.lucene.analysis.en.PorterStemFilter",[{ > "text":"concoct", > "raw_bytes":"[63 6f 6e 63 6f 63 74]", > > "org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword":false, > "position":1, > "positionHistory":[1, > 1, > 1, > 1, > 1, > 1], > "start":0, > "end":10, > "type":"word"}, > { > "text":"solut", > "raw_bytes":"[73 6f 6c 75 74]", > "match":true, > > "org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword":false, > "position":4, > "positionHistory":[4, > 4, > 4, > 4, > 4, > 4], > "start":17, > "end":25, > "type":"word"}, > { > 
"text":"trick", > "raw_bytes":"[74 72 69 63 6b]", > "match":true, > > "org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword":false, > "position":5, > "positionHistory":[5, > 5, > 5, > 5, > 5, > 5], > "start":26, > "end":32, > "type":"word"}]], > "query":[ > "org.apache.lucene.analysis.charfilter.HTMLStripCharFilter","a > Solution Tricks", > "org.apache.lucene.analysis.core.WhitespaceTokenizer",[{ > "text":"a", > "raw_bytes":"[61]", > "start":0, > "end":1, > "position":1, > "positionHistory":[1], > "type":"word"}, > { > "text":"Solution", > "raw_bytes":"[53 6f 6c 75 74 69 6f 6e]", > "start":2, > "end":10, > "position":2, > "positionHistory":[2], > "type":"word"}, > { > "text":"Tricks", > "raw_bytes":"[54 72 69 63 6b 73]", > "start":11, > "end":17, > "position":3, > "positionHistory":[3], > "type":"word"}], > "org.apache.lucene.analysis.core.StopFilter",[{ > "text":"Solution", > "raw_bytes":"[53 6f 6c 75 74 69 6f 6e]", > "position":2, > "positionHistory":[2, > 2], > "start":2, > "end":10, > "type":"word"}, > { > "text":"Tricks", > "raw_bytes":"[54 72 69 63 6b 73]", > "position":3, > "positionHistory":[3, > 3], > "start":11, > "end":17, > "type":"word"}], > "org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter",[{ > "text":"Solution", > "raw_bytes":"[53 6f 6c 75 74 69 6f 6e]", > "start":2, > "end":10, > "type":"word", > "position":2, > "positionHistory":[2, > 2, > 2]}, > { > "text":"Tricks", > "raw_bytes":"[54 72 69 63 6b 73]", > "start":11, > "end":17, > "type":"word", > "position":3, > "positionHistory":[3, > 3, > 3]}], > "org.apache.lucene.analysis.icu.ICUFoldingFilter",[{ > "text":"solution", > "raw_bytes":"[73 6f 6c 75 74 69 6f 6e]", > "position":2, > "positionHistory":[2, > 2, > 2, > 2], > "start":2, > "end":10, > "type":"word"}, > { > "text":"tricks", > "raw_bytes":"[74 72 69 63 6b 73]", > "position":3, > "positionHistory":[3, > 3, > 3, > 3], > "start":11, > "end":17, > "type":"word"}], > 
"org.apache.lucene.analysis.en.EnglishPossessiveFilter",[{ > "text":"solution", > "raw_bytes":"[73 6f 6c 75 74 69 6f 6e]", > "position":2, > "positionHistory":[2, > 2, > 2, > 2, > 2], > "start":2, > "end":10, > "type":"word"}, > { > "text":"tricks", > "raw_bytes":"[74 72 69 63 6b 73]", > "position":3, > "positionHistory":[3, > 3, > 3, > 3, > 3], > "start":11, > "end":17, > "type":"word"}], > "org.apache.lucene.analysis.en.PorterStemFilter",[{ > "text":"solut", > "raw_bytes":"[73 6f 6c 75 74]", > > "org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword":false, > "position":2, > "positionHistory":[2, > 2, > 2, > 2, > 2, > 2], > "start":2, > "end":10, > "type":"word"}, > { > "text":"trick", > "raw_bytes":"[74 72 69 63 6b]", > > "org.apache.lucene.analysis.tokenattributes.KeywordAttribute#keyword":false, > "position":3, > "positionHistory":[3, > 3, > 3, > 3, > 3, > 3], > "start":11, > "end":17, > "type":"word"}]]}}, > "field_names":{}}} > The latter json does show up correctly in the analysis page whereas the > former doesn't show up correctly. > Especially if the text involved in analysis involves StopWords. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org