RES: BrazilianAnalyzer don't woks with any BooleanQuery

Marcelo Neves Thu, 12 Jul 2012 11:39:19 -0700

OK. I'm using positions on the ANALYZED fields, which are searched by terms. The 
other fields are NOT_ANALYZED and are searched by the complete term, such as 
culture code, URL and document code.
The index has documents in three languages (Spanish, English and Brazilian 
Portuguese). When I perform a search, I apply filters using TermQuery on the 
NOT_ANALYZED fields, and I use PhraseQuery, PrefixQuery or TermQuery, added to a 
BooleanQuery, on the analyzed fields. The same code works with StandardAnalyzer. 
With BrazilianAnalyzer, most terms do not return any of the stored documents 
that were indexed with this analyzer.


In the attached code, "searchExpression" is the phrase containing the words 
entered by the end user. I search for documents containing all of the words, any 
of the words, or the whole phrase. This method builds my BooleanQuery object 
only for the content (full text).

Below is a sample of the settings of the columns I index. Some values are 
listed here with the names of the columns.
After that code snippet there is a snippet of the search implementation.

[code]
// Here is the configuration of the fields for each document.
// Every field is stored (StoreField.YES). Only the two content fields keep
// term occurrences (WITHPOSITIONSOFFSETS); all other fields use
// StoreTermOccurrencesField.NO.
                document = new DocumentIndex();
                
// Full-text content fields: ANALYZED, with positions and offsets.
document.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, 
HtmlConvert.Convert(content), IndexField.ANALYZED, StoreField.YES, 
StoreTermOccurrencesField.WITHPOSITIONSOFFSETS)
                
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML, 
HtmlConvert.Convert(nodeXml), IndexField.ANALYZED, StoreField.YES, 
StoreTermOccurrencesField.WITHPOSITIONSOFFSETS)
                // Key/filter fields: NOTANALYZED, so they are matched by their complete value.
                // Most values are lower-cased before indexing; NodeID and NodeACLID are not.
                .AddField(ColumnsIndexedDocuments.COLUMN_SITENAME, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_SITENAME], 
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_NODESITEID, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODESITEID], 
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_CLASSNAME, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_CLASSNAME], 
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_NODEID, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEID], ""), 
IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_NODEACLID, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEACLID], ""), 
IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_NODEALIASPATH, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEALIASPATH], 
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH],
 "").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH],
 "").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTMENUITEMHIDEINNAVIGATION, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTMENUITEMHIDEINNAVIGATION],
 "").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
 "").ToLower(), IndexField.NOTANALYZED, StoreField.YES, 
StoreTermOccurrencesField.NO)
                // Name and tag fields: ANALYZED, without term occurrences.
                .AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME], 
""), IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_NODENAME, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODENAME], ""), 
IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTTAGS, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTTAGS], 
""), IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
                
// Publish-from / publish-to dates: converted to strings with DateTools at
// MILLISECOND resolution and indexed NOTANALYZED. DataHelper.DATETIME_MIN_BD
// is passed to GetDateTime as the fallback value, together with the document
// culture.
.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHFROM, 
DateTools.DateToString(ValidationHelper.GetDateTime(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHFROM],
 DataHelper.DATETIME_MIN_BD, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
 "")), DateTools.Resolution.MILLISECOND), IndexField.NOTANALYZED, 
StoreField.YES, StoreTermOccurrencesField.NO)
                .AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHTO, 
DateTools.DateToString(ValidationHelper.GetDateTime(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHTO],
 DataHelper.DATETIME_MIN_BD, 
ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
 "")), DateTools.Resolution.MILLISECOND), IndexField.NOTANALYZED, 
StoreField.YES, StoreTermOccurrencesField.NO);
                // The document is written with the analyzer held in Analyzer.
                // NOTE(review): presumably the same per-culture analyzer that the
                // search side selects — confirm against the caller.
                IndexManagerWriter.AddDocument(document, Analyzer);
[/code]

[code]
// Here is the conjunction of queries.
// Builds the main BooleanQuery from the search expression, builds a Filter
// from the parameter query (site, path, culture, publish state, class names),
// then runs the search through IndexManagerReader and returns its DataSet.
// The parameters searchChildNodes, filterResultsByReadPermission,
// whereCondition, orderBy and filterCondition are not used in this body.
                public override DataSet Search(string siteName, string 
searchNodePath, string cultureCode, string searchExpression, SearchModeEnum 
searchMode, bool searchChildNodes, string classNames, bool 
filterResultsByReadPermission, bool searchOnlyPublished, string whereCondition, 
string orderBy, bool combineWithDefaultCulture, string filterCondition, bool 
searchDocumentHistory, bool onlyDocumentHistory)
        {
            //Main boolean query
            BooleanQuery queryMain = new BooleanQuery();

            //Filter by the search expression (skipped when it is null or empty,
            //in which case queryMain stays without clauses)
            if (!string.IsNullOrEmpty(searchExpression))
            {
                BooleanQuery queryExpression = 
GetExpressionBooleanQuery(searchExpression, searchMode, searchDocumentHistory, 
onlyDocumentHistory);
                queryMain.Add(queryExpression, BooleanClause.Occur.MUST);
            }

            //Filter by the parameters: the parameter query is applied as a
            //Filter instead of being added as a MUST clause of queryMain
            BooleanQuery queryParameters = GetParametersBooleanQuery(siteName, 
searchNodePath, cultureCode, combineWithDefaultCulture, searchOnlyPublished, 
classNames);
            //queryMain.Add(queryParameters, BooleanClause.Occur.MUST);
            Filter filterParameters = new QueryFilter(queryParameters);

            //Select the analyzer for the requested culture
            Analyzer = GetAnalyzer(cultureCode);            

            //Create the query parser (default field: document content)
            QueryParser queryParser = new 
QueryParser(Lucene.Net.Util.Version.LUCENE_29, 
ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, Analyzer);

            //Open the index for reading
            
IndexManagerReader.OpenReader(SearchProvider.DirectoryPathReaderIndex);

            string[] columns = new string[] { 
ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, 
ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML, 
ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME, 
ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH, 
ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH, 
ColumnsIndexedDocuments.COLUMN_NODEALIASPATH, 
ColumnsIndexedDocuments.COLUMN_NODEID, 
ColumnsIndexedDocuments.COLUMN_NODESITEID, 
ColumnsIndexedDocuments.COLUMN_NODEACLID, 
ColumnsIndexedDocuments.COLUMN_SITENAME, 
ColumnsIndexedDocuments.COLUMN_CLASSNAME };
            //Highlight the content field on the live site, the node XML field otherwise
            string fieldHighlight = CMSContext.ViewMode == 
ViewModeEnum.LiveSite ? ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT : 
ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML;
            //The programmatically built query is turned into text with ToString()
            //and parsed again by queryParser, which was created with Analyzer.
            //NOTE(review): Query.ToString() output is not guaranteed to be valid
            //QueryParser syntax (special characters, empty query) - confirm this
            //round trip is intended rather than passing queryMain directly.
            return 
IndexManagerReader.Search(queryParser.Parse(queryMain.ToString()), 
filterParameters, ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, Analyzer, 
NumberMaxSearchResults, columns, fieldHighlight, searchExpression);
        }
[/code]

[code]
// Here is the method that receives the query and the filter, performs the
// search and processes the result.
/// <summary>
/// Runs <paramref name="query"/>, restricted by <paramref name="filter"/>, against the index
/// stored at <c>directoryPath</c> and converts the hits into a "SearchResult" table.
/// </summary>
/// <param name="query">Query to execute; it is also handed to the highlighter to build fragments.</param>
/// <param name="filter">Filter applied to the search.</param>
/// <param name="mainColumnContentName">
/// Name of the result column that receives the highlighted fragment. A field with this
/// name in <paramref name="fieldsReturn"/> is not copied as a column of its own.
/// </param>
/// <param name="analyzer">Not used by this method.</param>
/// <param name="top">Maximum number of hits to retrieve.</param>
/// <param name="fieldsReturn">Fields to copy from each hit, in column order.</param>
/// <param name="fieldHighlightName">Field whose best fragment is highlighted for each hit.</param>
/// <param name="searchExpression">Not used by this method.</param>
/// <returns>
/// A <see cref="DataSet"/> holding one "SearchResult" table whose first column, "Index",
/// is the Lucene document number; <c>null</c> when the search produced no hits.
/// </returns>
public DataSet Search(
    Lucene.Net.Search.Query query,
    Filter filter,
    string mainColumnContentName,
    Analyzer analyzer,
    int top,
    string[] fieldsReturn,
    string fieldHighlightName,
    string searchExpression)
{
    Lucene.Net.Store.Directory directory = FSDirectory.Open(new DirectoryInfo(directoryPath));
    try
    {
        IndexSearcher searcher = new IndexSearcher(directory, true);
        try
        {
            TopDocs topDocs = searcher.Search(query, filter, top);
            if (topDocs.scoreDocs == null || topDocs.scoreDocs.Length == 0)
            {
                // Callers receive null (not an empty DataSet) when nothing matched.
                return null;
            }

            DataTable docs = new DataTable("SearchResult");
            docs.Columns.Add("Index");
            for (int i = 0; i < fieldsReturn.Length; i++)
            {
                if (fieldsReturn[i] == fieldHighlightName)
                {
                    // The highlighted fragment is exposed under the main content column name.
                    docs.Columns.Add(mainColumnContentName, typeof(string));
                }
                else if (fieldsReturn[i] != mainColumnContentName)
                {
                    docs.Columns.Add(fieldsReturn[i], typeof(string));
                }
            }

            // One highlighter is enough for the whole result set.
            FastVectorHighlighter highlighter = new FastVectorHighlighter();

            for (int j = 0; j < topDocs.scoreDocs.Length; j++)
            {
                int docId = topDocs.scoreDocs[j].doc;
                Document document = searcher.Doc(docId);

                List<string> values = new List<string>();
                values.Add(docId.ToString());
                for (int k = 0; k < fieldsReturn.Length; k++)
                {
                    if (fieldsReturn[k] == fieldHighlightName)
                    {
                        string fragment = highlighter.GetBestFragment(
                            highlighter.GetFieldQuery(query),
                            searcher.GetIndexReader(),
                            docId,
                            fieldHighlightName,
                            NumberMaxCaractersHighlight);
                        values.Add(fragment);
                    }
                    else if (fieldsReturn[k] != mainColumnContentName)
                    {
                        // Mirrors the column layout above: the main content field has no
                        // column of its own unless it is also the highlight field.
                        values.Add(document.Get(fieldsReturn[k]));
                    }
                }
                docs.Rows.Add(values.ToArray());
            }

            DataSet result = new DataSet();
            result.Tables.Add(docs);
            return result;
        }
        finally
        {
            // The searcher opened its own reader from the directory; release it on every path.
            searcher.Close();
        }
    }
    finally
    {
        directory.Close();
    }
}
[/code]

It works perfectly with the other analyzers. With BrazilianAnalyzer, I search 
for the term "ferramenta" and nothing is returned. There are documents in 
"es-ES" and "en" in the index with this term. If I search filtering the 
"DocumentCulture" field by the value "es-es" (culture field not analyzed, text 
field analyzed with SnowballAnalyzer("Spanish")), documents are returned. If I 
filter by "pt-BR" (culture field not analyzed, text field analyzed with 
BrazilianAnalyzer), nothing comes back.

Is it a bug in [BrazilianAnalyzer]?


-----Mensagem original-----
De: Simon Willnauer [mailto:simon.willna...@gmail.com] 
Enviada em: quinta-feira, 12 de julho de 2012 04:48
Para: java-user@lucene.apache.org
Assunto: Re: BrazilianAnalyzer don't woks with any BooleanQuery

Can you tell us more about your index side of things? Are you using positions 
in the index, since I see PhraseQuery in your code?
Where are you passing the text you are searching for to the BrazilianAnalyzer? 
I don't see it in your code. You need to process your text at search time too, 
to get results.

simon

On Wed, Jul 11, 2012 at 5:32 PM, Marcelo Neves <marcelo.ne...@xgen.com.br>wrote:

> Hi all,
>
> I created the method below, which generates my boolean query based on many 
> parameters. The queries on not-analyzed fields work perfectly in 
> debug.
>
> When I start a search using any analyzed field with BrazilianAnalyzer, 
> it always returns an empty result (zero docs). I tested this in a separate 
> solution with a single field using BrazilianAnalyzer for indexing and 
> searching. If I use a BooleanQuery and not the query parser, it doesn't work 
> as expected: it returns an empty result.
>
> When I change to StandardAnalyzer for indexing and searching, without 
> altering other parts of the code, it works.
>
> Could anyone help me?
>
> [Code]****
>
> ** **
>
> protected virtual BooleanQuery GetExpressionBooleanQuery(string 
> searchExpression, SearchModeEnum searchMode, bool 
> searchDocumentHistory, bool onlyDocumentHistory)****
>
> {****
>
>                 if (string.IsNullOrEmpty(searchExpression))****
>
>                 {****
>
>                                return null;****
>
>                 }****
>
> ** **
>
>                 BooleanQuery queryWords = new BooleanQuery();****
>
>                 PhraseQuery phrase = new PhraseQuery();****
>
>                 string[] expressions = null;****
>
> ** **
>
>                 expressions = searchExpression.ToLower().Replace("\"",
> "").Split(' ');****
>
>                 for (int i = 0; i < expressions.Length; i++)****
>
>                 {****
>
>                                if
> (!string.IsNullOrEmpty(expressions[i].Trim()))****
>
>                                {****
>
>                                                if (expressions.Length 
> > 1)
> ****
>
>                                                {****
>
>                                                                switch
> (searchMode)****
>
>                                                                {****
>
>
> case SearchModeEnum.ExactPhrase:****
>
>
> if (!onlyDocumentHistory)****
>
>
> {****
>
>
> phrase.Add(new Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
> expressions[i].Trim()), i);****
>
>
> }****
>
>
> if (searchDocumentHistory)****
>
>
> {****
>
>
> phrase.Add(new 
> Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML,
> expressions[i].Trim()), i);****
>
>
> }****
>
>
> break;****
>
>
> case SearchModeEnum.AnyWord:****
>
>
> if (!onlyDocumentHistory)****
>
>
> {****
>
>
> queryWords.Add(new BooleanClause(new TermQuery(new 
> Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
> expressions[i].Trim())), BooleanClause.Occur.SHOULD));****
>
>
> }****
>
>
> if (searchDocumentHistory)****
>
>
> {****
>
>
>
> queryWords.Add(new BooleanClause(new TermQuery(new 
> Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML,
> expressions[i].Trim())), BooleanClause.Occur.SHOULD));****
>
>
> }****
>
>
> break;****
>
>
> case SearchModeEnum.AllWords:****
>
>
> if (!onlyDocumentHistory)****
>
>
> {****
>
>
> queryWords.Add(new BooleanClause(new TermQuery(new 
> Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
> expressions[i].Trim())), BooleanClause.Occur.MUST));****
>
>
> }****
>
>
> if (searchDocumentHistory)****
>
>
> {****
>
>
> queryWords.Add(new BooleanClause(new TermQuery(new 
> Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML,
> expressions[i].Trim())), BooleanClause.Occur.MUST));****
>
>
> }****
>
>
> break;****
>
>
> default:****
>
>
> break;****
>
>                                                                }****
>
>                                                }****
>
>                                                else****
>
>                                                {****
>
>                                                                if
> (!onlyDocumentHistory)****
>
>                                                                {****
>
>
> queryWords.Add(new BooleanClause(new TermQuery(new 
> Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
> expressions[i].Trim())), BooleanClause.Occur.MUST));****
>
>                                                                }****
>
>                                                                if
> (searchDocumentHistory)****
>
>                                                                {****
>
>
> queryWords.Add(new BooleanClause(new TermQuery(new 
> Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML,
> expressions[i].Trim())), BooleanClause.Occur.MUST));****
>
>                                                                }****
>
>                                                }****
>
>                                }****
>
>                 }****
>
>                 if (phrase.GetTerms().Length > 0)****
>
>                 {****
>
>                                queryWords.Add(new 
> BooleanClause(phrase,
> BooleanClause.Occur.MUST));****
>
>                 }****
>
>                 return queryWords;****
>
> }****
>
> ** **
>
> [/Code]****
>
> ** **
>
> I wait for help. Please!****
>
> ** **
>
> Thanks!****
>
> ** **
>
> [image: Descrição: marcelo-neves]****
>
> ** **
>



---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org

RES: BrazilianAnalyzer don't woks with any BooleanQuery

Reply via email to