I was rather hoping for something smaller! One suggestion, from a quick glance: you're using an analyzer somewhere, but building a BooleanQuery out of a TermQuery or two, and the terms passed to a TermQuery are not analyzed. Are you sure (test it and prove it) that the strings you pass to the TermQuery are EXACTLY what has been indexed?
-- Ian. On Wed, Aug 14, 2013 at 3:29 PM, Ankit Murarka <ankit.mura...@rancoretech.com> wrote: > Hello. The problem is as follows: > > I have a document containing information in lines. So I am indexing all > files line by line. > So If I say in my document I have, > INSIDE POST OF SERVER\ > and in my index file created I have, > INSIDE POST OF SERVER\ > > and I fire a boolean query with INSIDE and POST with MUST/MUST, I am getting > no HIT. > > I am providing the complete CODE I am using to create INDEX and TO > SEARCH..Both are drawn from sample code present online. > > /*INDEX CODE: > */ > package org.RunAllQueriesWithLineByLinePhrases; > > public class CreateIndex { > public static void main(String[] args) { > String indexPath = "D:\\INDEXFORQUERY"; //Place where indexes will be > created > String docsPath="Indexed"; //Place where the files are kept. > boolean create=true; > final File docDir = new File(docsPath); > if (!docDir.exists() || !docDir.canRead()) { > System.exit(1); > } > try { > Directory dir = FSDirectory.open(new File(indexPath)); > Analyzer analyzer=new > CustomAnalyzerForCaseSensitive(Version.LUCENE_44); > IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, > analyzer); > if (create) { > iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); > } else { > System.out.println("Trying to set IWC mode to UPDATE...NOT > DESIRED.."); > } > IndexWriter writer = new IndexWriter(dir, iwc); > indexDocs(writer, docDir); > writer.close(); > } catch (IOException e) { > System.out.println(" caught a " + e.getClass() + > "\n with message: " + e.getMessage()); > } > } > static void indexDocs(IndexWriter writer, File file) > throws IOException { > if (file.canRead()) > { > if (file.isDirectory()) { > String[] files = file.list(); > if (files != null) { > for (int i = 0; i < files.length; i++) { > if(files[i]!=null) > indexDocs(writer, new File(file, files[i])); > } > } > } else { > try { > Document doc = new Document(); > Field pathField = new 
StringField("path", file.getPath(), > Field.Store.YES); > doc.add(pathField); > doc.add(new LongField("modified", file.lastModified(), > Field.Store.NO)); > LineNumberReader lnr=new LineNumberReader(new FileReader(file)); > String line=null; > while( null != (line = lnr.readLine()) ){ > doc.add(new StringField("contents",line,Field.Store.YES)); > } > if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { > writer.addDocument(doc); > } else { > writer.updateDocument(new Term("path", file.getPath()), doc); > } > } finally { > } > } > } > } } > > /*SEARCHING CODE:-*/ > > package org.RunAllQueriesWithLineByLinePhrases; > > public class SearchFORALLQUERIES { > public static void main(String[] args) throws Exception { > > String[] argument=new String[20]; > argument[0]="-index"; > argument[1]="D:\\INDEXFORQUERY"; > argument[2]="-field"; > argument[3]="contents"; //field value > argument[4]="-repeat"; > argument[5]="2"; //repeat value > argument[6]="-raw"; > argument[7]="-paging"; > argument[8]="300"; //paging value > > String index = "index"; > String field = "contents"; > String queries = null; > int repeat = 0; > boolean raw = false; > String queryString = null; > int hitsPerPage = 10; > > for(int i = 0;i < argument.length;i++) { > if ("-index".equals(argument[i])) { > index = argument[i+1]; > i++; > } else if ("-field".equals(argument[i])) { > field = argument[i+1]; > i++; > } else if ("-queries".equals(argument[i])) { > queries = argument[i+1]; > i++; > } else if ("-query".equals(argument[i])) { > queryString = argument[i+1]; > i++; > } else if ("-repeat".equals(argument[i])) { > repeat = Integer.parseInt(argument[i+1]); > i++; > } else if ("-raw".equals(argument[i])) { > raw = true; //set it true to just display the count. If false then > it also display file name. 
> } else if ("-paging".equals(argument[i])) { > hitsPerPage = Integer.parseInt(argument[i+1]); > if (hitsPerPage <= 0) { > System.err.println("There must be at least 1 hit per page."); > System.exit(1); > } > i++; > } > } > System.out.println("processing input"); > IndexReader reader = DirectoryReader.open(FSDirectory.open(new > File(index))); //location where indexes are. > IndexSearcher searcher = new IndexSearcher(reader); > BufferedReader in = null; > if (queries != null) { > in = new BufferedReader(new InputStreamReader(new > FileInputStream(queries), "UTF-8")); //provide query as input > } else { > in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); > } > while (true) { > if (queries == null && queryString == null) { // > prompt the user > System.out.println("Enter query: "); //if query is not present, > prompt the user to enter query. > } > String line = queryString != null ? queryString : in.readLine(); > > if (line == null || line.length() == -1) { > break; > } > line = line.trim(); > if (line.length() == 0) { > break; > } > String[] str=line.split(" "); > System.out.println("queries are " + str[0] + " and is " + str[1]); > Query query1 = new TermQuery(new Term(field, str[0])); > Query query2=new TermQuery(new Term(field,str[1])); > BooleanQuery booleanQuery = new BooleanQuery(); > booleanQuery.add(query1, BooleanClause.Occur.MUST); > booleanQuery.add(query2, BooleanClause.Occur.MUST); > if (repeat > 0) { //repeat=2 // repeat & > time as benchmark > Date start = new Date(); > for (int i = 0; i < repeat; i++) { > searcher.search(booleanQuery, null, 100); > } > Date end = new Date(); > System.out.println("Time: "+(end.getTime()-start.getTime())+"ms"); > } > doPagingSearch(in, searcher, booleanQuery, hitsPerPage, raw, queries > == null && queryString == null); > if (queryString != null) { > break; > } > } > reader.close(); > } > public static void doPagingSearch(BufferedReader in, IndexSearcher > searcher, Query query, > int hitsPerPage, 
boolean raw, boolean > interactive) throws IOException { > TopDocs results = searcher.search(query, 5 * hitsPerPage); > ScoreDoc[] hits = results.scoreDocs; > int numTotalHits = results.totalHits; > System.out.println(numTotalHits + " total matching documents"); > int start = 0; > int end = Math.min(numTotalHits, hitsPerPage); > while (true) { > if (end > hits.length) { > System.out.println("Only results 1 - " + hits.length +" of " + > numTotalHits + " total matching documents collected."); > System.out.println("Collect more (y/n) ?"); > String line = in.readLine(); > if (line.length() == 0 || line.charAt(0) == 'n') { > break; > } > hits = searcher.search(query, numTotalHits).scoreDocs; > } > end = Math.min(hits.length, start + hitsPerPage); //3 and 5. > for (int i = start; i < end; i++) { //0 to 3. > if (raw) { > > System.out.println("doc="+hits[i].doc+" score="+hits[i].score); > } > Document doc = searcher.doc(hits[i].doc); > List<IndexableField> filed=doc.getFields(); > filed.size(); > String path = doc.get("path"); > if (path != null) { > System.out.println((i+1) + ". " + path); > String title = doc.get("title"); > if (title != null) { > System.out.println(" Title: " + doc.get("title")); > } > } else { > System.out.println((i+1) + ". 
" + "No path for this document"); > } > } > if (!interactive || end == 0) { > break; > } > if (numTotalHits >= end) { > boolean quit = false; > while (true) { > System.out.print("Press "); > if (start - hitsPerPage >= 0) { > System.out.print("(p)revious page, "); > } > if (start + hitsPerPage < numTotalHits) { > System.out.print("(n)ext page, "); > } > System.out.println("(q)uit or enter number to jump to a page."); > String line = in.readLine(); > if (line.length() == 0 || line.charAt(0)=='q') { > quit = true; > break; > } > if (line.charAt(0) == 'p') { > start = Math.max(0, start - hitsPerPage); > break; > } else if (line.charAt(0) == 'n') { > if (start + hitsPerPage < numTotalHits) { > start+=hitsPerPage; > } > break; > } else { > int page = Integer.parseInt(line); > if ((page - 1) * hitsPerPage < numTotalHits) { > start = (page - 1) * hitsPerPage; > break; > } else { > System.out.println("No such page"); > } > } > } > if (quit) break; > end = Math.min(numTotalHits, start + hitsPerPage); > } > } > } > } > > /*CUSTOM ANALYZER CODE:*/ > > package com.rancore.demo; > > import java.io.IOException; > import java.io.Reader; > > import org.apache.lucene.analysis.TokenStream; > import org.apache.lucene.analysis.core.StopAnalyzer; > import org.apache.lucene.analysis.core.StopFilter; > import org.apache.lucene.analysis.standard.StandardFilter; > import org.apache.lucene.analysis.standard.StandardTokenizer; > import org.apache.lucene.analysis.util.CharArraySet; > import org.apache.lucene.analysis.util.StopwordAnalyzerBase; > import org.apache.lucene.util.Version; > > public class CustomAnalyzerForCaseSensitive extends StopwordAnalyzerBase { > > public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; > private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; > public static final CharArraySet STOP_WORDS_SET = > StopAnalyzer.ENGLISH_STOP_WORDS_SET; > public CustomAnalyzerForCaseSensitive(Version matchVersion, > CharArraySet stopWords) { > super(matchVersion, stopWords); > } > 
public CustomAnalyzerForCaseSensitive(Version matchVersion) { > this(matchVersion, STOP_WORDS_SET); > } > public CustomAnalyzerForCaseSensitive(Version matchVersion, Reader > stopwords) throws IOException { > this(matchVersion, loadStopwordSet(stopwords, matchVersion)); > } > public void setMaxTokenLength(int length) { > maxTokenLength = length; > } > /** > * @see #setMaxTokenLength > */ > public int getMaxTokenLength() { > return maxTokenLength; > } > @Override > protected TokenStreamComponents createComponents(final String fieldName, > final Reader reader) { > final StandardTokenizer src = new StandardTokenizer(matchVersion, > reader); > src.setMaxTokenLength(maxTokenLength); > TokenStream tok = new StandardFilter(matchVersion, src); > // tok = new LowerCaseFilter(matchVersion, tok); > tok = new StopFilter(matchVersion, tok, stopwords); > return new TokenStreamComponents(src, tok) { > @Override > protected void setReader(final Reader reader) throws > IOException { > > src.setMaxTokenLength(CustomAnalyzerForCaseSensitive.this.maxTokenLength); > super.setReader(reader); > } > }; > } > } > > > > I HOPE I HAVE GIVEN THE COMPLETE CODE SAMPLE FOR PEOPLE TO WORK ON.. > > PLEASE GUIDE ME NOW: IN case any further information is required please let > me know. > > > On 8/14/2013 7:43 PM, Ian Lea wrote: >> >> Well, you have supplied a bit more info - good - but I still can't >> spot the problem. Unless someone else can I suggest you post a very >> small self-contained program that demonstrates the problem. >> >> >> -- >> Ian. >> >> >> On Wed, Aug 14, 2013 at 2:50 PM, Ankit Murarka >> <ankit.mura...@rancoretech.com> wrote: >> >>> >>> Hello. >>> The problem does not seem to be getting solved. >>> >>> As mentioned, I am indexing each line of each file. 
>>> The sample text present inside LUKE is >>> >>> <am name="notification" value="10"/>\ >>> <type="DE">\ >>> java.lang.Thread.run(Thread.java:619) >>> >>>>> >>>>> Size of list array::0\ >>>>> >>> >>> at java.lang.reflect.Method.invoke(Method.java:597) >>> org.com.dummy,INFO,<< Still figuring out how to run >>> >>>>> >>>>> ,SERVER,100.100.100.100:8080,EXCEPTION,10613349 >>>>> >>> >>> INSIDE POST OF Listener\ >>> >>> In my Luke, I can see the text as "INSIDE POST OF Listener" .. This is >>> present in many files. >>> >>> /*Query is +contents:INSIDE contents:POST */ --/The field >>> name >>> is contents. Same analyzer is being used. This is a boolean query./ >>> >>> To test, I indexed only 20 files. In 19 files, this is present. >>> >>> The boolean query should give me a hit for this document. >>> >>> BUT IT IS RETURNING ME NO HIT.. >>> >>> If I index the same files WITHOUT line by line then, it gives me proper >>> hits.. >>> >>> But for me it should work on Indexes created by Line by Line parsing >>> also. >>> >>> Please guide. >>> >>> >>> >>> >>> >>> On 8/13/2013 4:41 PM, Ian Lea wrote: >>> >>>> >>>> remedialaction != "remedial action"? >>>> >>>> Show us your query. Show a small self-contained sample program or >>>> test case that demonstrates the problem. You need to give us >>>> something more to go on. >>>> >>>> >>>> -- >>>> Ian. >>>> >>>> >>>> On Tue, Aug 13, 2013 at 11:13 AM, Ankit Murarka >>>> <ankit.mura...@rancoretech.com> wrote: >>>> >>>> >>>>> >>>>> Hello, >>>>> I am aware of that link and I have been through that link >>>>> many >>>>> number of times. >>>>> >>>>> Problem I have is: >>>>> >>>>> 1. Each line is indexed. So indexed line looks something like >>>>> "<attribute >>>>> name="remedial action" value="Checking"/>\" >>>>> 2. I am easily firing a phrase query on this line. It suggest me the >>>>> possible values. No problem,. >>>>> 3. 
If I fire a Boolean Query with "remedialaction" and "Checking" as a >>>>> must/must , then it is not providing me this document as a hit. >>>>> 4. I am using StandardAnalyzer both during the indexing and searching >>>>> time. >>>>> >>>>> >>>>> On 8/13/2013 2:31 PM, Ian Lea wrote: >>>>> >>>>> >>>>>> >>>>>> Should be straightforward enough. Work through the tips in the FAQ >>>>>> entry at >>>>>> >>>>>> >>>>>> http://wiki.apache.org/lucene-java/LuceneFAQ#Why_am_I_getting_no_hits_.2F_incorrect_hits.3F >>>>>> and post back if that doesn't help, with details of how you are >>>>>> analyzing the data and how you are searching. >>>>>> >>>>>> >>>>>> -- >>>>>> Ian. >>>>>> >>>>>> >>>>>> On Tue, Aug 13, 2013 at 8:56 AM, Ankit Murarka >>>>>> <ankit.mura...@rancoretech.com> wrote: >>>>>> >>>>>> >>>>>> >>>>>>> >>>>>>> Hello All, >>>>>>> I have 2 different usecases. >>>>>>> I am trying to provide both boolean query and phrase search query in >>>>>>> the >>>>>>> application. >>>>>>> >>>>>>> In every line of the document which I am indexing I have content like >>>>>>> : >>>>>>> >>>>>>> <attribute name="remedial action" value="Checking"/>\ >>>>>>> >>>>>>> Due to the phrase search requirement, I am indexing each line of the >>>>>>> file >>>>>>> as >>>>>>> a new document. >>>>>>> >>>>>>> Now when I am trying to do a phrase query (Did you Mean, Infix >>>>>>> Analyzer >>>>>>> etc, >>>>>>> or phrase suggest) this seems to work fine and provide me with >>>>>>> desired >>>>>>> suggestions. >>>>>>> >>>>>>> Problem is : >>>>>>> >>>>>>> How do I invoke boolean query for this. I mean when I verified the >>>>>>> indexes >>>>>>> in Luke, I saw the whole line as expected is indexed. >>>>>>> >>>>>>> So, if user wish to perform a boolean query say suppose containing >>>>>>> "remedialaction" and "Checking" how do I get this document as a hit. >>>>>>> I >>>>>>> believe since I am indexing each line, this seems to be bit tricky. >>>>>>> >>>>>>> Please guide. 
>>>>>>> >>>>>>> -- >>>>>>> Regards >>>>>>> >>>>>>> Ankit >>>>>>> >>>>>>> >>>>>>> --------------------------------------------------------------------- >>>>>>> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >>>>>>> For additional commands, e-mail: java-user-h...@lucene.apache.org >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>> >>>>>> --------------------------------------------------------------------- >>>>>> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >>>>>> For additional commands, e-mail: java-user-h...@lucene.apache.org >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> >>>>> >>>>> >>>>> >>>>> -- >>>>> Regards >>>>> >>>>> Ankit Murarka >>>>> >>>>> "What lies behind us and what lies before us are tiny matters compared >>>>> with >>>>> what lies within us" >>>>> >>>>> >>>>> --------------------------------------------------------------------- >>>>> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >>>>> For additional commands, e-mail: java-user-h...@lucene.apache.org >>>>> >>>>> >>>>> >>>> >>>> --------------------------------------------------------------------- >>>> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >>>> For additional commands, e-mail: java-user-h...@lucene.apache.org >>>> >>>> >>>> >>>> >>> >>> >>> >>> -- >>> Regards >>> >>> Ankit Murarka >>> >>> "What lies behind us and what lies before us are tiny matters compared >>> with >>> what lies within us" >>> >>> >> >> --------------------------------------------------------------------- >> To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org >> For additional commands, e-mail: java-user-h...@lucene.apache.org >> >> >> > > > > -- > Regards > > Ankit Murarka > > "What lies behind us and what lies before us are tiny matters compared with > what lies within us" > --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: 
java-user-h...@lucene.apache.org