Revision: 7171
          http://languagetool.svn.sourceforge.net/languagetool/?rev=7171&view=rev
Author:   dnaber
Date:     2012-06-02 14:41:57 +0000 (Sat, 02 Jun 2012)
Log Message:
-----------
index checking: return the number of sentences actually checked

Modified Paths:
--------------
    trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
    trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java

Added Paths:
-----------
    trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java

Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java        2012-06-02 14:19:35 UTC (rev 7170)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java        2012-06-02 14:41:57 UTC (rev 7171)
@@ -63,7 +63,7 @@
     this.maxHits = maxHits;
   }
 
-  public List<MatchingSentence> findRuleMatchesOnIndex(PatternRule rule, 
Language language, File indexDir) throws IOException {
+  public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language 
language, File indexDir) throws IOException {
     final IndexSearcher indexSearcher = new 
IndexSearcher(FSDirectory.open(indexDir));
     try {
       return findRuleMatchesOnIndex(rule, language, indexSearcher);
@@ -72,12 +72,13 @@
     }
   }
 
-  public List<MatchingSentence> findRuleMatchesOnIndex(PatternRule rule, 
Language language, IndexSearcher indexSearcher) throws IOException {
-    final Query query = createQuery(rule);
-    final TopDocs topDocs = indexSearcher.search(query, maxHits);
+  public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language 
language, IndexSearcher indexSearcher) throws IOException {
+    final PossiblyRelaxedQuery query = createQuery(rule);
+    final TopDocs topDocs = indexSearcher.search(query.query, maxHits);
     final JLanguageTool languageTool = getLanguageToolWithOneRule(language, 
rule);
     final List<MatchingSentence> matchingSentences = 
findMatchingSentences(indexSearcher, topDocs, languageTool);
-    return matchingSentences;
+    final int sentencesChecked = getSentenceCheckCount(query, topDocs, 
indexSearcher);
+    return new SearcherResult(matchingSentences, sentencesChecked);
   }
 
   PatternRule getRuleById(String ruleId, File xmlRuleFile) throws IOException {
@@ -91,6 +92,20 @@
     throw new PatternRuleNotFoundException(ruleId, xmlRuleFile);
   }
 
+  private int getSentenceCheckCount(PossiblyRelaxedQuery query, TopDocs 
topDocs, IndexSearcher indexSearcher) {
+    final int sentencesChecked;
+    final int indexSize = indexSearcher.getIndexReader().numDocs();
+    if (query.isRelaxed) {
+      // unsupported rules: the number of documents we really ran LT on:
+      sentencesChecked = Math.min(maxHits, topDocs.totalHits);
+    } else {
+      // supported rules: no need to run LT (other than getting the exact 
match position), so we can claim
+      // that we really have checked all the sentences in the index:
+      sentencesChecked = indexSize;
+    }
+    return sentencesChecked;
+  }
+
   private List<MatchingSentence> findMatchingSentences(IndexSearcher 
indexSearcher, TopDocs topDocs, JLanguageTool languageTool) throws IOException {
     final List<MatchingSentence> matchingSentences = new 
ArrayList<MatchingSentence>();
     for (ScoreDoc match : topDocs.scoreDocs) {
@@ -105,15 +120,18 @@
     return matchingSentences;
   }
 
-  private Query createQuery(PatternRule rule) {
+  private PossiblyRelaxedQuery createQuery(PatternRule rule) {
     final PatternRuleQueryBuilder patternRuleQueryBuilder = new 
PatternRuleQueryBuilder();
     Query query;
+    boolean relaxed;
     try {
       query = patternRuleQueryBuilder.buildQuery(rule);
+      relaxed = false;
     } catch (UnsupportedPatternRuleException e) {
       query = patternRuleQueryBuilder.buildPossiblyRelaxedQuery(rule);
+      relaxed = true;
     }
-    return query;
+    return new PossiblyRelaxedQuery(query, relaxed);
   }
 
   private JLanguageTool getLanguageToolWithOneRule(Language lang, PatternRule 
patternRule) throws IOException {
@@ -125,6 +143,17 @@
     return langTool;
   }
 
+  class PossiblyRelaxedQuery {
+
+    Query query;
+    boolean isRelaxed;
+
+    PossiblyRelaxedQuery(Query query, boolean relaxed) {
+      this.query = query;
+      isRelaxed = relaxed;
+    }
+  }
+
   private static void ensureCorrectUsageOrExit(String[] args) {
     if (args.length != 4) {
       System.err.println("Usage: Searcher <ruleId> <ruleXML> <languageCode> 
<indexDir>");
@@ -148,9 +177,9 @@
     }
     final File indexDir = new File(args[3]);
     final PatternRule rule = searcher.getRuleById(ruleId, ruleFile);
-    final List<MatchingSentence> ruleMatchesOnIndex = 
searcher.findRuleMatchesOnIndex(rule, language, indexDir);
+    final SearcherResult searcherResult = 
searcher.findRuleMatchesOnIndex(rule, language, indexDir);
     int i = 1;
-    for (MatchingSentence ruleMatch : ruleMatchesOnIndex) {
+    for (MatchingSentence ruleMatch : searcherResult.getMatchingSentences()) {
       System.out.println(i + ": " + ruleMatch.getSentence());
       i++;
     }

Added: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java                          (rev 0)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java  2012-06-02 14:41:57 UTC (rev 7171)
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.dev.index;
+
+import java.util.List;
+
+/**
+ * The result of a {@link Searcher}.
+ */
+public class SearcherResult {
+
+  private final List<MatchingSentence> matchingSentences;
+  private final int checkedSentences;
+
+  public SearcherResult(List<MatchingSentence> matchingSentences, int 
checkedSentences) {
+    this.matchingSentences = matchingSentences;
+    this.checkedSentences = checkedSentences;
+  }
+
+  public List<MatchingSentence> getMatchingSentences() {
+    return matchingSentences;
+  }
+
+  public int getCheckedSentences() {
+    return checkedSentences;
+  }
+}

Modified: trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java    2012-06-02 14:19:35 UTC (rev 7170)
+++ trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java    2012-06-02 14:41:57 UTC (rev 7171)
@@ -43,11 +43,6 @@
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    directory = newDirectory();
-    // Note that the second sentence ends with "lid" instead of "lids" (the 
inflated one)
-    final String content = "How to move back and fourth from linux to xmb? 
Calcium deposits on eye lid.";
-    Indexer.run(content, directory, Language.ENGLISH, false);
-    searcher = new IndexSearcher(directory);
   }
 
   @Override
@@ -60,15 +55,20 @@
   }
 
   public void testIndexerSearcher() throws Exception {
-    List<MatchingSentence> matchingSentences =
+    // Note that the second sentence ends with "lid" instead of "lids" (the 
inflated one)
+    createIndex("How to move back and fourth from linux to xmb? Calcium 
deposits on eye lid.");
+    SearcherResult searcherResult =
             errorSearcher.findRuleMatchesOnIndex(getRule("BACK_AND_FOURTH"), 
Language.ENGLISH, searcher);
-    assertEquals(1, matchingSentences.size());
+    assertEquals(2, searcherResult.getCheckedSentences());
+    assertEquals(1, searcherResult.getMatchingSentences().size());
 
-    matchingSentences = 
errorSearcher.findRuleMatchesOnIndex(getRule("EYE_BROW"), Language.ENGLISH, 
searcher);
-    assertEquals(1, matchingSentences.size());
+    searcherResult = errorSearcher.findRuleMatchesOnIndex(getRule("EYE_BROW"), 
Language.ENGLISH, searcher);
+    assertEquals(1, searcherResult.getCheckedSentences());
+    assertEquals(1, searcherResult.getMatchingSentences().size());
 
-    matchingSentences = 
errorSearcher.findRuleMatchesOnIndex(getRule("ALL_OVER_THE_WORD"), 
Language.ENGLISH, searcher);
-    assertEquals(0, matchingSentences.size());
+    searcherResult = 
errorSearcher.findRuleMatchesOnIndex(getRule("ALL_OVER_THE_WORD"), 
Language.ENGLISH, searcher);
+    assertEquals(2, searcherResult.getCheckedSentences());
+    assertEquals(0, searcherResult.getMatchingSentences().size());
 
     try {
       errorSearcher.findRuleMatchesOnIndex(getRule("Invalid Rule Id"), 
Language.ENGLISH, searcher);
@@ -81,6 +81,7 @@
   }
 
   public void testIndexerSearcherWithNewRule() throws Exception {
+    createIndex("How to move back and fourth from linux to xmb?");
     final Searcher errorSearcher = new Searcher();
     final List<Element> elements = Arrays.asList(
             new Element("move", false, false, false),
@@ -89,9 +90,10 @@
     final PatternRule rule1 = new PatternRule("RULE1", Language.ENGLISH, 
elements, "desc", "msg", "shortMsg");
     final IndexSearcher indexSearcher = new IndexSearcher(directory);
     try {
-      final List<MatchingSentence> matchingSentences = 
errorSearcher.findRuleMatchesOnIndex(rule1, Language.ENGLISH, indexSearcher);
-      assertEquals(1, matchingSentences.size());
-      final List<RuleMatch> ruleMatches = 
matchingSentences.get(0).getRuleMatches();
+      final SearcherResult searcherResult = 
errorSearcher.findRuleMatchesOnIndex(rule1, Language.ENGLISH, indexSearcher);
+      assertEquals(1, searcherResult.getCheckedSentences());
+      assertEquals(1, searcherResult.getMatchingSentences().size());
+      final List<RuleMatch> ruleMatches = 
searcherResult.getMatchingSentences().get(0).getRuleMatches();
       assertEquals(1, ruleMatches.size());
       final Rule rule = ruleMatches.get(0).getRule();
       assertEquals("RULE1", rule.getId());
@@ -100,4 +102,32 @@
     }
   }
 
+  /*public void testFoo() throws Exception {
+    createIndex("Daily Bleed's Anarchist Encyclopedia");
+    final List<Element> elements = Arrays.asList(
+            new Element("Bleed", false, false, false),
+            new Element("'", false, false, false),
+            new Element("s", false, false, false)
+    );
+    final PatternRule rule1 = new PatternRule("RULE1", Language.ENGLISH, 
elements, "desc", "msg", "shortMsg");
+    final IndexSearcher indexSearcher = new IndexSearcher(directory);
+    try {
+      final SearcherResult searcherResult = 
errorSearcher.findRuleMatchesOnIndex(rule1, Language.ENGLISH, indexSearcher);
+      assertEquals(1, searcherResult.getMatchingSentences().size());
+      final List<RuleMatch> ruleMatches = 
searcherResult.getMatchingSentences().get(0).getRuleMatches();
+      assertEquals(1, ruleMatches.size());
+      final Rule rule = ruleMatches.get(0).getRule();
+      assertEquals("RULE1", rule.getId());
+    } finally {
+      indexSearcher.close();
+    }
+  }*/
+
+  private void createIndex(String content) throws IOException {
+    directory = newDirectory();
+    //directory = FSDirectory.open(new File("/tmp/lucenetest"));
+    Indexer.run(content, directory, Language.ENGLISH, false);
+    searcher = new IndexSearcher(directory);
+  }
+
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to