Revision: 7274
          http://languagetool.svn.sourceforge.net/languagetool/?rev=7274&view=rev
Author:   dnaber
Date:     2012-06-06 22:26:49 +0000 (Wed, 06 Jun 2012)
Log Message:
-----------
corpus search: limit search time (some queries with regex and negation might 
take a very long time and cause an out-of-memory error)

Modified Paths:
--------------
    trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
    trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
    trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java

Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java        
2012-06-06 22:22:25 UTC (rev 7273)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java        
2012-06-06 22:26:49 UTC (rev 7274)
@@ -45,13 +45,21 @@
  */
 public class Searcher {
 
-  private static final int DEFAULT_MAX_HITS = 1000;
+  private int maxHits = 1000;
+  private int maxSearchTimeMillis = 5000;
 
-  private int maxHits = DEFAULT_MAX_HITS;
-
   public Searcher() {
   }
 
+  public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language 
language, File indexDir) throws IOException {
+    final IndexSearcher indexSearcher = new 
IndexSearcher(FSDirectory.open(indexDir));
+    try {
+      return findRuleMatchesOnIndex(rule, language, indexSearcher);
+    } finally {
+      indexSearcher.close();
+    }
+  }
+
   public int getMaxHits() {
     return maxHits;
   }
@@ -60,28 +68,41 @@
     this.maxHits = maxHits;
   }
 
-  public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language 
language, File indexDir) throws IOException {
-    final IndexSearcher indexSearcher = new 
IndexSearcher(FSDirectory.open(indexDir));
-    try {
-      return findRuleMatchesOnIndex(rule, language, indexSearcher);
-    } finally {
-      indexSearcher.close();
-    }
+  public int getMaxSearchTimeMillis() {
+    return maxSearchTimeMillis;
   }
 
+  public void setMaxSearchTimeMillis(int maxSearchTimeMillis) {
+    this.maxSearchTimeMillis = maxSearchTimeMillis;
+  }
+
   public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language 
language, IndexSearcher indexSearcher) throws IOException {
     final PossiblyRelaxedQuery query = createQuery(rule);
     final Sort sort = new Sort(new SortField("docCount", SortField.INT));  // 
do not sort by relevance as this will move the shortest documents to the top
     if (query.query == null) {
       throw new NullPointerException("Cannot search on null query for rule: " 
+ rule);
     }
-    final TopDocs topDocs = indexSearcher.search(query.query, maxHits, sort);
+    final PossiblyLimitedTopDocs limitedTopDocs = getTopDocs(indexSearcher, 
query, sort);
     final JLanguageTool languageTool = getLanguageToolWithOneRule(language, 
rule);
-    final List<MatchingSentence> matchingSentences = 
findMatchingSentences(indexSearcher, topDocs, languageTool);
-    final int sentencesChecked = getSentenceCheckCount(query, topDocs, 
indexSearcher);
-    return new SearcherResult(matchingSentences, sentencesChecked, 
query.isRelaxed);
+    final List<MatchingSentence> matchingSentences = 
findMatchingSentences(indexSearcher, limitedTopDocs.topDocs, languageTool);
+    final int sentencesChecked = getSentenceCheckCount(query, 
limitedTopDocs.topDocs, indexSearcher);
+    final SearcherResult searcherResult = new 
SearcherResult(matchingSentences, sentencesChecked, query.isRelaxed);
+    searcherResult.setResultIsTimeLimited(limitedTopDocs.resultIsTimeLimited);
+    return searcherResult;
   }
 
+  private PossiblyLimitedTopDocs getTopDocs(IndexSearcher indexSearcher, 
PossiblyRelaxedQuery query, Sort sort) throws IOException {
+    final TopFieldCollector topCollector = TopFieldCollector.create(sort, 
maxHits, true, false, false, false);
+    final TimeLimitingCollector collector = new 
TimeLimitingCollector(topCollector, maxSearchTimeMillis);
+    boolean timeLimitActivated = false;
+    try {
+      indexSearcher.search(query.query, collector);
+    } catch (TimeLimitingCollector.TimeExceededException e) {
+      timeLimitActivated = true;
+    }
+    return new PossiblyLimitedTopDocs(topCollector.topDocs(), 
timeLimitActivated);
+  }
+
   PatternRule getRuleById(String ruleId, File xmlRuleFile) throws IOException {
     final PatternRuleLoader ruleLoader = new PatternRuleLoader();
     final List<PatternRule> rules = ruleLoader.getRules(xmlRuleFile);
@@ -146,6 +167,16 @@
     return langTool;
   }
 
+  class PossiblyLimitedTopDocs {
+    TopDocs topDocs;
+    boolean resultIsTimeLimited;
+
+    PossiblyLimitedTopDocs(TopDocs topDocs, boolean resultIsTimeLimited) {
+      this.topDocs = topDocs;
+      this.resultIsTimeLimited = resultIsTimeLimited;
+    }
+  }
+
   class PossiblyRelaxedQuery {
 
     Query query;

Modified: 
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java  
2012-06-06 22:22:25 UTC (rev 7273)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java  
2012-06-06 22:26:49 UTC (rev 7274)
@@ -29,6 +29,8 @@
   private final int checkedSentences;
   private final boolean relaxedQuery;
 
+  private boolean resultIsTimeLimited;
+
   public SearcherResult(List<MatchingSentence> matchingSentences, int 
checkedSentences, boolean relaxedQuery) {
     this.matchingSentences = matchingSentences;
     this.checkedSentences = checkedSentences;
@@ -46,4 +48,12 @@
   public boolean isRelaxedQuery() {
     return relaxedQuery;
   }
+
+  public boolean isResultIsTimeLimited() {
+    return resultIsTimeLimited;
+  }
+
+  public void setResultIsTimeLimited(boolean resultIsTimeLimited) {
+    this.resultIsTimeLimited = resultIsTimeLimited;
+  }
 }

Modified: 
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
    2012-06-06 22:22:25 UTC (rev 7273)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
    2012-06-06 22:26:49 UTC (rev 7274)
@@ -60,16 +60,19 @@
     SearcherResult searcherResult =
             errorSearcher.findRuleMatchesOnIndex(getRule("BACK_AND_FOURTH"), 
Language.ENGLISH, searcher);
     assertEquals(2, searcherResult.getCheckedSentences());
+    assertEquals(false, searcherResult.isResultIsTimeLimited());
     assertEquals(1, searcherResult.getMatchingSentences().size());
     assertEquals(false, searcherResult.isRelaxedQuery());
 
     searcherResult = errorSearcher.findRuleMatchesOnIndex(getRule("EYE_BROW"), 
Language.ENGLISH, searcher);
     assertEquals(2, searcherResult.getCheckedSentences());
+    assertEquals(false, searcherResult.isResultIsTimeLimited());
     assertEquals(1, searcherResult.getMatchingSentences().size());
     assertEquals(true, searcherResult.isRelaxedQuery());
 
     searcherResult = 
errorSearcher.findRuleMatchesOnIndex(getRule("ALL_OVER_THE_WORD"), 
Language.ENGLISH, searcher);
     assertEquals(2, searcherResult.getCheckedSentences());
+    assertEquals(false, searcherResult.isResultIsTimeLimited());
     assertEquals(0, searcherResult.getMatchingSentences().size());
     assertEquals(false, searcherResult.isRelaxedQuery());
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to