Revision: 7274
http://languagetool.svn.sourceforge.net/languagetool/?rev=7274&view=rev
Author: dnaber
Date: 2012-06-06 22:26:49 +0000 (Wed, 06 Jun 2012)
Log Message:
-----------
corpus search: limit search time, as some queries with regex and negation
might take a very long time and run out of memory (OOM)
Modified Paths:
--------------
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
2012-06-06 22:22:25 UTC (rev 7273)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
2012-06-06 22:26:49 UTC (rev 7274)
@@ -45,13 +45,21 @@
*/
public class Searcher {
- private static final int DEFAULT_MAX_HITS = 1000;
+ private int maxHits = 1000;
+ private int maxSearchTimeMillis = 5000;
- private int maxHits = DEFAULT_MAX_HITS;
-
public Searcher() {
}
+ public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language
language, File indexDir) throws IOException {
+ final IndexSearcher indexSearcher = new
IndexSearcher(FSDirectory.open(indexDir));
+ try {
+ return findRuleMatchesOnIndex(rule, language, indexSearcher);
+ } finally {
+ indexSearcher.close();
+ }
+ }
+
public int getMaxHits() {
return maxHits;
}
@@ -60,28 +68,41 @@
this.maxHits = maxHits;
}
- public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language
language, File indexDir) throws IOException {
- final IndexSearcher indexSearcher = new
IndexSearcher(FSDirectory.open(indexDir));
- try {
- return findRuleMatchesOnIndex(rule, language, indexSearcher);
- } finally {
- indexSearcher.close();
- }
+ public int getMaxSearchTimeMillis() {
+ return maxSearchTimeMillis;
}
+ public void setMaxSearchTimeMillis(int maxSearchTimeMillis) {
+ this.maxSearchTimeMillis = maxSearchTimeMillis;
+ }
+
public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language
language, IndexSearcher indexSearcher) throws IOException {
final PossiblyRelaxedQuery query = createQuery(rule);
final Sort sort = new Sort(new SortField("docCount", SortField.INT)); //
do not sort by relevance as this will move the shortest documents to the top
if (query.query == null) {
throw new NullPointerException("Cannot search on null query for rule: "
+ rule);
}
- final TopDocs topDocs = indexSearcher.search(query.query, maxHits, sort);
+ final PossiblyLimitedTopDocs limitedTopDocs = getTopDocs(indexSearcher,
query, sort);
final JLanguageTool languageTool = getLanguageToolWithOneRule(language,
rule);
- final List<MatchingSentence> matchingSentences =
findMatchingSentences(indexSearcher, topDocs, languageTool);
- final int sentencesChecked = getSentenceCheckCount(query, topDocs,
indexSearcher);
- return new SearcherResult(matchingSentences, sentencesChecked,
query.isRelaxed);
+ final List<MatchingSentence> matchingSentences =
findMatchingSentences(indexSearcher, limitedTopDocs.topDocs, languageTool);
+ final int sentencesChecked = getSentenceCheckCount(query,
limitedTopDocs.topDocs, indexSearcher);
+ final SearcherResult searcherResult = new
SearcherResult(matchingSentences, sentencesChecked, query.isRelaxed);
+ searcherResult.setResultIsTimeLimited(limitedTopDocs.resultIsTimeLimited);
+ return searcherResult;
}
+ private PossiblyLimitedTopDocs getTopDocs(IndexSearcher indexSearcher,
PossiblyRelaxedQuery query, Sort sort) throws IOException {
+ final TopFieldCollector topCollector = TopFieldCollector.create(sort,
maxHits, true, false, false, false);
+ final TimeLimitingCollector collector = new
TimeLimitingCollector(topCollector, maxSearchTimeMillis);
+ boolean timeLimitActivated = false;
+ try {
+ indexSearcher.search(query.query, collector);
+ } catch (TimeLimitingCollector.TimeExceededException e) {
+ timeLimitActivated = true;
+ }
+ return new PossiblyLimitedTopDocs(topCollector.topDocs(),
timeLimitActivated);
+ }
+
PatternRule getRuleById(String ruleId, File xmlRuleFile) throws IOException {
final PatternRuleLoader ruleLoader = new PatternRuleLoader();
final List<PatternRule> rules = ruleLoader.getRules(xmlRuleFile);
@@ -146,6 +167,16 @@
return langTool;
}
+ class PossiblyLimitedTopDocs {
+ TopDocs topDocs;
+ boolean resultIsTimeLimited;
+
+ PossiblyLimitedTopDocs(TopDocs topDocs, boolean resultIsTimeLimited) {
+ this.topDocs = topDocs;
+ this.resultIsTimeLimited = resultIsTimeLimited;
+ }
+ }
+
class PossiblyRelaxedQuery {
Query query;
Modified:
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
2012-06-06 22:22:25 UTC (rev 7273)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
2012-06-06 22:26:49 UTC (rev 7274)
@@ -29,6 +29,8 @@
private final int checkedSentences;
private final boolean relaxedQuery;
+ private boolean resultIsTimeLimited;
+
public SearcherResult(List<MatchingSentence> matchingSentences, int
checkedSentences, boolean relaxedQuery) {
this.matchingSentences = matchingSentences;
this.checkedSentences = checkedSentences;
@@ -46,4 +48,12 @@
public boolean isRelaxedQuery() {
return relaxedQuery;
}
+
+ public boolean isResultIsTimeLimited() {
+ return resultIsTimeLimited;
+ }
+
+ public void setResultIsTimeLimited(boolean resultIsTimeLimited) {
+ this.resultIsTimeLimited = resultIsTimeLimited;
+ }
}
Modified:
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
2012-06-06 22:22:25 UTC (rev 7273)
+++
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
2012-06-06 22:26:49 UTC (rev 7274)
@@ -60,16 +60,19 @@
SearcherResult searcherResult =
errorSearcher.findRuleMatchesOnIndex(getRule("BACK_AND_FOURTH"),
Language.ENGLISH, searcher);
assertEquals(2, searcherResult.getCheckedSentences());
+ assertEquals(false, searcherResult.isResultIsTimeLimited());
assertEquals(1, searcherResult.getMatchingSentences().size());
assertEquals(false, searcherResult.isRelaxedQuery());
searcherResult = errorSearcher.findRuleMatchesOnIndex(getRule("EYE_BROW"),
Language.ENGLISH, searcher);
assertEquals(2, searcherResult.getCheckedSentences());
+ assertEquals(false, searcherResult.isResultIsTimeLimited());
assertEquals(1, searcherResult.getMatchingSentences().size());
assertEquals(true, searcherResult.isRelaxedQuery());
searcherResult =
errorSearcher.findRuleMatchesOnIndex(getRule("ALL_OVER_THE_WORD"),
Language.ENGLISH, searcher);
assertEquals(2, searcherResult.getCheckedSentences());
+ assertEquals(false, searcherResult.isResultIsTimeLimited());
assertEquals(0, searcherResult.getMatchingSentences().size());
assertEquals(false, searcherResult.isRelaxedQuery());
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs