Revision: 7393
http://languagetool.svn.sourceforge.net/languagetool/?rev=7393&view=rev
Author: dnaber
Date: 2012-06-17 22:37:16 +0000 (Sun, 17 Jun 2012)
Log Message:
-----------
indexing: more fixes; more verbose output
Modified Paths:
--------------
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
Modified:
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java	2012-06-17 22:02:37 UTC (rev 7392)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java	2012-06-17 22:37:16 UTC (rev 7393)
@@ -83,7 +83,7 @@
} else {
// create an empty token for the unsupported token, so that it can match any term with any
// POS tag.
- if (patternElement.hasExceptionList() && !patternElement.getString().isEmpty()) {
+ if (patternElement.hasExceptionList() && !patternElement.isInflected() && !patternElement.getString().isEmpty()) {
// having an exception causes the rule not to be supported but we can ignore it
// and search for the token to get a super set of matches:
tokenQuery = createTokenQuery(patternElement.getString(),
patternElement.getNegation(),
@@ -155,6 +155,7 @@
throw new UnsupportedPatternRuleException(
"Pattern rules with inflected tokens are not supported.");
}
+ // TODO: exception for <match no="0"/> etc. (patternElement.getMatch()?)
}
private SpanQuery createTokenQuery(String token, boolean isNegation,
Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java	2012-06-17 22:02:37 UTC (rev 7392)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java	2012-06-17 22:37:16 UTC (rev 7393)
@@ -86,7 +86,7 @@
final JLanguageTool languageTool = getLanguageToolWithOneRule(language,
rule);
final List<MatchingSentence> matchingSentences =
findMatchingSentences(indexSearcher, limitedTopDocs.topDocs, languageTool);
final int sentencesChecked = getSentenceCheckCount(query,
limitedTopDocs.topDocs, indexSearcher);
- final SearcherResult searcherResult = new SearcherResult(matchingSentences, sentencesChecked, query.isRelaxed);
+ final SearcherResult searcherResult = new SearcherResult(matchingSentences, sentencesChecked, query);
searcherResult.setResultIsTimeLimited(limitedTopDocs.resultIsTimeLimited);
return searcherResult;
}
@@ -186,6 +186,11 @@
this.query = query;
isRelaxed = relaxed;
}
+
+ @Override
+ public String toString() {
+ return query.toString() + "[relaxed=" + isRelaxed + "]";
+ }
}
private static void ensureCorrectUsageOrExit(String[] args) {
Modified:
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java	2012-06-17 22:02:37 UTC (rev 7392)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/SearcherResult.java	2012-06-17 22:37:16 UTC (rev 7393)
@@ -27,14 +27,14 @@
private final List<MatchingSentence> matchingSentences;
private final int checkedSentences;
- private final boolean relaxedQuery;
+ private final Searcher.PossiblyRelaxedQuery possiblyRelaxedQuery;
private boolean resultIsTimeLimited;
- public SearcherResult(List<MatchingSentence> matchingSentences, int checkedSentences, boolean relaxedQuery) {
+ public SearcherResult(List<MatchingSentence> matchingSentences, int checkedSentences, Searcher.PossiblyRelaxedQuery relaxedQuery) {
this.matchingSentences = matchingSentences;
this.checkedSentences = checkedSentences;
- this.relaxedQuery = relaxedQuery;
+ this.possiblyRelaxedQuery = relaxedQuery;
}
public List<MatchingSentence> getMatchingSentences() {
@@ -45,8 +45,12 @@
return checkedSentences;
}
+ public Searcher.PossiblyRelaxedQuery getPossiblyRelaxedQuery() {
+ return possiblyRelaxedQuery;
+ }
+
public boolean isRelaxedQuery() {
- return relaxedQuery;
+ return possiblyRelaxedQuery.isRelaxed;
}
public boolean isResultIsTimeLimited() {
Modified:
trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java	2012-06-17 22:02:37 UTC (rev 7392)
+++ trunk/JLanguageTool/src/test/org/languagetool/dev/index/IndexerSearcherTest.java	2012-06-17 22:37:16 UTC (rev 7393)
@@ -73,6 +73,7 @@
int ruleCounter = 0;
int ruleProblems = 0;
+ int relaxedQueryCount = 0;
searcher = new IndexSearcher(directory);
final List<Rule> rules = lt.getAllActiveRules();
for (Rule rule : rules) {
@@ -81,18 +82,26 @@
try {
ruleCounter++;
final SearcherResult searcherResult =
errorSearcher.findRuleMatchesOnIndex(patternRule, language, searcher);
+ if (searcherResult.isRelaxedQuery()) {
+ relaxedQueryCount++;
+ }
final List<MatchingSentence> matchingSentences =
searcherResult.getMatchingSentences();
boolean foundExpectedMatch = false;
for (MatchingSentence matchingSentence : matchingSentences) {
final List<RuleMatch> ruleMatches =
matchingSentence.getRuleMatches();
final List<String> ruleMatchIds = getRuleMatchIds(ruleMatches);
if (ruleMatchIds.contains(getFullId(patternRule))) {
+ // TODO: there can be more than one expected match, can't it?
foundExpectedMatch = true;
break;
}
}
if (!foundExpectedMatch) {
System.out.println("Error: No match found for " + patternRule);
+ System.out.println("Query : " +
searcherResult.getPossiblyRelaxedQuery());
+ System.out.println("Matches : " + matchingSentences);
+ System.out.println("Examples: " + rule.getIncorrectExamples());
+ System.out.println();
ruleProblems++;
}
} catch (NullPointerException e) {
@@ -103,6 +112,7 @@
}
}
System.out.println(language + ": problems: " + ruleProblems + ", total rules: " + ruleCounter);
+ System.out.println(language + ": relaxedQueryCount: " + relaxedQueryCount);
}
@@ -144,18 +154,15 @@
}
}
- /* for debugging
- public void testForDebugging() throws Exception {
+ /** for manual debugging only */
+ public void IGNOREtestForDebugging() throws Exception {
// Note that the second sentence ends with "lid" instead of "lids" (the
inflated one)
- createIndex("I thin that's true.");
- SearcherResult searcherResult =
- errorSearcher.findRuleMatchesOnIndex(getRule("I_THIN"),
Language.ENGLISH, searcher);
- System.out.println("matches: " + searcherResult.getMatchingSentences());
- assertEquals(false, searcherResult.isResultIsTimeLimited());
+ createIndex("Das machen Sinn");
+ final PatternRule rule = getRule("SINN_MACHEN", new
File("src/rules/de/grammar.xml"));
+ final SearcherResult searcherResult =
errorSearcher.findRuleMatchesOnIndex(rule, Language.GERMAN, searcher);
+ System.out.println("Matches: " + searcherResult.getMatchingSentences());
assertEquals(1, searcherResult.getMatchingSentences().size());
- //assertEquals(false, searcherResult.isRelaxedQuery());
}
- */
public void testIndexerSearcherWithEnglish() throws Exception {
// Note that the second sentence ends with "lid" instead of "lids" (the
inflated one)
@@ -189,6 +196,10 @@
return errorSearcher.getRuleById(ruleId, ruleFile);
}
+ private PatternRule getRule(String ruleId, File grammarFile) throws
IOException {
+ return errorSearcher.getRuleById(ruleId, grammarFile);
+ }
+
public void testWithNewRule() throws Exception {
createIndex("How to move back and fourth from linux to xmb?");
final Searcher errorSearcher = new Searcher();
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs