Revision: 7836
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7836&view=rev
Author:   dnaber
Date:     2012-08-11 23:48:16 +0000 (Sat, 11 Aug 2012)
Log Message:
-----------
wikipedia indexing: fix skipping matching; don't crash if doc count field 
doesn't exist

Modified Paths:
--------------
    
    trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java
    trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
    trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java

Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java
===================================================================
--- 
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java
 2012-08-11 23:18:43 UTC (rev 7835)
+++ 
trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java
 2012-08-11 23:48:16 UTC (rev 7836)
@@ -69,6 +69,7 @@
     SpanQuery query = null;
     Element prevElement = null;
     int position = 0;
+    int skipCount = 0;
     for (Element element : patternRule.getElements()) {
 
       SpanQuery spanQuery;
@@ -93,7 +94,14 @@
         if (element.getNegation()) {
           query = new SpanNotQuery(query, spanQuery);
         } else {
-          query = new SpanNearQuery(new SpanQuery[] { query, spanQuery }, 
getSkip(prevElement), true);
+          final int skip = getSkip(prevElement);
+          if (skip == 0) {
+            // we need to increase the skip because counting start from the 
beginning of a span query match:
+            skipCount++;
+          } else {
+            skipCount = 0;
+          }
+          query = new SpanNearQuery(new SpanQuery[] { query, spanQuery }, 
getSkip(prevElement) + skipCount, true);
         }
       }
       prevElement = element;

Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java        
2012-08-11 23:18:43 UTC (rev 7835)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java        
2012-08-11 23:48:16 UTC (rev 7836)
@@ -72,7 +72,7 @@
     final Term searchTerm = new Term(MAX_DOC_COUNT_FIELD, 
MAX_DOC_COUNT_FIELD_VAL);
     final TopDocs search = indexSearcher.search(new TermQuery(searchTerm), 1);
     if (search.totalHits != 1) {
-      throw new RuntimeException("Got " + search.totalHits + " hits for the 
docCount query in " + indexSearcher.getIndexReader() + ", expected 1");
+      return -1;
     }
     final ScoreDoc scoreDoc = search.scoreDocs[0];
     final Document doc = indexSearcher.doc(scoreDoc.doc);

Modified: trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java
    2012-08-11 23:18:43 UTC (rev 7835)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java
    2012-08-11 23:48:16 UTC (rev 7836)
@@ -257,6 +257,9 @@
     assertMatches(makeRule("<token>X</token> <token>.</token> <token>G</token> 
<token>.</token>"), 0);
     assertMatches(makeRule("<token>E</token> <token>,</token> <token>G</token> 
<token>.</token>"), 0);
 
+    assertMatches(makeRule("<token>E</token> <token>.</token> <token>G</token> 
<token>.</token> <token>LanguageTool</token>"), 1);
+    assertMatches(makeRule("<token>E</token> <token>.</token> <token>G</token> 
<token>.</token> <token>foo</token>"), 0);
+
     assertRelaxedMatches(makeCaseSensitiveRule("<token>The</token> <token 
inflected=\"yes\">is</token>"), 1);
 
     // negation:

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to