Revision: 7836 http://languagetool.svn.sourceforge.net/languagetool/?rev=7836&view=rev Author: dnaber Date: 2012-08-11 23:48:16 +0000 (Sat, 11 Aug 2012) Log Message: ----------- wikipedia indexing: fix skipping matching; don't crash if doc count field doesn't exist
Modified Paths: -------------- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java =================================================================== --- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java 2012-08-11 23:18:43 UTC (rev 7835) +++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/PatternRuleQueryBuilder.java 2012-08-11 23:48:16 UTC (rev 7836) @@ -69,6 +69,7 @@ SpanQuery query = null; Element prevElement = null; int position = 0; + int skipCount = 0; for (Element element : patternRule.getElements()) { SpanQuery spanQuery; @@ -93,7 +94,14 @@ if (element.getNegation()) { query = new SpanNotQuery(query, spanQuery); } else { - query = new SpanNearQuery(new SpanQuery[] { query, spanQuery }, getSkip(prevElement), true); + final int skip = getSkip(prevElement); + if (skip == 0) { + // we need to increase the skip because counting starts from the beginning of a span query match: + skipCount++; + } else { + skipCount = 0; + } + query = new SpanNearQuery(new SpanQuery[] { query, spanQuery }, getSkip(prevElement) + skipCount, true); } } prevElement = element; Modified: trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java =================================================================== --- trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java 2012-08-11 23:18:43 UTC (rev 7835) +++ trunk/JLanguageTool/src/dev/org/languagetool/dev/index/Searcher.java 2012-08-11 23:48:16 UTC (rev 7836) @@ -72,7 +72,7 @@ final Term searchTerm = new Term(MAX_DOC_COUNT_FIELD, MAX_DOC_COUNT_FIELD_VAL); final TopDocs search = indexSearcher.search(new TermQuery(searchTerm), 1); if (search.totalHits != 1) { - throw new RuntimeException("Got " 
+ search.totalHits + " hits for the docCount query in " + indexSearcher.getIndexReader() + ", expected 1"); + return -1; } final ScoreDoc scoreDoc = search.scoreDocs[0]; final Document doc = indexSearcher.doc(scoreDoc.doc); Modified: trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java 2012-08-11 23:18:43 UTC (rev 7835) +++ trunk/JLanguageTool/src/test/org/languagetool/dev/index/PatternRuleQueryBuilderTest.java 2012-08-11 23:48:16 UTC (rev 7836) @@ -257,6 +257,9 @@ assertMatches(makeRule("<token>X</token> <token>.</token> <token>G</token> <token>.</token>"), 0); assertMatches(makeRule("<token>E</token> <token>,</token> <token>G</token> <token>.</token>"), 0); + assertMatches(makeRule("<token>E</token> <token>.</token> <token>G</token> <token>.</token> <token>LanguageTool</token>"), 1); + assertMatches(makeRule("<token>E</token> <token>.</token> <token>G</token> <token>.</token> <token>foo</token>"), 0); + assertRelaxedMatches(makeCaseSensitiveRule("<token>The</token> <token inflected=\"yes\">is</token>"), 1); // negation: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs