Revision: 15954
http://gate.svn.sourceforge.net/gate/?rev=15954&view=rev
Author: valyt
Date: 2012-07-18 13:12:07 +0000 (Wed, 18 Jul 2012)
Log Message:
-----------
New terms query: Boolean OR.
More code/comments cleanup.
Modified Paths:
--------------
mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractTermsQuery.java
mimir/trunk/mimir-core/src/gate/mimir/search/terms/AndTermsQuery.java
mimir/trunk/mimir-core/src/gate/mimir/search/terms/TermsQuery.java
Added Paths:
-----------
mimir/trunk/mimir-core/src/gate/mimir/search/terms/OrTermsQuery.java
Modified:
mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractTermsQuery.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractTermsQuery.java
2012-07-18 10:00:17 UTC (rev 15953)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractTermsQuery.java
2012-07-18 13:12:07 UTC (rev 15954)
@@ -20,11 +20,11 @@
*/
public abstract class AbstractTermsQuery implements TermsQuery {
- protected boolean stringsEnabled;
+ protected final boolean stringsEnabled;
- protected boolean countsEnabled;
+ protected final boolean countsEnabled;
- public static final int NO_LIMIT = Integer.MAX_VALUE;
+
/**
* The maximum number of results to be returned.
*/
Modified: mimir/trunk/mimir-core/src/gate/mimir/search/terms/AndTermsQuery.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/terms/AndTermsQuery.java
2012-07-18 10:00:17 UTC (rev 15953)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/terms/AndTermsQuery.java
2012-07-18 13:12:07 UTC (rev 15954)
@@ -30,8 +30,21 @@
*/
public class AndTermsQuery extends AbstractTermsQuery {
+ /**
+ * The sub-queries being AND'ed.
+ */
protected TermsQuery[] subQueries;
+ /**
+ * Constructs a new AND term query.
+ *
+ * @param stringsEnabled should terms strings be returned.
+ * @param countsEnabled should term counts be returned. Counts are
+ * accumulated across all sub-queries: the count for a term is the sum of all
+ * counts for the same term in all sub-queries.
+ * @param limit the maximum number of terms to be returned.
+ * @param subQueries the term queries that form the conjunction.
+ */
public AndTermsQuery(boolean stringsEnabled, boolean countsEnabled,
int limit, TermsQuery... subQueries) {
super(stringsEnabled, countsEnabled, limit);
Added: mimir/trunk/mimir-core/src/gate/mimir/search/terms/OrTermsQuery.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/terms/OrTermsQuery.java
(rev 0)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/terms/OrTermsQuery.java
2012-07-18 13:12:07 UTC (rev 15954)
@@ -0,0 +1,120 @@
+/*
+ * OrTermsQuery.java
+ *
+ * Copyright (c) 2007-2011, The University of Sheffield.
+ *
+ * This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
+ * and is free software, licenced under the GNU Lesser General Public License,
+ * Version 3, June 2007 (also included with this distribution as file
+ * LICENCE-LGPL3.html).
+ *
+ * Valentin Tablan, 18 Jul 2012
+ *
+ * $Id$
+ */
+package gate.mimir.search.terms;
+
+import gate.mimir.search.QueryEngine;
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongHeapSemiIndirectPriorityQueue;
+import it.unimi.dsi.fastutil.objects.ObjectArrayList;
+
+import java.io.IOException;
+
+/**
+ * Boolean OR operator for term queries: merges the results of its sub-queries.
+ */
+public class OrTermsQuery extends AbstractTermsQuery {
+
+ /**
+ * The sub-queries being OR'ed.
+ */
+ protected TermsQuery[] subQueries;
+
+ /**
+ * Constructs a new OR terms query.
+ * @param stringsEnabled whether term strings should be returned.
+ * @param countsEnabled whether term counts should be returned. Counts are
+ * accumulated across all sub-queries: the count for a term is the sum of all
+ * counts for the same term in all sub-queries.
+ * @param limit the maximum number of terms to be returned (passed to the superclass).
+ * @param subQueries the term queries that form the disjunction.
+ */
+ public OrTermsQuery(boolean stringsEnabled, boolean countsEnabled,
+ int limit, TermsQuery... subQueries) {
+ super(stringsEnabled, countsEnabled, limit);
+ this.subQueries = subQueries;
+ }
+
+ /* (non-Javadoc)
+ * @see
gate.mimir.search.terms.TermsQuery#execute(gate.mimir.search.QueryEngine)
+ */
+ @Override
+ public TermsResultSet execute(QueryEngine engine) throws IOException {
+ TermsResultSet[] resSets = new TermsResultSet[subQueries.length];
+ long[] currentTerm = new long[resSets.length]; // current term ID of each sub-result; reference array of the queue
+ LongHeapSemiIndirectPriorityQueue queue =
+ new LongHeapSemiIndirectPriorityQueue(currentTerm);
+ int[] termIndex = new int[resSets.length]; // current read position within each sub-result
+ for(int i = 0; i < subQueries.length; i++) {
+ resSets[i] = subQueries[i].execute(engine);
+ if(resSets[i].termIds.length > 0){
+ termIndex[i] = 0;
+ currentTerm[i] = resSets[i].termIds[termIndex[i]];
+ queue.enqueue(i); // only non-empty sub-results take part in the merge
+ }
+ }
+
+ // accumulators for the merged output; strings/counts exist only when enabled
+ LongArrayList termIds = new LongArrayList();
+ ObjectArrayList<String> termStrings = stringsEnabled ?
+ new ObjectArrayList<String>() : null;
+ IntArrayList termCounts = countsEnabled ? new IntArrayList() : null;
+ int front[] = null;
+ if(stringsEnabled || countsEnabled) front = new int[resSets.length];
+ // enumerate all terms, reading the sub-result at the head of the queue each time
+ top:while(!queue.isEmpty()) {
+ int first = queue.first();
+ long termId = resSets[first].termIds[termIndex[first]];
+ termIds.add(termId);
+ if(countsEnabled || stringsEnabled) {
+ int frontSize = queue.front(front); // indices of the sub-results tied with the head
+ String termString = null;
+ int count = 0;
+ for(int i = 0; i < frontSize; i++) {
+ int subRunnerId = front[i];
+ if(stringsEnabled &&
+ termString == null &&
+ resSets[subRunnerId].termStrings != null) {
+ termString =
resSets[subRunnerId].termStrings[termIndex[subRunnerId]];
+ }
+ if(resSets[subRunnerId].termCounts != null) {
+ count += resSets[subRunnerId].termCounts[termIndex[subRunnerId]];
+ }
+ }
+ if(stringsEnabled) termStrings.add(termString);
+ if(countsEnabled) termCounts.add(count);
+ }
+ // consume all equal terms: step past every head entry whose term ID equals termId
+ while(resSets[first].termIds[termIndex[first]] == termId) {
+ // advance this sub-result to its next term
+ termIndex[first]++;
+ if(termIndex[first] == resSets[first].termIds.length) {
+ // 'first' is exhausted: remove it from the queue
+ queue.dequeue();
+ if(queue.isEmpty()) break top;
+ } else {
+ currentTerm[first] = resSets[first].termIds[termIndex[first]];
+ queue.changed(); // the head's key was modified; let the queue reorder
+ }
+ first = queue.first();
+ }
+ }
+ // construct the result; strings and counts are null when not enabled
+ return new TermsResultSet(termIds.toLongArray(),
+ stringsEnabled ? termStrings.toArray(new String[termStrings.size()]) :
null,
+ null,
+ countsEnabled ? termCounts.toIntArray() : null);
+ }
+}
Property changes on:
mimir/trunk/mimir-core/src/gate/mimir/search/terms/OrTermsQuery.java
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Added: svn:keywords
+ Id
Added: svn:eol-style
+ native
Modified: mimir/trunk/mimir-core/src/gate/mimir/search/terms/TermsQuery.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/terms/TermsQuery.java
2012-07-18 10:00:17 UTC (rev 15953)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/terms/TermsQuery.java
2012-07-18 13:12:07 UTC (rev 15954)
@@ -23,6 +23,9 @@
* order of their term ID.
*/
public interface TermsQuery {
+
+ public static final int NO_LIMIT = Integer.MAX_VALUE;
+
/**
* Runs the term query (in the calling thread) and returns the matched terms.
* The terms returned must be sorted in ascending order of their term ID.
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs