Revision: 15977
http://gate.svn.sourceforge.net/gate/?rev=15977&view=rev
Author: valyt
Date: 2012-07-24 15:18:51 +0000 (Tue, 24 Jul 2012)
Log Message:
-----------
More work on direct indexes: support for describing annotation terms.
While this compiles, it's unlikely to work. Checking in just to save the
current work state.
Modified Paths:
--------------
mimir/trunk/mimir-core/src/gate/mimir/AbstractSemanticAnnotationHelper.java
mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java
mimir/trunk/mimir-core/src/gate/mimir/search/IndexReaderPool.java
mimir/trunk/mimir-core/src/gate/mimir/search/QueryEngine.java
mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java
mimir/trunk/mimir-test/src/gate/mimir/test/Scratch.java
mimir/trunk/plugins/db-h2/src/gate/mimir/db/DBSemanticAnnotationHelper.java
Modified:
mimir/trunk/mimir-core/src/gate/mimir/AbstractSemanticAnnotationHelper.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/AbstractSemanticAnnotationHelper.java 2012-07-24 09:53:01 UTC (rev 15976)
+++ mimir/trunk/mimir-core/src/gate/mimir/AbstractSemanticAnnotationHelper.java 2012-07-24 15:18:51 UTC (rev 15977)
@@ -20,6 +20,7 @@
import gate.mimir.search.QueryEngine;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -88,6 +89,12 @@
protected String annotationType;
/**
+ * The list of names for all the features that should be used when describing
+ * an annotation mention (see {@link #describeMention(String)}).
+ */
+ protected String[] descriptiveFeatures;
+
+ /**
* The working mode for this helper (defaults to {@link Mode#ANNOTATION}).
*/
protected Mode mode = Mode.ANNOTATION;
@@ -157,10 +164,31 @@
public void setUriFeatures(String[] uriFeatureNames) {
this.uriFeatureNames = uriFeatureNames;
}
+
+ /**
+ * Gets the names of features that should be used when describing an
+ * annotation mention.
+ *
+ * @return the descriptiveFeatures
+ */
+ public String[] getDescriptiveFeatures() {
+ return descriptiveFeatures;
+ }
+ /**
+ * Sets the names of features that should be used when describing an
+ * annotation mention. This should be called <strong>before</strong> the
+ * helper is initialised (i.e. before calling {@link #init(QueryEngine)}).
+ *
+ * If no custom value has been set before {@link #init(QueryEngine)} is
+ * called, then all features are used as descriptive features.
+ *
+ * @param descriptiveFeatures the descriptiveFeatures to set
+ */
+ public void setDescriptiveFeatures(String[] descriptiveFeatures) {
+ this.descriptiveFeatures = descriptiveFeatures;
+ }
-
-
/* (non-Javadoc)
* @see gate.mimir.SemanticAnnotationHelper#documentEnd()
*/
@@ -182,7 +210,48 @@
return getMentions(annotationType, predicates, engine);
}
+
+
+ /* (non-Javadoc)
+ * @see gate.mimir.SemanticAnnotationHelper#describeMention(java.lang.String)
+ */
+ @Override
+ public String describeMention(String mentionUri) {
+ String[] values = getDescriptiveFeatureValues(mentionUri);
+ if(values == null) {
+ return mentionUri;
+ } else {
+ StringBuilder res = new StringBuilder("{").append(annotationType);
+ for(int i = 0; i < descriptiveFeatures.length; i++) {
+ if(values[i] != null && values[i].length() > 0) {
+ res.append(' ').append(descriptiveFeatures[i]).append(" = ");
+ res.append(values[i]);
+ }
+ }
+ res.append('}');
+ return res.toString();
+ }
+ }
+
/**
+ * Calculates the textual representations for the values of features that are
+ * part of the description of an annotation mention. The list of features for
+ * which the values should be returned is {@link #descriptiveFeatures}.
+ *
+ * This implementation always returns <code>null</code> as the abstract class
+ * has no way of accessing the actual feature values. Subclasses should
+ * provide an actual implementation to support proper mention descriptions.
+ *
+ * @param mentionUri the URI for the mention that needs to be described.
+ *
+ * @return an array of strings parallel with {@link #descriptiveFeatures}, or
+ * null if the feature values are not known.
+ */
+ protected String[] getDescriptiveFeatureValues(String mentionUri) {
+ return null;
+ }
+
+ /**
* Helper method to concatenate a number of arrays into one, for helpers
* that don't support all the feature types and want to combine some of
* them together.
@@ -210,6 +279,7 @@
private void checkInit() {
if(isInited) throw new IllegalStateException(
"This helper has already been initialised!");
+
isInited = true;
}
@@ -222,8 +292,26 @@
@Override
public void init(QueryEngine queryEngine) {
checkInit();
+ // calculate the list of descriptive features if needed
+ if(descriptiveFeatures == null) {
+ List<String> featNames = new ArrayList<String>();
+ if(nominalFeatureNames != null){
+ Collections.addAll(featNames, nominalFeatureNames);
+ }
+ if(integerFeatureNames != null) {
+ Collections.addAll(featNames, integerFeatureNames);
+ }
+ if(floatFeatureNames != null) {
+ Collections.addAll(featNames, floatFeatureNames);
+ }
+ if(textFeatureNames != null) {
+ Collections.addAll(featNames, textFeatureNames);
+ }
+ if(uriFeatureNames != null) {
+ Collections.addAll(featNames, uriFeatureNames);
+ }
+ descriptiveFeatures = featNames.toArray(new String[featNames.size()]);
+ }
}
-
-
}
Modified: mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java 2012-07-24 09:53:01 UTC (rev 15976)
+++ mimir/trunk/mimir-core/src/gate/mimir/SemanticAnnotationHelper.java 2012-07-24 15:18:51 UTC (rev 15977)
@@ -133,7 +133,19 @@
public List<Mention> getMentions(String annotationType,
List<Constraint> constraints, QueryEngine engine);
+
/**
+ * Provides a human-friendly representation of a mention, specified by the
+ * given URI.
+ * @param mentionUri the mention URI, a string identical to the one that would
+ * be returned by one of the getMentions() methods. There is no requirement
+ * that the actual string value was previously obtained from a getMentions()
+ * call.
+ * @return a textual representation of the specified mention.
+ */
+ public String describeMention(String mentionUri) ;
+
+ /**
* Closes this annotation helper. Implementers should perform maintenance
* operations (such as closing connections to ORDI, etc) on this call.
*/
Modified: mimir/trunk/mimir-core/src/gate/mimir/search/IndexReaderPool.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/IndexReaderPool.java 2012-07-24 09:53:01 UTC (rev 15976)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/IndexReaderPool.java 2012-07-24 15:18:51 UTC (rev 15977)
@@ -41,10 +41,6 @@
super(terms);
}
- /**
- * A mutable string used internally.
- */
- private MutableString ms = new MutableString();
/**
* Gets the term string for a given term ID. This method is synchronised to
@@ -52,8 +48,8 @@
* @param termId the ID for the term being sought.
* @return the string for the given term.
*/
- public synchronized String getTerm(long termId) {
- return super.getTerm(termId, ms).toString();
+ public String getTerm(long termId) {
+ return super.getTerm(termId, new MutableString()).toString();
}
}
Modified: mimir/trunk/mimir-core/src/gate/mimir/search/QueryEngine.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/QueryEngine.java 2012-07-24 09:53:01 UTC (rev 15976)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/QueryEngine.java 2012-07-24 15:18:51 UTC (rev 15977)
@@ -458,6 +458,19 @@
return null;
}
+ public SemanticAnnotationHelper getAnnotationHelper(String annotationType) {
+ for(int i = 0; i < indexConfig.getSemanticIndexers().length; i++) {
+ String[] annTypes = indexConfig.getSemanticIndexers()[i]
+ .getAnnotationTypes();
+ for(int j = 0; j < annTypes.length; j++) {
+ if(annTypes[j].equals(annotationType)) {
+ return indexConfig.getSemanticIndexers()[i].getHelpers()[j];
+ }
+ }
+ }
+ return null;
+ }
+
/**
* @return the index configuration for this index
*/
Modified: mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java 2012-07-24 09:53:01 UTC (rev 15976)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/terms/AbstractIndexTermsQuery.java 2012-07-24 15:18:51 UTC (rev 15977)
@@ -14,6 +14,7 @@
*/
package gate.mimir.search.terms;
+import gate.mimir.SemanticAnnotationHelper;
import gate.mimir.search.IndexReaderPool;
import gate.mimir.search.QueryEngine;
import gate.mimir.search.QueryEngine.IndexType;
@@ -59,6 +60,12 @@
protected IndexReaderPool indirectIndexPool;
/**
+ * If {@link #indexType} is {@link IndexType#ANNOTATIONS}, this holds a
+ * reference to the annotation helper.
+ */
+ protected SemanticAnnotationHelper annotationHelper;
+
+ /**
* Should stop words be filtered out of the results?
*/
protected boolean stopWordsBlocked = false;
@@ -147,6 +154,7 @@
case ANNOTATIONS:
directIndexPool = engine.getAnnotationDirectIndex(indexName);
indirectIndexPool = engine.getAnnotationIndex(indexName);
+ annotationHelper = engine.getAnnotationHelper(indexName);
break;
case TOKENS:
directIndexPool = engine.getTokenDirectIndex(indexName);
@@ -208,7 +216,13 @@
for (int aCount : counterSetupVisitor.count ) count += aCount;
termCounts.add(count);
}
- if(stringsEnabled) termStrings.add(termString);
+ if(stringsEnabled){
+ if(indexType == IndexType.ANNOTATIONS) {
+ // describe the term
+ termString = annotationHelper.describeMention(termString);
+ }
+ termStrings.add(termString);
+ }
}
termId = documentIterator.nextDocument();
}
Modified: mimir/trunk/mimir-test/src/gate/mimir/test/Scratch.java
===================================================================
--- mimir/trunk/mimir-test/src/gate/mimir/test/Scratch.java 2012-07-24 09:53:01 UTC (rev 15976)
+++ mimir/trunk/mimir-test/src/gate/mimir/test/Scratch.java 2012-07-24 15:18:51 UTC (rev 15977)
@@ -48,6 +48,9 @@
public class Scratch {
+ public static void main (String[] args) throws Exception {
+ mainSimple(args);
+ }
public static void mainSimple(String[] args) throws Exception {
@@ -58,10 +61,10 @@
Gate.getCreoleRegister().registerDirectories(
new File("gate-home/plugins/ANNIE-tokeniser").toURI().toURL());
// load the DB plugin
+ Gate.getCreoleRegister().registerDirectories(
+ new File("../plugins/db-h2").toURI().toURL());
// Gate.getCreoleRegister().registerDirectories(
-// new File("../plugins/db-h2").toURI().toURL());
- Gate.getCreoleRegister().registerDirectories(
- new File("../plugins/sesame").toURI().toURL());
+// new File("../plugins/sesame").toURI().toURL());
// load the measurements plugin
Gate.getCreoleRegister().registerDirectories(
new File("../plugins/measurements").toURI().toURL());
@@ -208,7 +211,7 @@
* @param args
* @throws Exception sometimes
*/
- public static void main(String[] args) throws Exception {
+ public static void mainDirectIndexes(String[] args) throws Exception {
Gate.setGateHome(new File("gate-home"));
Gate.setUserConfigFile(new File("gate-home/user-gate.xml"));
Gate.init();
@@ -227,6 +230,7 @@
new File("../plugins/sparql").toURI().toURL());
QueryEngine qEngine = new QueryEngine(new File(args[0]));
+
TermsQuery query = null;
// query = new DocumentTermsQuery("root", IndexType.TOKENS,
@@ -243,14 +247,17 @@
// printTermQuery(query, qEngine);
// System.out.println("\n=======================================");
- TermsQuery q1 = new DocumentTermsQuery("root", IndexType.TOKENS,
- true, true, TermsQuery.NO_LIMIT, 0);
- TermsQuery q2 = new DocumentTermsQuery("root", IndexType.TOKENS,
- true, true, TermsQuery.NO_LIMIT, 1);
- query = new OrTermsQuery(true, true, TermsQuery.NO_LIMIT, q1, q2);
+// TermsQuery q1 = new DocumentTermsQuery("root", IndexType.TOKENS,
+// true, true, TermsQuery.NO_LIMIT, 0);
+// TermsQuery q2 = new DocumentTermsQuery("root", IndexType.TOKENS,
+// true, true, TermsQuery.NO_LIMIT, 1);
+// query = new OrTermsQuery(true, true, TermsQuery.NO_LIMIT, q1, q2);
+//
+// query = new LimitTermsQuery(new SortedTermsQuery(query), 100);
- query = new LimitTermsQuery(new SortedTermsQuery(query), 100);
- // now for real
+ query = new LimitTermsQuery(new SortedTermsQuery(
+ new DocumentsOrTermsQuery("root", IndexType.TOKENS,
+ true, true, TermsQuery.NO_LIMIT, 0, 1)) , 100);
printTermQuery(query, qEngine);
System.out.println("\n=======================================");
Modified: mimir/trunk/plugins/db-h2/src/gate/mimir/db/DBSemanticAnnotationHelper.java
===================================================================
--- mimir/trunk/plugins/db-h2/src/gate/mimir/db/DBSemanticAnnotationHelper.java 2012-07-24 09:53:01 UTC (rev 15976)
+++ mimir/trunk/plugins/db-h2/src/gate/mimir/db/DBSemanticAnnotationHelper.java 2012-07-24 15:18:51 UTC (rev 15977)
@@ -37,6 +37,8 @@
import java.sql.Statement;
import java.sql.Types;
import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@@ -99,7 +101,20 @@
*/
protected transient PreparedStatement level1SelectStmt;
+
/**
+ * Prepared statement used to obtain the Level-1 feature values based on a
+ * mention ID. Only used at search time.
+ */
+ protected transient PreparedStatement level1DescribeStmt;
+
+ /**
+ * Prepared statement used to obtain the Level-1 and Level-2 feature values
+ * based on a mention ID. Only used at search time.
+ */
+ protected transient PreparedStatement level1And2DescribeStmt;
+
+ /**
* Prepared statement used to insert anew row into the Level-1 table.
* Only used at indexing time.
*/
@@ -255,8 +270,79 @@
if(textFeatureNames != null){
for(String name : textFeatureNames) nonNominalFeatureNameSet.add(name);
}
+ try {
+ constructDescriptionStatements();
+ } catch(SQLException e) {
+ throw new RuntimeException("Error while opening database", e);
+ }
}
+ protected void constructDescriptionStatements() throws SQLException {
+ // level 1 query
+ List<String> nomFeatNames = new ArrayList<String>(
+ Arrays.asList(descriptiveFeatures));
+ nomFeatNames.retainAll(nominalFeatureNameSet);
+
+ StringBuilder stmt = new StringBuilder("SELECT DISTINCT ");
+ stmt.append(tableName(null, MENTIONS_TABLE_SUFFIX)).append(".\"ID\"");
+ for(int i = 0; i < nomFeatNames.size(); i++) {
+ String featName = nomFeatNames.get(i);
+ stmt.append(", ").append(tableName(null, L1_TABLE_SUFFIX))
+ .append(".\"").append(featName).append("\"");
+ }
+ stmt.append(" FROM ")
+ .append(tableName(null, MENTIONS_TABLE_SUFFIX))
+ .append(", ").append(tableName(null, L1_TABLE_SUFFIX))
+ .append(" WHERE ").append(tableName(null, MENTIONS_TABLE_SUFFIX))
+ .append(".\"ID\" IS ?")
+ .append(" AND ").append(tableName(null, MENTIONS_TABLE_SUFFIX))
+ .append(".\"L1_ID\" = ").append(tableName(null, L1_TABLE_SUFFIX))
+ .append(".\"ID\"");
+ if(level2Used){
+ stmt.append(" AND ").append(tableName(null, MENTIONS_TABLE_SUFFIX))
+ .append(".\"L2_ID\" IS NULL;");
+ }else {
+ stmt.append(";");
+ }
+// logger.debug("L1 description statement: " + stmt.toString());
+ level1DescribeStmt = dbConnection.prepareStatement(stmt.toString());
+
+ if(level2Used) {
+ // levels 1 and 2 query
+ List<String> nonNomFeatNames = new ArrayList<String>(
+ Arrays.asList(descriptiveFeatures));
+ nonNomFeatNames.retainAll(nonNominalFeatureNameSet);
+ stmt = new StringBuilder("SELECT DISTINCT ");
+ stmt.append(tableName(null, MENTIONS_TABLE_SUFFIX)).append(".\"ID\"");
+ for(int i = 0; i < nomFeatNames.size(); i++) {
+ String featName = nomFeatNames.get(i);
+ stmt.append(", ").append(tableName(null, L1_TABLE_SUFFIX))
+ .append(".\"").append(featName).append("\"");
+ }
+ for(int i = 0; i < nonNomFeatNames.size(); i++) {
+ String featName = nonNomFeatNames.get(i);
+ stmt.append(", ").append(tableName(null, L2_TABLE_SUFFIX))
+ .append(".\"").append(featName).append("\"");
+ }
+ stmt.append(" FROM ")
+ .append(tableName(null, MENTIONS_TABLE_SUFFIX))
+ .append(", ").append(tableName(null, L1_TABLE_SUFFIX))
+ .append(", ").append(tableName(null, L2_TABLE_SUFFIX))
+ .append(" WHERE ").append(tableName(null, MENTIONS_TABLE_SUFFIX))
+ .append(".\"ID\" IS ?")
+ .append(" AND ").append(tableName(null, MENTIONS_TABLE_SUFFIX))
+ .append(".\"L1_ID\" = ").append(tableName(null, L1_TABLE_SUFFIX))
+ .append(".\"ID\" AND ").append(tableName(null,
MENTIONS_TABLE_SUFFIX))
+ .append(".\"L2_ID\" = ").append(tableName(null, L2_TABLE_SUFFIX))
+ .append(".\"ID\";");
+// logger.debug("L1+2 description statement: " + stmt.toString());
+ level1And2DescribeStmt = dbConnection.prepareStatement(stmt.toString());
+ } else {
+ level1And2DescribeStmt = null;
+ }
+
+ }
+
/**
* Creates in the database the tables required by this helper for indexing.
* Called at index creation, during the initialisation process.
@@ -621,6 +707,69 @@
}
}
+ /* (non-Javadoc)
+ * @see gate.mimir.AbstractSemanticAnnotationHelper#getDescriptiveFeatureValues(java.lang.String)
+ */
+ @Override
+ protected String[] getDescriptiveFeatureValues(String mentionUri) {
+ long mentionId = -1;
+ try {
+ mentionId = Long.parseLong(
+ mentionUri.substring(annotationType.length() + 1));
+ } catch(Exception e) {
+ logger.error("Could not describe mention with invalid URI: \"" +
+ mentionUri + "\"", e);
+ return null;
+ }
+ ResultSet res = null;
+ try {
+ level1DescribeStmt.setLong(1, mentionId);
+ res = level1DescribeStmt.executeQuery();
+ if(!res.next()) {
+ // no level 1 results: try levels 1+2
+ res.close();
+ level1And2DescribeStmt.setLong(1, mentionId);
+ res = level1And2DescribeStmt.executeQuery();
+ if(!res.next()){
+ logger.error("Was asked to describe mention with ID " + mentionId +
+ " but was unable to find it.");
+ return null;
+ }
+ }
+ // by this point the result set was advanced to the one and only row
+ String[] result = new String[descriptiveFeatures.length];
+ for(int i = 0; i < descriptiveFeatures.length; i++) {
+ String columnName = null;
+ if(nominalFeatureNameSet.contains(descriptiveFeatures[i])) {
+ columnName = tableName(null, L1_TABLE_SUFFIX) + "\"" +
+ descriptiveFeatures[i] + "\"";
+ } else {
+ columnName = tableName(null, L2_TABLE_SUFFIX) + "\"" +
+ descriptiveFeatures[i] + "\"";
+ }
+ try {
+ Object sqlValue = res.getObject(columnName);
+ if(sqlValue != null) result[i] = sqlValue.toString();
+ } catch(Exception e) {
+ // ignore
+ }
+ }
+ return result;
+ } catch(SQLException e) {
+ logger.error("Database error while describing mention with ID: " +
+ mentionId, e);
+ return null;
+ } finally {
+ if(res != null){
+ try {
+ res.close();
+ } catch(SQLException e) {
+ logger.error("Error while closing SQL result set", e);
+ }
+ }
+ }
+ }
+
/**
* Sets all the values for a prepared statement (which must be one of the
* cached transient statements!)
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs