Author: joern
Date: Wed Oct 29 19:14:20 2014
New Revision: 1635260
URL: http://svn.apache.org/r1635260
Log:
OPENNLP-579 Updated the Entity Linker interface
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java?rev=1635260&r1=1635259&r2=1635260&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
Wed Oct 29 19:14:20 2014
@@ -94,35 +94,34 @@ public class EntityLinkerTool extends Ba
StringBuilder text = new StringBuilder();
Span sentences[] = new Span[document.size()];
- List<Span> tokens = new ArrayList<Span>();
- List<Span> names = new ArrayList<Span>();
+ Span[][] tokensBySentence = new Span[document.size()][];
+ Span[][] namesBySentence = new Span[document.size()][];
for (int i = 0; i < document.size(); i++) {
NameSample sample = document.get(i);
-
+
+ namesBySentence[i] = sample.getNames();
+
int sentenceBegin = text.length();
-
- int tokenSentOffset = tokens.size();
+
+ Span[] tokens = new Span[sample.getSentence().length];
// for all tokens
- for (String token : sample.getSentence()) {
+ for (int ti = 0; ti < sample.getSentence().length; ti++) {
int tokenBegin = text.length();
- text.append(token);
- Span tokenSpan = new Span(tokenBegin, text.length());
+ text.append(sample.getSentence()[ti]);
text.append(" ");
+ tokens[i] = new Span(tokenBegin, text.length());
}
-
- for (Span name : sample.getNames()) {
- names.add(new Span(tokenSentOffset + name.getStart(),
tokenSentOffset + name.getEnd(), name.getType()));
- }
-
+
+ tokensBySentence[i] = tokens;
+
sentences[i] = new Span(sentenceBegin, text.length());
text.append("\n");
}
- List<Span> linkedSpans = entityLinker.find(text.toString(),
sentences, tokens.toArray(new Span[tokens.size()]),
- names.toArray(new Span[names.size()]));
+ List<Span> linkedSpans = entityLinker.find(text.toString(),
sentences, tokensBySentence, namesBySentence);
for (int i = 0; i < linkedSpans.size(); i++) {
System.out.println(linkedSpans.get(i));
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java?rev=1635260&r1=1635259&r2=1635260&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
Wed Oct 29 19:14:20 2014
@@ -51,9 +51,7 @@ public interface EntityLinker<T extends
* Links an entire document of named entities to an external source
*
* @param doctext the full text of the document
- * @param sentences the list of sentences spans that correspond to the
- * text.
- * @param tokensBySentence a list of tokens that correspond to each sentence.
+ * @param tokensBySentence a list of token spans that correspond to each
sentence.
* The outer array refers to the sentence, the inner
* array is the tokens for the outer sentence.
Similar
* in nature to Map of SentenceIndex keys to List of
@@ -66,47 +64,29 @@ public interface EntityLinker<T extends
* Sentence's Tokens>>
* @return
*/
- List<T> find(String doctext, Span[] sentences, String[][] tokensBySentence,
Span[][] namesBySentence);
+ List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence,
Span[][] namesBySentence);
- /**
- *
- * @param doctext the document text to be used as additional context,
and to
- * derive sentences and tokens String[]
- * @param sentences the list of sentences spans that correspond to the text.
- * @param tokens the spans that correspond to one of the sentences.
- * @param nameSpans the named entity spans that correspond to the tokens
- * @return
- */
- List<T> find(String doctext, Span sentences[], Span tokens[], Span
nameSpans[]);
/**
* Links the names that correspond to the tokens[] spans. The sentenceindex
* can be used to get the sentence text and tokens from the text based on the
* sentence and token spans. The text is available for additional context.
*
- * @param doctext the document text to be used as additional
context,
- * and to derive sentences and tokens String[]
- * @param sentences the list of sentences spans that correspond to the
- * text.
- * @param tokens the spans that correspond to one of the sentences.
- * @param nameSpans the named entity spans that correspond to the tokens
+ * @param doctext the full text of the document
+ * @param tokensBySentence a list of token spans that correspond to each
sentence.
+ * The outer array refers to the sentence, the inner
+ * array is the tokens for the outer sentence.
Similar
+ * in nature to Map of SentenceIndex keys to List of
+ * tokens as values
+ * @param namesBySentence a list of name spans that correspond to each
+ * sentence. The outer array refers to the sentence,
+ * the inner array refers to the tokens for the
+ * same sentence. Similar in nature to
+ * Map<SentenceIndex,List<Name Spans For This
+ * Sentence's Tokens>>
* @param sentenceIndex the index to the sentence span that the tokens[]
* Span[] corresponds to
* @return
*/
- List<T> find(String doctext, Span sentences[], Span tokens[], Span
nameSpans[], int sentenceIndex);
-
- /**
- * Links the names that correspond to the tokens[]. The Sentences and text
are
- * available for additional context.
- *
- * @param doctext the document text to be used as additional context,
and to
- * derive sentences and tokens String[]
- * @param sentences the list of sentences spans that correspond to the text.
- * @param tokens the actual String[] of tokens that correspond to one of
- * the sentences.
- * @param nameSpans the named entity spans that correspond to the tokens
- * @return
- */
- List<T> find(String doctext, Span sentences[], String tokens[], Span
nameSpans[]);
+ List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence,
Span[][] namesBySentence, int sentenceIndex);
}