EntityLinker.java

joern Wed, 29 Oct 2014 12:14:59 -0700

Author: joern
Date: Wed Oct 29 19:14:20 2014
New Revision: 1635260

URL: http://svn.apache.org/r1635260
Log:
OPENNLP-579 Updated the Entity Linker interface


Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java?rev=1635260&r1=1635259&r2=1635260&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java Wed Oct 29 19:14:20 2014
@@ -94,35 +94,34 @@ public class EntityLinkerTool extends Ba
 
             StringBuilder text = new StringBuilder();
             Span sentences[] = new Span[document.size()];
-            List<Span> tokens = new ArrayList<Span>();
-            List<Span> names = new ArrayList<Span>();
+            Span[][] tokensBySentence = new Span[document.size()][];
+            Span[][] namesBySentence = new Span[document.size()][];
 
             for (int i = 0; i < document.size(); i++) {
 
               NameSample sample = document.get(i);
-
+              
+              namesBySentence[i] = sample.getNames();
+              
               int sentenceBegin = text.length();
-
-              int tokenSentOffset = tokens.size();
+              
+              Span[] tokens = new Span[sample.getSentence().length];
 
               // for all tokens
-              for (String token : sample.getSentence()) {
+              for (int ti = 0; ti < sample.getSentence().length; ti++) {
                 int tokenBegin = text.length();
-                text.append(token);
-                Span tokenSpan = new Span(tokenBegin, text.length());
+                text.append(sample.getSentence()[ti]);
                 text.append(" ");
+                tokens[i] = new Span(tokenBegin, text.length());
               }
-
-              for (Span name : sample.getNames()) {
-                names.add(new Span(tokenSentOffset + name.getStart(), tokenSentOffset + name.getEnd(), name.getType()));
-              }
-
+              
+              tokensBySentence[i] = tokens;
+              
               sentences[i] = new Span(sentenceBegin, text.length());
               text.append("\n");
             }
 
-            List<Span> linkedSpans = entityLinker.find(text.toString(), sentences, tokens.toArray(new Span[tokens.size()]),
-                names.toArray(new Span[names.size()]));
+            List<Span> linkedSpans = entityLinker.find(text.toString(), sentences, tokensBySentence, namesBySentence);
 
             for (int i = 0; i < linkedSpans.size(); i++) {
               System.out.println(linkedSpans.get(i));

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java?rev=1635260&r1=1635259&r2=1635260&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java Wed Oct 29 19:14:20 2014
@@ -51,9 +51,7 @@ public interface EntityLinker<T extends 
    * Links an entire document of named entities to an external source
    *
    * @param doctext          the full text of the document
-   * @param sentences        the list of sentences spans that correspond to the
-   *                         text.
-   * @param tokensBySentence a list of tokens that correspond to each sentence.
+   * @param tokensBySentence a list of tokens spans that correspond to each sentence.
    *                         The outer array refers to the sentence, the inner
    *                         array is the tokens for the outer sentence. Similar
    *                         in nature to Map of SentenceIndex keys to Listof
@@ -66,47 +64,29 @@ public interface EntityLinker<T extends 
    *                         Sentence's Tokens&gt;&gt; @ return
    * @return 
    */
-  List<T> find(String doctext, Span[] sentences, String[][] tokensBySentence, Span[][] namesBySentence);
+  List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence, Span[][] namesBySentence);
 
-  /**
-   *
-   * @param doctext      the document text to be used as additional context, and to
-   *                  derive sentences and tokens String[]
-   * @param sentences the list of sentences spans that correspond to the text.
-   * @param tokens    the spans that correspond to one of the sentences.
-   * @param nameSpans the named entity spans that correspond to the tokens
-   * @return
-   */
-  List<T> find(String doctext, Span sentences[], Span tokens[], Span nameSpans[]);
 
   /**
    * Links the names that correspond to the tokens[] spans. The sentenceindex
    * can be used to get the sentence text and tokens from the text based on the
    * sentence and token spans. The text is available for additional context.
    *
-   * @param doctext          the document text to be used as additional context,
-   *                      and to derive sentences and tokens String[]
-   * @param sentences     the list of sentences spans that correspond to the
-   *                      text.
-   * @param tokens        the spans that correspond to one of the sentences.
-   * @param nameSpans     the named entity spans that correspond to the tokens
+   * @param doctext          the full text of the document
+   * @param tokensBySentence a list of tokens spans that correspond to each sentence.
+   *                         The outer array refers to the sentence, the inner
+   *                         array is the tokens for the outer sentence. Similar
+   *                         in nature to Map of SentenceIndex keys to Listof
+   *                         tokens as values
+   * @param namesBySentence  a list of name spans that correspond to each
+   *                         sentence. The outer array refers to the sentence,
+   *                         the inner array refers to the tokens that for the
+   *                         same sentence.Similar in nature to
+   *                         Map&lt;SentenceIndex,List&lt;Name Spans For This
+   *                         Sentence's Tokens&gt;&gt; @ return
    * @param sentenceIndex the index to the sentence span that the tokens[]
    *                      Span[] corresponds to
    * @return
    */
-  List<T> find(String doctext, Span sentences[], Span tokens[], Span nameSpans[], int sentenceIndex);
-
-  /**
-   * Links the names that correspond to the tokens[]. The Sentences and text are
-   * available for additional context.
-   *
-   * @param doctext      the document text to be used as additional context, and to
-   *                  derive sentences and tokens String[]
-   * @param sentences the list of sentences spans that correspond to the text.
-   * @param tokens    the actual String[] of tokens that correspond to one of
-   *                  the sentences.
-   * @param nameSpans the named entity spans that correspond to the tokens
-   * @return
-   */
-  List<T> find(String doctext, Span sentences[], String tokens[], Span nameSpans[]);
+  List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence, Span[][] namesBySentence, int sentenceIndex);
 }

svn commit: r1635260 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/entitylinker/EntityLinkerTool.java entitylinker/EntityLinker.java

Reply via email to