This is an automated email from the ASF dual-hosted git repository.

joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


The following commit(s) were added to refs/heads/master by this push:
     new d187edf  Remove end marker from output seq
d187edf is described below

commit d187edf182c8fb8d6526a41db0b8ac02b7df5a03
Author: Jörn Kottmann <[email protected]>
AuthorDate: Thu Nov 29 09:53:05 2018 +0100

    Remove end marker from output seq
---
 .../src/main/java/org/apache/opennlp/normalizer/Normalizer.java  | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
index 52d44cf..c3f7fbb 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
@@ -38,6 +38,8 @@ import org.tensorflow.Tensor;
 
 public class Normalizer {
 
+  private static final char END_MARKER = 'E';
+
   private final Session session;
   private final Map<Character, Integer> sourceCharMap;
   private final Map<Integer, Character> targetCharMap;
@@ -109,6 +111,13 @@ public class Normalizer {
             normalizedText.append(targetCharMap.get(translations[ti][ci]));
           }
 
+          // Remove the end marker from the translated string
+          for (int ci = normalizedText.length() - 1; ci >= 0; ci--) {
+            if (END_MARKER == normalizedText.charAt(ci)) {
+              normalizedText.setLength(ci);
+            }
+          }
+
           normalizedTexts.add(normalizedText.toString());
         }
 

Reply via email to