This is an automated email from the ASF dual-hosted git repository.
joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new d187edf Remove end marker from output seq
d187edf is described below
commit d187edf182c8fb8d6526a41db0b8ac02b7df5a03
Author: Jörn Kottmann <[email protected]>
AuthorDate: Thu Nov 29 09:53:05 2018 +0100
Remove end marker from output seq
---
.../src/main/java/org/apache/opennlp/normalizer/Normalizer.java | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git
a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
index 52d44cf..c3f7fbb 100644
--- a/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
+++ b/tf-ner-poc/src/main/java/org/apache/opennlp/normalizer/Normalizer.java
@@ -38,6 +38,8 @@ import org.tensorflow.Tensor;
public class Normalizer {
+ private static final char END_MARKER = 'E';
+
private final Session session;
private final Map<Character, Integer> sourceCharMap;
private final Map<Integer, Character> targetCharMap;
@@ -109,6 +111,13 @@ public class Normalizer {
normalizedText.append(targetCharMap.get(translations[ti][ci]));
}
+ // Truncate the translated string at the end marker, dropping the marker and everything after it
+ for (int ci = normalizedText.length() - 1; ci >= 0; ci--) {
+ if (END_MARKER == normalizedText.charAt(ci)) {
+ normalizedText.setLength(ci);
+ }
+ }
+
normalizedTexts.add(normalizedText.toString());
}