This is an automated email from the ASF dual-hosted git repository.

tommaso pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e851de  no jira - added javadoc to normalizers (#285)
1e851de is described below

commit 1e851de8fe60a7122bd7b75b4278bf23bd0ec12d
Author: Tommaso Teofili <[email protected]>
AuthorDate: Sun Nov 5 17:02:42 2017 +0100

    no jira - added javadoc to normalizers (#285)
    
    No Jira - Add javadoc to normalizers
---
 .../tools/util/normalizer/CharSequenceNormalizer.java     | 15 +++++++++++++--
 .../util/normalizer/EmojiCharSequenceNormalizer.java      |  8 ++++----
 .../util/normalizer/NumberCharSequenceNormalizer.java     |  5 +++--
 .../util/normalizer/ShrinkCharSequenceNormalizer.java     |  5 +++--
 .../util/normalizer/TwitterCharSequenceNormalizer.java    |  5 +++--
 .../tools/util/normalizer/UrlCharSequenceNormalizer.java  |  5 +++--
 6 files changed, 29 insertions(+), 14 deletions(-)

diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/CharSequenceNormalizer.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/CharSequenceNormalizer.java
index b5c1f3f..e09578c 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/CharSequenceNormalizer.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/CharSequenceNormalizer.java
@@ -14,10 +14,21 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package opennlp.tools.util.normalizer;
 
+/**
+ * A char sequence normalizer, used to adjust (prune, substitute, add, etc.)
+ * characters in order to remove noise from text.
+ *
+ * @see <a href="https://en.wikipedia.org/wiki/Text_normalization">Text normalization</a>
+ *
+ */
 public interface CharSequenceNormalizer {
+
+  /**
+   * normalize a sequence of characters
+   * @param text the char sequence to normalize
+   * @return the normalized char sequence
+   */
   CharSequence normalize(CharSequence text);
 }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/EmojiCharSequenceNormalizer.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/EmojiCharSequenceNormalizer.java
index d1c161c..c7e66e3 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/EmojiCharSequenceNormalizer.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/EmojiCharSequenceNormalizer.java
@@ -14,12 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package opennlp.tools.util.normalizer;
 
 import java.util.regex.Pattern;
 
+/**
+ * Normalizer for emojis.
+ */
 public class EmojiCharSequenceNormalizer implements CharSequenceNormalizer {
 
   private static final EmojiCharSequenceNormalizer INSTANCE = new 
EmojiCharSequenceNormalizer();
@@ -32,7 +33,6 @@ public class EmojiCharSequenceNormalizer implements 
CharSequenceNormalizer {
       Pattern.compile("[\\uD83C-\\uDBFF\\uDC00-\\uDFFF]+");
 
   public CharSequence normalize (CharSequence text) {
-    String modified = EMOJI_REGEX.matcher(text).replaceAll(" ");
-    return modified;
+    return EMOJI_REGEX.matcher(text).replaceAll(" ");
   }
 }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/NumberCharSequenceNormalizer.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/NumberCharSequenceNormalizer.java
index 6b0452d..5fe0f62 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/NumberCharSequenceNormalizer.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/NumberCharSequenceNormalizer.java
@@ -14,12 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package opennlp.tools.util.normalizer;
 
 import java.util.regex.Pattern;
 
+/**
+ * Normalizer for numbers
+ */
 public class NumberCharSequenceNormalizer implements CharSequenceNormalizer {
 
   private static final Pattern NUMBER_REGEX = Pattern.compile("\\d+");
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/ShrinkCharSequenceNormalizer.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/ShrinkCharSequenceNormalizer.java
index 6183367..cc1c15e 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/ShrinkCharSequenceNormalizer.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/ShrinkCharSequenceNormalizer.java
@@ -14,12 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package opennlp.tools.util.normalizer;
 
 import java.util.regex.Pattern;
 
+/**
+ * Normalizer to shrink repeated spaces / chars
+ */
 public class ShrinkCharSequenceNormalizer implements CharSequenceNormalizer {
 
   private static final Pattern REPEATED_CHAR_REGEX = 
Pattern.compile("(.)\\1{2,}",
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/TwitterCharSequenceNormalizer.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/TwitterCharSequenceNormalizer.java
index b5a8625..69c7068 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/TwitterCharSequenceNormalizer.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/TwitterCharSequenceNormalizer.java
@@ -14,12 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package opennlp.tools.util.normalizer;
 
 import java.util.regex.Pattern;
 
+/**
+ * Normalizer for Twitter character sequences
+ */
 public class TwitterCharSequenceNormalizer implements CharSequenceNormalizer {
 
   private static final Pattern HASH_USER_REGEX =
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/UrlCharSequenceNormalizer.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/UrlCharSequenceNormalizer.java
index 4be9b63..847f86d 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/UrlCharSequenceNormalizer.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/normalizer/UrlCharSequenceNormalizer.java
@@ -14,12 +14,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-
 package opennlp.tools.util.normalizer;
 
 import java.util.regex.Pattern;
 
+/**
+ * Normalizer that removes URLs and email addresses.
+ */
 public class UrlCharSequenceNormalizer implements CharSequenceNormalizer {
 
   private static final Pattern URL_REGEX =

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to