This is an automated email from the ASF dual-hosted git repository. greid pushed a commit to branch CRUNCH-693 in repository https://gitbox.apache.org/repos/asf/crunch.git
commit 37be4b4f468b2126fbb4e31df7ec4cd86884cbf9
Author: Gabriel Reid <[email protected]>
AuthorDate: Sat Jan 11 16:35:20 2020 +0100

    CRUNCH-693: Make text parsing locale-independent

    Standardize on US-based locale for number formatting (which is backwards-compatible with historical behavior).
---
 .../main/java/org/apache/crunch/contrib/text/TokenizerFactory.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
index f43478d..555ad8c 100644
--- a/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
+++ b/crunch-contrib/src/main/java/org/apache/crunch/contrib/text/TokenizerFactory.java
@@ -17,14 +17,14 @@
  */
 package org.apache.crunch.contrib.text;
 
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
+
 import java.io.Serializable;
 import java.util.Locale;
 import java.util.Scanner;
 import java.util.Set;
 
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableSet;
-
 /**
  * Factory class that constructs {@link Tokenizer} instances for input strings that use a fixed
  * set of delimiters, skip patterns, locales, and sets of indices to keep or drop.
@@ -65,6 +65,7 @@ public class TokenizerFactory implements Serializable {
    */
   public Tokenizer create(String input) {
     Scanner s = new Scanner(input);
+    s.useLocale(Locale.US); // Use period for floating point number formatting
     if (delim != null) {
       s.useDelimiter(delim);
     }
