Repository: parquet-mr Updated Branches: refs/heads/master c26fa7881 -> 6c9ca4d4c
PARQUET-430: Change to use Locale parameterized version of String.toUpperCase()/toLowerCase(). A String is being converted to upper or lowercase, using the platform's default locale. This may result in improper conversions when used with international characters. For instance, "TITLE".toLowerCase() in a Turkish locale returns "tıtle", where 'ı' -- without a dot -- is the LATIN SMALL LETTER DOTLESS I character. To obtain correct results for locale-insensitive strings, we should use toLowerCase(Locale.ENGLISH). For more information on this, please see: - http://stackoverflow.com/questions/11063102/using-locales-with-javas-tolowercase-and-touppercase - http://lotusnotus.com/lotusnotus_en.nsf/dx/dotless-i-tolowercase-and-touppercase-functions-use-responsibly.htm - http://java.sys-con.com/node/46241 This PR changes our use of String.toUpperCase()/toLowerCase() to String.toUpperCase(Locale.ENGLISH)/toLowerCase(Locale.ENGLISH). Author: proflin <[email protected]> Closes #312 from proflin/PARQUET-430 and squashes the following commits: ed55822 [proflin] PARQUET-430 Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/6c9ca4d4 Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/6c9ca4d4 Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/6c9ca4d4 Branch: refs/heads/master Commit: 6c9ca4d4c0de4dff29b79f28ac5c51b4f6fed0da Parents: c26fa78 Author: proflin <[email protected]> Authored: Mon Feb 15 16:35:33 2016 -0800 Committer: Julien Le Dem <[email protected]> Committed: Mon Feb 15 16:35:33 2016 -0800 ---------------------------------------------------------------------- .../org/apache/parquet/filter2/predicate/Operators.java | 9 +++++---- .../src/main/java/org/apache/parquet/schema/GroupType.java | 3 ++- .../java/org/apache/parquet/schema/MessageTypeParser.java | 5 +++-- .../main/java/org/apache/parquet/schema/PrimitiveType.java | 3 ++- 
.../parquet/hadoop/metadata/CompressionCodecName.java | 4 +++- 5 files changed, 15 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java index 32b4430..eca0f67 100644 --- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java +++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java @@ -19,6 +19,7 @@ package org.apache.parquet.filter2.predicate; import java.io.Serializable; +import java.util.Locale; import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.io.api.Binary; @@ -129,7 +130,7 @@ public final class Operators { // null in their own constructors. 
this.value = value; - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + value + ")"; } @@ -258,7 +259,7 @@ public final class Operators { protected BinaryLogicalFilterPredicate(FilterPredicate left, FilterPredicate right) { this.left = checkNotNull(left, "left"); this.right = checkNotNull(right, "right"); - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + left + ", " + right + ")"; } @@ -386,7 +387,7 @@ public final class Operators { UserDefinedByClass(Column<T> column, Class<U> udpClass) { super(column); this.udpClass = checkNotNull(udpClass, "udpClass"); - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpClass.getName() + ")"; // defensively try to instantiate the class early to make sure that it's possible @@ -442,7 +443,7 @@ public final class Operators { UserDefinedByInstance(Column<T> column, U udpInstance) { super(column); this.udpInstance = checkNotNull(udpInstance, "udpInstance"); - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpInstance + ")"; } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java index 027fbc0..f8404a1 100644 --- 
a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.parquet.io.InvalidRecordException; @@ -208,7 +209,7 @@ public class GroupType extends Type { @Override public void writeToStringBuilder(StringBuilder sb, String indent) { sb.append(indent) - .append(getRepetition().name().toLowerCase()) + .append(getRepetition().name().toLowerCase(Locale.ENGLISH)) .append(" group ") .append(getName()) .append(getOriginalType() == null ? "" : " (" + getOriginalType() +")") http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java index 4981398..b7274c2 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java @@ -19,6 +19,7 @@ package org.apache.parquet.schema; import java.util.Arrays; +import java.util.Locale; import java.util.StringTokenizer; import org.apache.parquet.Log; @@ -195,7 +196,7 @@ public class MessageTypeParser { private static PrimitiveTypeName asPrimitive(String t, Tokenizer st) { try { - return PrimitiveTypeName.valueOf(t.toUpperCase()); + return PrimitiveTypeName.valueOf(t.toUpperCase(Locale.ENGLISH)); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("expected one of " + Arrays.toString(PrimitiveTypeName.values()) +" got " + t + " at " + st.getLocationString(), e); } @@ -203,7 +204,7 @@ public class MessageTypeParser { private 
static Repetition asRepetition(String t, Tokenizer st) { try { - return Repetition.valueOf(t.toUpperCase()); + return Repetition.valueOf(t.toUpperCase(Locale.ENGLISH)); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("expected one of " + Arrays.toString(Repetition.values()) +" got " + t + " at " + st.getLocationString(), e); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 1cdc6c3..8056188 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -20,6 +20,7 @@ package org.apache.parquet.schema; import java.util.Arrays; import java.util.List; +import java.util.Locale; import org.apache.parquet.column.ColumnReader; import org.apache.parquet.io.InvalidRecordException; @@ -394,7 +395,7 @@ public final class PrimitiveType extends Type { @Override public void writeToStringBuilder(StringBuilder sb, String indent) { sb.append(indent) - .append(getRepetition().name().toLowerCase()) + .append(getRepetition().name().toLowerCase(Locale.ENGLISH)) .append(" ") .append(primitive.name().toLowerCase()); if (primitive == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) { http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java index 558bea7..d03d280 100644 --- 
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java @@ -21,6 +21,8 @@ package org.apache.parquet.hadoop.metadata; import org.apache.parquet.format.CompressionCodec; import org.apache.parquet.hadoop.codec.CompressionCodecNotSupportedException; +import java.util.Locale; + public enum CompressionCodecName { UNCOMPRESSED(null, CompressionCodec.UNCOMPRESSED, ""), SNAPPY("org.apache.parquet.hadoop.codec.SnappyCodec", CompressionCodec.SNAPPY, ".snappy"), @@ -31,7 +33,7 @@ public enum CompressionCodecName { if (name == null) { return UNCOMPRESSED; } - return valueOf(name.toUpperCase()); + return valueOf(name.toUpperCase(Locale.ENGLISH)); } public static CompressionCodecName fromCompressionCodec(Class<?> clazz) {
