This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 714bb45 PARQUET-1505: Use Java 7 NIO StandardCharsets (#599)
714bb45 is described below
commit 714bb450856dc951bd361e0cf4a732775eb3cefd
Author: BELUGABEHR <[email protected]>
AuthorDate: Thu Feb 7 07:26:47 2019 -0500
PARQUET-1505: Use Java 7 NIO StandardCharsets (#599)
---
.../org/apache/parquet/avro/TestReadWrite.java | 10 ++++----
.../parquet/avro/TestReadWriteOldListBehavior.java | 12 ++++-----
.../java/org/apache/parquet/cli/BaseCommand.java | 8 ++----
.../java/org/apache/parquet/io/api/Binary.java | 30 +++++++---------------
.../column/values/dictionary/TestDictionary.java | 7 +++--
.../java/org/apache/parquet/bytes/BytesUtils.java | 3 +++
.../apache/parquet/hadoop/ParquetFileWriter.java | 4 +--
7 files changed, 30 insertions(+), 44 deletions(-)
diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
index 69a73cb..4368938 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
@@ -18,7 +18,6 @@
*/
package org.apache.parquet.avro;
-import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
@@ -27,6 +26,7 @@ import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -369,7 +369,7 @@ public class TestReadWrite {
.set("mylong", 2L)
.set("myfloat", 3.1f)
.set("mydouble", 4.1)
- .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
+ .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
.set("mystring", "hello")
.set("mynestedrecord", nestedRecord)
.set("myenum", "a")
@@ -398,7 +398,7 @@ public class TestReadWrite {
assertEquals(2L, nextRecord.get("mylong"));
assertEquals(3.1f, nextRecord.get("myfloat"));
assertEquals(4.1, nextRecord.get("mydouble"));
- assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
+ assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
assertEquals(str("hello"), nextRecord.get("mystring"));
assertEquals(expectedEnumSymbol, nextRecord.get("myenum"));
assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
@@ -567,7 +567,7 @@ public class TestReadWrite {
record.put("mylong", 2L);
record.put("myfloat", 3.1f);
record.put("mydouble", 4.1);
- record.put("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)));
+ record.put("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
record.put("mystring", "hello");
record.put("myenum", "a");
record.put("mynestedint", 1);
@@ -615,7 +615,7 @@ public class TestReadWrite {
assertEquals(2L, nextRecord.get("mylong"));
assertEquals(3.1f, nextRecord.get("myfloat"));
assertEquals(4.1, nextRecord.get("mydouble"));
- assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
+ assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
assertEquals(str("hello"), nextRecord.get("mystring"));
assertEquals(str("a"), nextRecord.get("myenum")); // enum symbols are unknown
assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java
index af6f938..bcf553e 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWriteOldListBehavior.java
@@ -18,12 +18,12 @@
*/
package org.apache.parquet.avro;
-import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import java.io.File;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -247,7 +247,7 @@ public class TestReadWriteOldListBehavior {
.set("mylong", 2L)
.set("myfloat", 3.1f)
.set("mydouble", 4.1)
- .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
+ .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
.set("mystring", "hello")
.set("mynestedrecord", nestedRecord)
.set("myenum", "a")
@@ -276,7 +276,7 @@ public class TestReadWriteOldListBehavior {
assertEquals(2L, nextRecord.get("mylong"));
assertEquals(3.1f, nextRecord.get("myfloat"));
assertEquals(4.1, nextRecord.get("mydouble"));
- assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
+ assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
assertEquals(str("hello"), nextRecord.get("mystring"));
assertEquals(expectedEnumSymbol, nextRecord.get("myenum"));
assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
@@ -327,7 +327,7 @@ public class TestReadWriteOldListBehavior {
.set("mylong", 2L)
.set("myfloat", 3.1f)
.set("mydouble", 4.1)
- .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
+ .set("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)))
.set("mystring", "hello")
.set("mynestedrecord", nestedRecord)
.set("myenum", "a")
@@ -512,7 +512,7 @@ public class TestReadWriteOldListBehavior {
record.put("mylong", 2L);
record.put("myfloat", 3.1f);
record.put("mydouble", 4.1);
- record.put("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)));
+ record.put("mybytes", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)));
record.put("mystring", "hello");
record.put("myenum", "a");
record.put("mynestedint", 1);
@@ -573,7 +573,7 @@ public class TestReadWriteOldListBehavior {
assertEquals(2L, nextRecord.get("mylong"));
assertEquals(3.1f, nextRecord.get("myfloat"));
assertEquals(4.1, nextRecord.get("mydouble"));
- assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
+ assertEquals(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8)), nextRecord.get("mybytes"));
assertEquals(str("hello"), nextRecord.get("mystring"));
assertEquals(str("a"), nextRecord.get("myenum"));
assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java
index f385fde..96ca5a5 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java
@@ -20,7 +20,6 @@
package org.apache.parquet.cli;
import com.beust.jcommander.internal.Lists;
-import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.io.CharStreams;
import com.google.common.io.Resources;
@@ -52,7 +51,7 @@ import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.security.AccessController;
import java.util.Iterator;
import java.util.List;
@@ -60,9 +59,6 @@ import java.util.NoSuchElementException;
public abstract class BaseCommand implements Command, Configurable {
- @VisibleForTesting
- static final Charset UTF8 = Charset.forName("utf8");
-
private static final String RESOURCE_URI_SCHEME = "resource";
private static final String STDIN_AS_SOURCE = "stdin";
@@ -103,7 +99,7 @@ public abstract class BaseCommand implements Command, Configurable {
} else {
FSDataOutputStream outgoing = create(filename);
try {
- outgoing.write(content.getBytes(UTF8));
+ outgoing.write(content.getBytes(StandardCharsets.UTF_8));
} finally {
outgoing.close();
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index 85c82bd..021d171 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -23,7 +23,6 @@ import java.io.IOException;
import java.io.ObjectStreamException;
import java.io.OutputStream;
import java.io.Serializable;
-import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
@@ -31,12 +30,9 @@ import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
-import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.io.ParquetEncodingException;
import org.apache.parquet.schema.PrimitiveComparator;
-import static org.apache.parquet.bytes.BytesUtils.UTF8;
-
abstract public class Binary implements Comparable<Binary>, Serializable {
protected boolean isBackingBytesReused;
@@ -133,11 +129,10 @@ abstract public class Binary implements Comparable<Binary>, Serializable {
@Override
public String toStringUsingUTF8() {
- return UTF8.decode(ByteBuffer.wrap(value, offset, length)).toString();
- // TODO: figure out why the following line was much slower
- // rdb: new String(...) is slower because it instantiates a new Decoder,
- // while Charset#decode uses a thread-local decoder cache
- // return new String(value, offset, length, BytesUtils.UTF8);
+ // Charset#decode uses a thread-local decoder cache and is faster than
+ // new String(...) which instantiates a new Decoder per invocation
+ return StandardCharsets.UTF_8
+ .decode(ByteBuffer.wrap(value, offset, length)).toString();
}
@Override
@@ -220,11 +215,7 @@ abstract public class Binary implements Comparable<Binary>, Serializable {
}
private static ByteBuffer encodeUTF8(String value) {
- try {
- return ByteBuffer.wrap(value.getBytes("UTF-8"));
- } catch (UnsupportedEncodingException e) {
- throw new ParquetEncodingException("UTF-8 not supported.", e);
- }
+ return ByteBuffer.wrap(value.getBytes(StandardCharsets.UTF_8));
}
}
@@ -284,7 +275,7 @@ abstract public class Binary implements Comparable<Binary>, Serializable {
@Override
public String toStringUsingUTF8() {
- return UTF8.decode(ByteBuffer.wrap(value)).toString();
+ return StandardCharsets.UTF_8.decode(ByteBuffer.wrap(value)).toString();
}
@Override
@@ -393,11 +384,8 @@ abstract public class Binary implements Comparable<Binary>, Serializable {
public String toStringUsingUTF8() {
String ret;
if (value.hasArray()) {
- try {
- ret = new String(value.array(), value.arrayOffset() + offset, length, "UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new ParquetDecodingException("UTF-8 not supported");
- }
+ ret = new String(value.array(), value.arrayOffset() + offset, length,
+ StandardCharsets.UTF_8);
} else {
int limit = value.limit();
value.limit(offset+length);
@@ -406,7 +394,7 @@ abstract public class Binary implements Comparable<Binary>, Serializable {
// no corresponding interface to read a subset of a buffer, would have to slice it
// which creates another ByteBuffer object or do what is done here to adjust the
// limit/offset and set them back after
- ret = UTF8.decode(value).toString();
+ ret = StandardCharsets.UTF_8.decode(value).toString();
value.limit(limit);
value.position(position);
}
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
index ba3f903..2783b69 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
@@ -27,8 +27,8 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import org.apache.parquet.bytes.ByteBufferInputStream;
import org.junit.Assert;
@@ -627,9 +627,8 @@ public class TestDictionary {
}
}
- private void writeRepeatedWithReuse(int COUNT, ValuesWriter cw,
- String prefix) throws UnsupportedEncodingException {
- Binary reused = Binary.fromReusedByteArray((prefix + "0").getBytes("UTF-8"));
+ private void writeRepeatedWithReuse(int COUNT, ValuesWriter cw, String prefix) {
+ Binary reused = Binary.fromReusedByteArray((prefix + "0").getBytes(StandardCharsets.UTF_8));
for (int i = 0; i < COUNT; i++) {
Binary content = Binary.fromString(prefix + i % 10);
System.arraycopy(content.getBytesUnsafe(), 0, reused.getBytesUnsafe(),
0, reused.length());
diff --git a/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java b/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java
index 2657c7e..2c8162c 100644
--- a/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java
+++ b/parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java
@@ -24,6 +24,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -34,6 +35,8 @@ import org.slf4j.LoggerFactory;
public class BytesUtils {
private static final Logger LOG = LoggerFactory.getLogger(BytesUtils.class);
+ /** @deprecated Use {@link StandardCharsets#UTF_8} instead */
+ @Deprecated
public static final Charset UTF8 = Charset.forName("UTF-8");
/**
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index 6158dad..14e3729 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -24,7 +24,7 @@ import static org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE;
import static org.apache.parquet.hadoop.ParquetWriter.MAX_PADDING_SIZE_DEFAULT;
import java.io.IOException;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -101,7 +101,7 @@ public class ParquetFileWriter {
public static final String PARQUET_METADATA_FILE = "_metadata";
public static final String MAGIC_STR = "PAR1";
- public static final byte[] MAGIC = MAGIC_STR.getBytes(Charset.forName("ASCII"));
+ public static final byte[] MAGIC = MAGIC_STR.getBytes(StandardCharsets.US_ASCII);
public static final String PARQUET_COMMON_METADATA_FILE = "_common_metadata";
public static final int CURRENT_VERSION = 1;