Author: szehon
Date: Fri Jan 30 07:49:08 2015
New Revision: 1655953
URL: http://svn.apache.org/r1655953
Log:
HIVE-9482 : Hive parquet timestamp compatibility (Szehon, reviewed by Brock)
Added:
hive/trunk/data/files/parquet_external_time.parq (with props)
hive/trunk/ql/src/test/queries/clientpositive/parquet_external_time.q
hive/trunk/ql/src/test/results/clientpositive/parquet_external_time.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Fri Jan 30 07:49:08 2015
@@ -891,7 +891,9 @@ public class HiveConf extends Configurat
"Maximum fraction of heap that can be used by Parquet file writers in
one task.\n" +
"It is for avoiding OutOfMemory error in tasks. Work with Parquet
1.6.0 and above.\n" +
"This config parameter is defined in Parquet, so that it does not
start with 'hive.'."),
-
+
HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION("hive.parquet.timestamp.skip.conversion",
true,
+ "Current Hive implementation of parquet stores timestamps to UTC, this
flag allows skipping of the conversion" +
+ "on reading parquet files from other tools"),
HIVE_ORC_FILE_MEMORY_POOL("hive.exec.orc.memory.pool", 0.5f,
"Maximum fraction of heap that can be used by ORC file writers"),
HIVE_ORC_WRITE_FORMAT("hive.exec.orc.write.format", null,
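
For reference, a minimal sketch of how the new flag can be read and toggled through HiveConf (the demo class and setup around it are hypothetical; the ConfVars entry is the one added above):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class SkipConversionFlagDemo {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Defaults to true, per the ConfVars declaration above.
        boolean skip = HiveConf.getBoolVar(conf,
            HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION);
        System.out.println("skip conversion enabled: " + skip);
        // Setting it to false forces the UTC reverse-conversion on every
        // parquet timestamp read, regardless of which tool wrote the file.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION, false);
      }
    }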
Added: hive/trunk/data/files/parquet_external_time.parq
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_external_time.parq?rev=1655953&view=auto
==============================================================================
Binary file - no diff available.
Propchange: hive/trunk/data/files/parquet_external_time.parq
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java Fri Jan 30 07:49:08 2015
@@ -1,7 +1,24 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hive.ql.io.parquet.convert;
import org.apache.hadoop.io.Writable;
+import java.util.Map;
+
interface ConverterParent {
void set(int index, Writable value);
+
+ Map<String, String> getMetadata();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java Fri Jan 30 07:49:08 2015
@@ -13,10 +13,15 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.convert;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.io.ArrayWritable;
import parquet.io.api.GroupConverter;
import parquet.io.api.RecordMaterializer;
import parquet.schema.GroupType;
+import parquet.schema.MessageType;
+import parquet.schema.MessageTypeParser;
+
+import java.util.Map;
/**
*
@@ -27,8 +32,9 @@ public class DataWritableRecordConverter
private final HiveStructConverter root;
-  public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) {
-    this.root = new HiveStructConverter(requestedSchema, tableSchema);
+  public DataWritableRecordConverter(final GroupType requestedSchema, final Map<String, String> metadata) {
+    this.root = new HiveStructConverter(requestedSchema,
+        MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_SCHEMA_KEY)), metadata);
   }
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Fri Jan 30 07:49:08 2015
@@ -16,7 +16,10 @@ package org.apache.hadoop.hive.ql.io.par
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.Map;
+import com.google.common.base.Strings;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -32,7 +35,6 @@ import org.apache.hadoop.io.Writable;
import parquet.column.Dictionary;
import parquet.io.api.Binary;
-import parquet.io.api.Converter;
import parquet.io.api.PrimitiveConverter;
import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;
@@ -140,7 +142,14 @@ public enum ETypeConverter {
@Override
protected TimestampWritable convert(Binary binary) {
NanoTime nt = NanoTime.fromBinary(binary);
-        Timestamp ts = NanoTimeUtils.getTimestamp(nt);
+        Map<String, String> metadata = parent.getMetadata();
+        //Current Hive parquet timestamp implementation stores timestamps in UTC, but other components do not.
+        //If this file was written by the current Hive implementation itself, we need to do the reverse conversion; otherwise skip the conversion.
+        boolean skipConversion = false;
+        if (Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname))) {
+          skipConversion = !Strings.nullToEmpty(metadata.get("createdBy")).startsWith("parquet-mr");
+        }
+        Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipConversion);
return new TimestampWritable(ts);
}
};
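
The converter change above boils down to a small decision rule: skip the reverse conversion only when the flag is on and the file's createdBy footer shows it was not written through parquet-mr (which is what Hive itself writes through). A standalone restatement of that rule, as a hedged sketch (class and method names here are hypothetical):

    import java.util.Map;
    import com.google.common.base.Strings;

    final class TimestampConversionPolicy {
      /**
       * Mirrors the patched converter logic: parquet-mr-written files
       * (including Hive's own) still get the GMT reverse-conversion;
       * files from other writers skip it when the flag is enabled.
       */
      static boolean skipConversion(Map<String, String> metadata, String flagKey) {
        if (!Boolean.valueOf(metadata.get(flagKey))) {
          return false;
        }
        return !Strings.nullToEmpty(metadata.get("createdBy")).startsWith("parquet-mr");
      }
    }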
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java Fri Jan 30 07:49:08 2015
@@ -3,6 +3,8 @@ package org.apache.hadoop.hive.ql.io.par
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
import parquet.io.api.Converter;
@@ -27,12 +29,13 @@ public class HiveCollectionConverter ext
ConverterParent parent,
int index) {
return new HiveCollectionConverter(
- listType, parent, index, false /* not a map */ );
+ listType, parent, index, false /* not a map */);
}
private HiveCollectionConverter(GroupType collectionType,
ConverterParent parent,
int index, boolean isMap) {
+ setMetadata(parent.getMetadata());
this.collectionType = collectionType;
this.parent = parent;
this.index = index;
@@ -78,6 +81,7 @@ public class HiveCollectionConverter ext
private Writable[] keyValue = null;
    public KeyValueConverter(GroupType keyValueType, HiveGroupConverter parent) {
+ setMetadata(parent.getMetadata());
this.parent = parent;
this.keyConverter = getConverterFromDescription(
keyValueType.getType(0), 0, this);
@@ -120,6 +124,7 @@ public class HiveCollectionConverter ext
private Writable element = null;
    public ElementConverter(GroupType repeatedType, HiveGroupConverter parent) {
+ setMetadata(parent.getMetadata());
this.parent = parent;
this.elementConverter = getConverterFromDescription(
repeatedType.getType(0), 0, this);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java Fri Jan 30 07:49:08 2015
@@ -23,8 +23,20 @@ import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;
import parquet.schema.Type;
+import java.util.Map;
+
public abstract class HiveGroupConverter extends GroupConverter implements ConverterParent {
+ private Map<String, String> metadata;
+
+ public void setMetadata(Map<String, String> metadata) {
+ this.metadata = metadata;
+ }
+
+ public Map<String, String> getMetadata() {
+ return metadata;
+ }
+
  protected static PrimitiveConverter getConverterFromDescription(PrimitiveType type, int index, ConverterParent parent) {
if (type == null) {
return null;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java Fri Jan 30 07:49:08 2015
@@ -15,6 +15,8 @@ package org.apache.hadoop.hive.ql.io.par
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
import parquet.io.api.Converter;
@@ -36,8 +38,9 @@ public class HiveStructConverter extends
private final List<Repeated> repeatedConverters;
private boolean reuseWritableArray = false;
-  public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema) {
+  public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema, Map<String, String> metadata) {
this(requestedSchema, null, 0, tableSchema);
+ setMetadata(metadata);
this.reuseWritableArray = true;
this.writables = new Writable[tableSchema.getFieldCount()];
}
@@ -49,6 +52,9 @@ public class HiveStructConverter extends
public HiveStructConverter(final GroupType selectedGroupType,
final ConverterParent parent, final int index,
final GroupType containingGroupType) {
+ if (parent != null) {
+ setMetadata(parent.getMetadata());
+ }
this.parent = parent;
this.index = index;
this.totalFieldCount = containingGroupType.getFieldCount();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java Fri Jan 30 07:49:08 2015
@@ -1,7 +1,10 @@
package org.apache.hadoop.hive.ql.io.parquet.convert;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
import parquet.column.Dictionary;
@@ -21,11 +24,23 @@ public interface Repeated extends Conver
public void parentEnd();
+  abstract class RepeatedConverterParent extends PrimitiveConverter implements Repeated {
+ private Map<String, String> metadata;
+
+ public void setMetadata(Map<String, String> metadata) {
+ this.metadata = metadata;
+ }
+
+ public Map<String, String> getMetadata() {
+ return metadata;
+ }
+ }
+
/**
* Stands in for a PrimitiveConverter and accumulates multiple values as an
* ArrayWritable.
*/
-  class RepeatedPrimitiveConverter extends PrimitiveConverter implements Repeated {
+ class RepeatedPrimitiveConverter extends RepeatedConverterParent {
private final PrimitiveType primitiveType;
private final PrimitiveConverter wrapped;
private final ConverterParent parent;
@@ -33,6 +48,7 @@ public interface Repeated extends Conver
private final List<Writable> list = new ArrayList<Writable>();
    public RepeatedPrimitiveConverter(PrimitiveType primitiveType, ConverterParent parent, int index) {
+ setMetadata(parent.getMetadata());
this.primitiveType = primitiveType;
this.parent = parent;
this.index = index;
@@ -112,8 +128,11 @@ public interface Repeated extends Conver
private final ConverterParent parent;
private final int index;
private final List<Writable> list = new ArrayList<Writable>();
+ private final Map<String, String> metadata = new HashMap<String, String>();
+
    public RepeatedGroupConverter(GroupType groupType, ConverterParent parent, int index) {
+ setMetadata(parent.getMetadata());
this.groupType = groupType;
this.parent = parent;
this.index = index;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Fri Jan 30 07:49:08 2015
@@ -153,7 +153,6 @@ public class DataWritableReadSupport ext
      throw new IllegalStateException("ReadContext not initialized properly. " +
          "Don't know the Hive Schema.");
    }
-    final MessageType tableSchema = MessageTypeParser.parseMessageType(metadata.get(HIVE_SCHEMA_KEY));
-    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
+    return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata);
}
}
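
With this change the record converter receives the raw metadata map and parses the table schema itself via MessageTypeParser. A minimal sketch of that parsing step in isolation (the demo class and schema string are hypothetical; Hive stores timestamps as int96 in parquet):

    import parquet.schema.MessageType;
    import parquet.schema.MessageTypeParser;

    public class SchemaParseDemo {
      public static void main(String[] args) {
        // A hypothetical single-timestamp table schema, as it might be
        // stored under DataWritableReadSupport.HIVE_SCHEMA_KEY.
        String hiveSchema = "message hive_schema { optional int96 t; }";
        MessageType tableSchema = MessageTypeParser.parseMessageType(hiveSchema);
        System.out.println(tableSchema);
      }
    }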
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java Fri Jan 30 07:49:08 2015
@@ -16,10 +16,12 @@ package org.apache.hadoop.hive.ql.io.par
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
@@ -246,6 +248,7 @@ public class ParquetRecordReaderWrapper
        LOG.warn("Skipping split, could not find row group in: " + (FileSplit) oldSplit);
        split = null;
      } else {
+        populateReadMetadata(readContext.getReadSupportMetadata(), fileMetaData, conf);
split = new ParquetInputSplit(finalPath,
splitStart,
splitLength,
@@ -261,4 +264,16 @@ public class ParquetRecordReaderWrapper
}
return split;
}
+
+ /**
+   * Populates the read metadata, using the parquet file metadata and the Hive configuration.
+   * @param metadata read metadata to populate
+   * @param fileMetaData parquet file metadata
+   * @param conf hive configuration
+   */
+  private void populateReadMetadata(Map<String, String> metadata, FileMetaData fileMetaData, JobConf conf) {
+    metadata.put("createdBy", fileMetaData.getCreatedBy());
+    metadata.put(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname,
+        String.valueOf(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)));
+  }
}
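
To make the plumbing concrete: after populateReadMetadata runs, the map carries two extra string entries that the timestamp converter later consults. A hypothetical example of their shape (the Impala createdBy value is illustrative, not taken from this commit):

    import java.util.HashMap;
    import java.util.Map;

    public class ReadMetadataShape {
      public static void main(String[] args) {
        Map<String, String> metadata = new HashMap<String, String>();
        metadata.put("createdBy", "impala version 1.4");                 // from the parquet footer
        metadata.put("hive.parquet.timestamp.skip.conversion", "true");  // from HiveConf
        // With these values the reader keeps the raw timestamp as written,
        // since createdBy does not start with "parquet-mr".
        System.out.println(metadata);
      }
    }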
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java Fri Jan 30 07:49:08 2015
@@ -28,19 +28,31 @@ public class NanoTimeUtils {
static final long SECONDS_PER_MINUTE = 60;
static final long MINUTES_PER_HOUR = 60;
-  private static final ThreadLocal<Calendar> parquetTsCalendar = new ThreadLocal<Calendar>();
+  private static final ThreadLocal<Calendar> parquetGMTCalendar = new ThreadLocal<Calendar>();
+  private static final ThreadLocal<Calendar> parquetLocalCalendar = new ThreadLocal<Calendar>();

-  private static Calendar getCalendar() {
+  private static Calendar getGMTCalendar() {
     //Calendar.getInstance calculates the current-time needlessly, so cache an instance.
-    if (parquetTsCalendar.get() == null) {
-      parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
+    if (parquetGMTCalendar.get() == null) {
+      parquetGMTCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
     }
-    return parquetTsCalendar.get();
+    return parquetGMTCalendar.get();
   }

-  public static NanoTime getNanoTime(Timestamp ts) {
+  private static Calendar getLocalCalendar() {
+    if (parquetLocalCalendar.get() == null) {
+      parquetLocalCalendar.set(Calendar.getInstance());
+    }
+    return parquetLocalCalendar.get();
+  }
+
+  private static Calendar getCalendar(boolean skipConversion) {
+    return skipConversion ? getLocalCalendar() : getGMTCalendar();
+  }
+
+  public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) {
-    Calendar calendar = getCalendar();
+    Calendar calendar = getCalendar(skipConversion);
calendar.setTime(ts);
JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
calendar.get(Calendar.MONTH) + 1, //java calendar index starting at 1.
@@ -53,15 +65,16 @@ public class NanoTimeUtils {
long nanos = ts.getNanos();
    long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_SECOND * SECONDS_PER_MINUTE * minute +
NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR * hour;
+
return new NanoTime(days, nanosOfDay);
}
- public static Timestamp getTimestamp(NanoTime nt) {
+ public static Timestamp getTimestamp(NanoTime nt, boolean skipConversion) {
int julianDay = nt.getJulianDay();
long nanosOfDay = nt.getTimeOfDayNanos();
JDateTime jDateTime = new JDateTime((double) julianDay);
- Calendar calendar = getCalendar();
+ Calendar calendar = getCalendar(skipConversion);
calendar.set(Calendar.YEAR, jDateTime.getYear());
    calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1.
calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
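
A short round-trip sketch of the two reworked entry points (the demo class is hypothetical; the expected behavior matches the new tests below):

    import java.sql.Timestamp;
    import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
    import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;

    public class NanoTimeRoundTrip {
      public static void main(String[] args) {
        Timestamp ts = Timestamp.valueOf("2011-01-01 00:30:30.111111111");

        // skipConversion=true: wall-clock fields are taken in local time,
        // so the round trip is independent of the JVM timezone.
        NanoTime local = NanoTimeUtils.getNanoTime(ts, true);
        System.out.println(NanoTimeUtils.getTimestamp(local, true));  // 2011-01-01 00:30:30.111111111

        // skipConversion=false: legacy Hive behavior, convert through GMT;
        // the round trip still recovers the same wall-clock value.
        NanoTime gmt = NanoTimeUtils.getNanoTime(ts, false);
        System.out.println(NanoTimeUtils.getTimestamp(gmt, false));
      }
    }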
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Fri Jan 30 07:49:08 2015
@@ -229,7 +229,7 @@ public class DataWritableWriter {
      recordConsumer.addBinary((Binary.fromByteArray(((BytesWritable) value).getBytes())));
} else if (value instanceof TimestampWritable) {
Timestamp ts = ((TimestampWritable) value).getTimestamp();
- NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts, false);
nt.writeValue(recordConsumer);
} else {
      throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java?rev=1655953&r1=1655952&r2=1655953&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java Fri Jan 30 07:49:08 2015
@@ -41,10 +41,10 @@ public class TestParquetTimestampUtils e
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
Timestamp ts = new Timestamp(cal.getTimeInMillis());
- NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts, false);
Assert.assertEquals(nt.getJulianDay(), 2440000);
- Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt);
+ Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt, false);
Assert.assertEquals(tsFetched, ts);
//check if 30 Julian Days between Jan 1, 2005 and Jan 31, 2005.
@@ -56,9 +56,9 @@ public class TestParquetTimestampUtils e
cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
- NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1);
+ NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, false);
- Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1);
+ Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false);
Assert.assertEquals(ts1Fetched, ts1);
Calendar cal2 = Calendar.getInstance();
@@ -69,9 +69,9 @@ public class TestParquetTimestampUtils e
cal2.setTimeZone(TimeZone.getTimeZone("UTC"));
Timestamp ts2 = new Timestamp(cal2.getTimeInMillis());
- NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2);
+ NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, false);
- Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2);
+ Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false);
Assert.assertEquals(ts2Fetched, ts2);
Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30);
}
@@ -90,7 +90,7 @@ public class TestParquetTimestampUtils e
ts.setNanos(1);
//(1*60*60 + 1*60 + 1) * 10e9 + 1
- NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts, false);
Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
//case 2: 23:59:59.999999999
@@ -106,7 +106,7 @@ public class TestParquetTimestampUtils e
ts.setNanos(999999999);
//(23*60*60 + 59*60 + 59)*10e9 + 999999999
- nt = NanoTimeUtils.getNanoTime(ts);
+ nt = NanoTimeUtils.getNanoTime(ts, false);
Assert.assertEquals(nt.getTimeOfDayNanos(), 86399999999999L);
//case 3: verify the difference.
@@ -132,8 +132,8 @@ public class TestParquetTimestampUtils e
Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
ts1.setNanos(1);
- NanoTime n2 = NanoTimeUtils.getNanoTime(ts2);
- NanoTime n1 = NanoTimeUtils.getNanoTime(ts1);
+ NanoTime n2 = NanoTimeUtils.getNanoTime(ts2, false);
+ NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false);
Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(),
600000000009L);
}
@@ -157,7 +157,7 @@ public class TestParquetTimestampUtils e
* 17:00 PST = 01:00 GMT (if not daylight savings)
* (1*60*60 + 1*60 + 1)*10e9 + 1 = 3661000000001
*/
- NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts, false);
long timeOfDayNanos = nt.getTimeOfDayNanos();
    Assert.assertTrue(timeOfDayNanos == 61000000001L || timeOfDayNanos == 3661000000001L);
@@ -165,39 +165,63 @@ public class TestParquetTimestampUtils e
Assert.assertEquals(nt.getJulianDay(), 2440001);
}
- public void testValues() {
+ public void testTimezoneValues() {
+ valueTest(false);
+ }
+
+ public void testTimezonelessValues() {
+ valueTest(true);
+ }
+
+ public void testTimezoneless() {
+ Timestamp ts1 = Timestamp.valueOf("2011-01-01 00:30:30.111111111");
+ NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1, true);
+ Assert.assertEquals(nt1.getJulianDay(), 2455563);
+ Assert.assertEquals(nt1.getTimeOfDayNanos(), 1830111111111L);
+ Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1, true);
+ Assert.assertEquals(ts1Fetched.toString(), ts1.toString());
+
+ Timestamp ts2 = Timestamp.valueOf("2011-02-02 08:30:30.222222222");
+ NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2, true);
+ Assert.assertEquals(nt2.getJulianDay(), 2455595);
+ Assert.assertEquals(nt2.getTimeOfDayNanos(), 30630222222222L);
+ Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, true);
+ Assert.assertEquals(ts2Fetched.toString(), ts2.toString());
+ }
+
+ private void valueTest(boolean local) {
//exercise a broad range of timestamps close to the present.
- verifyTsString("2011-01-01 01:01:01.111111111");
- verifyTsString("2012-02-02 02:02:02.222222222");
- verifyTsString("2013-03-03 03:03:03.333333333");
- verifyTsString("2014-04-04 04:04:04.444444444");
- verifyTsString("2015-05-05 05:05:05.555555555");
- verifyTsString("2016-06-06 06:06:06.666666666");
- verifyTsString("2017-07-07 07:07:07.777777777");
- verifyTsString("2018-08-08 08:08:08.888888888");
- verifyTsString("2019-09-09 09:09:09.999999999");
- verifyTsString("2020-10-10 10:10:10.101010101");
- verifyTsString("2021-11-11 11:11:11.111111111");
- verifyTsString("2022-12-12 12:12:12.121212121");
- verifyTsString("2023-01-02 13:13:13.131313131");
- verifyTsString("2024-02-02 14:14:14.141414141");
- verifyTsString("2025-03-03 15:15:15.151515151");
- verifyTsString("2026-04-04 16:16:16.161616161");
- verifyTsString("2027-05-05 17:17:17.171717171");
- verifyTsString("2028-06-06 18:18:18.181818181");
- verifyTsString("2029-07-07 19:19:19.191919191");
- verifyTsString("2030-08-08 20:20:20.202020202");
- verifyTsString("2031-09-09 21:21:21.212121212");
+ verifyTsString("2011-01-01 01:01:01.111111111", local);
+ verifyTsString("2012-02-02 02:02:02.222222222", local);
+ verifyTsString("2013-03-03 03:03:03.333333333", local);
+ verifyTsString("2014-04-04 04:04:04.444444444", local);
+ verifyTsString("2015-05-05 05:05:05.555555555", local);
+ verifyTsString("2016-06-06 06:06:06.666666666", local);
+ verifyTsString("2017-07-07 07:07:07.777777777", local);
+ verifyTsString("2018-08-08 08:08:08.888888888", local);
+ verifyTsString("2019-09-09 09:09:09.999999999", local);
+ verifyTsString("2020-10-10 10:10:10.101010101", local);
+ verifyTsString("2021-11-11 11:11:11.111111111", local);
+ verifyTsString("2022-12-12 12:12:12.121212121", local);
+ verifyTsString("2023-01-02 13:13:13.131313131", local);
+ verifyTsString("2024-02-02 14:14:14.141414141", local);
+ verifyTsString("2025-03-03 15:15:15.151515151", local);
+ verifyTsString("2026-04-04 16:16:16.161616161", local);
+ verifyTsString("2027-05-05 17:17:17.171717171", local);
+ verifyTsString("2028-06-06 18:18:18.181818181", local);
+ verifyTsString("2029-07-07 19:19:19.191919191", local);
+ verifyTsString("2030-08-08 20:20:20.202020202", local);
+ verifyTsString("2031-09-09 21:21:21.212121212", local);
//test some extreme cases.
- verifyTsString("9999-09-09 09:09:09.999999999");
- verifyTsString("0001-01-01 00:00:00.0");
+ verifyTsString("9999-09-09 09:09:09.999999999", local);
+ verifyTsString("0001-01-01 00:00:00.0", local);
}
- private void verifyTsString(String tsString) {
+ private void verifyTsString(String tsString, boolean local) {
Timestamp ts = Timestamp.valueOf(tsString);
- NanoTime nt = NanoTimeUtils.getNanoTime(ts);
- Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt);
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts, local);
+ Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt, local);
Assert.assertEquals(tsString, tsFetched.toString());
}
}
Added: hive/trunk/ql/src/test/queries/clientpositive/parquet_external_time.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_external_time.q?rev=1655953&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_external_time.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_external_time.q Fri Jan 30 07:49:08 2015
@@ -0,0 +1,5 @@
+create table timetest_parquet(t timestamp) stored as parquet;
+
+load data local inpath '../../data/files/parquet_external_time.parq' into table timetest_parquet;
+
+select * from timetest_parquet;
\ No newline at end of file
Added: hive/trunk/ql/src/test/results/clientpositive/parquet_external_time.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_external_time.q.out?rev=1655953&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_external_time.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_external_time.q.out Fri Jan 30 07:49:08 2015
@@ -0,0 +1,25 @@
+PREHOOK: query: create table timetest_parquet(t timestamp) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timetest_parquet
+POSTHOOK: query: create table timetest_parquet(t timestamp) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timetest_parquet
+PREHOOK: query: load data local inpath '../../data/files/parquet_external_time.parq' into table timetest_parquet
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@timetest_parquet
+POSTHOOK: query: load data local inpath '../../data/files/parquet_external_time.parq' into table timetest_parquet
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@timetest_parquet
+PREHOOK: query: select * from timetest_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timetest_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select * from timetest_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timetest_parquet
+#### A masked pattern was here ####
+2011-01-01 00:30:30.111111111