Jesus Camacho Rodriguez created ORC-342:
-------------------------------------------
Summary: Error reading timestamp in range [1969-12-31
23:59:59.000,1969-12-31 23:59:59.999] when time zone shifts
Key: ORC-342
URL: https://issues.apache.org/jira/browse/ORC-342
Project: ORC
Issue Type: Bug
Reporter: Jesus Camacho Rodriguez
Timestamps in the range [1969-12-31 23:59:59.000, 1969-12-31 23:59:59.999] are
read back one second late, as [1970-01-01 00:00:00.000, 1970-01-01 00:00:00.999].
Below is a test based on {{TestOrcTimezone3}}, modified to reproduce the issue.
{code:java}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc;
import static junit.framework.Assert.assertEquals;
import java.io.File;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.TimeZone;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import com.google.common.collect.Lists;
import junit.framework.Assert;
/**
 * Reproducer for ORC-342: a timestamp in the last second before the epoch
 * ({@code 1969-12-31 23:59:59.xxx}) is written while the JVM default time zone
 * is the writer zone and read back while it is the reader zone. The test
 * expects the string form of the value read to equal the string that was
 * written.
 *
 * <p>The test is parameterized on (writer time zone ID, reader time zone ID).
 */
@RunWith(Parameterized.class)
public class TestOrcTimezone3 {
  /** Scratch directory; taken from {@code test.tmp.dir}, else target/test/tmp. */
  Path workDir = new Path(System.getProperty("test.tmp.dir",
      "target" + File.separator + "test" + File.separator + "tmp"));
  Configuration conf;
  FileSystem fs;
  Path testFilePath;
  String writerTimeZone;
  String readerTimeZone;
  /** JVM default zone captured at class load so it can be restored after each test. */
  static TimeZone defaultTimeZone = TimeZone.getDefault();

  /**
   * @param writerTZ time zone ID made the JVM default while the file is written
   * @param readerTZ time zone ID made the JVM default while the file is read
   */
  public TestOrcTimezone3(String writerTZ, String readerTZ) {
    this.writerTimeZone = writerTZ;
    this.readerTimeZone = readerTZ;
  }

  /**
   * @return the (writer zone, reader zone) pairs the test is run with
   */
  @Parameterized.Parameters
  public static Collection<Object[]> data() {
    return Arrays.asList(new Object[][]{
        {"UTC", "America/Los_Angeles"},
    });
  }

  @Rule
  public TestName testCaseName = new TestName();

  /**
   * Opens the local file system and removes any output file left over from a
   * previous run of the same test method.
   */
  @Before
  public void openFileSystem() throws Exception {
    conf = new Configuration();
    fs = FileSystem.getLocal(conf);
    testFilePath = new Path(workDir, "TestOrcTimezone3." +
        testCaseName.getMethodName() + ".orc");
    fs.delete(testFilePath, false);
  }

  /** Undoes the {@link TimeZone#setDefault} calls made by the test. */
  @After
  public void restoreTimeZone() {
    TimeZone.setDefault(defaultTimeZone);
  }

  /**
   * Writes the sample timestamps under the writer zone, reads them back under
   * the reader zone, and checks that every value round-trips unchanged and
   * that no row was lost.
   */
  @Test
  public void testTimestampWriter() throws Exception {
    TypeDescription schema = TypeDescription.createTimestamp();
    List<String> ts = Arrays.asList("1969-12-31 23:59:59.007");

    // ---- write phase: the default zone must be switched BEFORE the writer
    // is created and before Timestamp.valueOf() parses the strings.
    TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
    Writer writer = OrcFile.createWriter(testFilePath,
        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
            .bufferSize(10000));
    try {
      assertEquals(writerTimeZone, TimeZone.getDefault().getID());
      VectorizedRowBatch writeBatch = schema.createRowBatch();
      TimestampColumnVector written = (TimestampColumnVector) writeBatch.cols[0];
      for (String t : ts) {
        written.set(writeBatch.size++, Timestamp.valueOf(t));
      }
      writer.addRowBatch(writeBatch);
    } finally {
      // Close even when an assertion fails so the file handle is not leaked.
      writer.close();
    }

    // ---- read phase: switch the default zone before the reader is created.
    TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
    Reader reader = OrcFile.createReader(testFilePath,
        OrcFile.readerOptions(conf).filesystem(fs));
    assertEquals(readerTimeZone, TimeZone.getDefault().getID());
    RecordReader rows = reader.rows();
    try {
      VectorizedRowBatch readBatch = reader.getSchema().createRowBatch();
      TimestampColumnVector read = (TimestampColumnVector) readBatch.cols[0];
      int idx = 0;
      while (rows.nextBatch(readBatch)) {
        for (int r = 0; r < readBatch.size; ++r) {
          assertEquals(ts.get(idx++), read.asScratchTimestamp(r).toString());
        }
      }
      // Without this check the test would pass if the reader produced no rows.
      assertEquals("number of rows read", ts.size(), idx);
    } finally {
      rows.close();
    }
  }
}
{code}
{code}
Expected :1969-12-31 23:59:59.007
Actual :1970-01-01 00:00:00.007
{code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)