This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push: new 9b64bee64 ORC-1939: set TimestampColumnVector isUTC flag in TimestampFromXXXTreeReader 9b64bee64 is described below commit 9b64bee64b80a3cb72e1e709e1010537b9280925 Author: Vlad Rozov <vro...@amazon.com> AuthorDate: Fri Jul 11 14:06:57 2025 -0700 ORC-1939: set TimestampColumnVector isUTC flag in TimestampFromXXXTreeReader ### What changes were proposed in this pull request? The `TimestampColumnVector` `isUTC` flag should be set based on the reader's `useUtc` flag, similar to how `TimestampTreeReader` sets the flag based on `context.getUseUTCTimestamp()`. ### Why are the changes needed? With this change, the flag correctly reflects whether a `TimestampColumnVector` represents time in the local or the UTC time zone. ### How was this patch tested? Added new assertions to existing tests. ### Was this patch authored or co-authored using generative AI tooling? No Closes #2300 from vrozov/ORC-1939. Authored-by: Vlad Rozov <vro...@amazon.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../apache/orc/impl/ConvertTreeReaderFactory.java | 7 ++++++ .../src/test/org/apache/orc/TestOrcTimezone4.java | 2 ++ .../org/apache/orc/impl/TestSchemaEvolution.java | 25 ++++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java index 6886b551e..4861aa61f 100644 --- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java @@ -1533,6 +1533,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { // Allocate column vector for file; cast column vector for reader. 
longColVector = new LongColumnVector(batchSize); timestampColVector = (TimestampColumnVector) previousVector; + timestampColVector.setIsUTC(useUtc); } else { longColVector.ensureSize(batchSize, false); } @@ -1597,6 +1598,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { // Allocate column vector for file; cast column vector for reader. doubleColVector = new DoubleColumnVector(batchSize); timestampColVector = (TimestampColumnVector) previousVector; + timestampColVector.setIsUTC(useUtc); } else { doubleColVector.ensureSize(batchSize, false); } @@ -1661,6 +1663,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { // Allocate column vector for file; cast column vector for reader. decimalColVector = new DecimalColumnVector(batchSize, precision, scale); timestampColVector = (TimestampColumnVector) previousVector; + timestampColVector.setIsUTC(useUtc); } else { decimalColVector.ensureSize(batchSize, false); } @@ -1676,6 +1679,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { public static class TimestampFromStringGroupTreeReader extends ConvertTreeReader { private BytesColumnVector bytesColVector; private TimestampColumnVector timestampColVector; + private final boolean useUtc; private final DateTimeFormatter formatter; private final boolean useProlepticGregorian; @@ -1683,6 +1687,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { Context context, boolean isInstant) throws IOException { super(columnId, getStringGroupTreeReader(columnId, fileType, context), context); + useUtc = isInstant || context.getUseUTCTimestamp(); useProlepticGregorian = context.useProlepticGregorian(); Chronology chronology = useProlepticGregorian ? IsoChronology.INSTANCE @@ -1722,6 +1727,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { // Allocate column vector for file; cast column vector for reader. 
bytesColVector = new BytesColumnVector(batchSize); timestampColVector = (TimestampColumnVector) previousVector; + timestampColVector.setIsUTC(useUtc); } else { bytesColVector.ensureSize(batchSize, false); } @@ -1768,6 +1774,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { // Allocate column vector for file; cast column vector for reader. longColVector = new DateColumnVector(batchSize); timestampColVector = (TimestampColumnVector) previousVector; + timestampColVector.setIsUTC(useUtc); } else { longColVector.ensureSize(batchSize, false); } diff --git a/java/core/src/test/org/apache/orc/TestOrcTimezone4.java b/java/core/src/test/org/apache/orc/TestOrcTimezone4.java index 78892a926..cb03e1821 100644 --- a/java/core/src/test/org/apache/orc/TestOrcTimezone4.java +++ b/java/core/src/test/org/apache/orc/TestOrcTimezone4.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.TimeZone; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * @@ -92,6 +93,7 @@ public class TestOrcTimezone4 implements TestConf { times = (TimestampColumnVector) batch.cols[0]; int idx = 0; while (rows.nextBatch(batch)) { + assertTrue(times.isUTC()); for(int r=0; r < batch.size; ++r) { Timestamp timestamp = times.asScratchTimestamp(r); assertEquals(ts.get(idx++), formatter.format(timestamp)); diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java index dc7cc4ba2..fde63021f 100644 --- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java +++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java @@ -2356,6 +2356,9 @@ public class TestSchemaEvolution implements TestConf { final ZoneId WRITER_ZONE = ZoneId.of("America/New_York"); final ZoneId READER_ZONE = ZoneId.of("Australia/Sydney"); + final String EXPECT_LOCAL = "expected %s in local time zone"; + final String EXPECT_UTC = "expected %s in 
UTC time zone"; + final TimeZone oldDefault = TimeZone.getDefault(); final ZoneId UTC = ZoneId.of("UTC"); @@ -2420,50 +2423,62 @@ public class TestSchemaEvolution implements TestConf { assertEquals(expected1.replace(".1 ", " "), timestampToString(l1.time[current], l1.nanos[current], READER_ZONE), msg); + assertFalse(l1.isUTC(), EXPECT_LOCAL.formatted("l1")); assertEquals(expected2.replace(".1 ", " "), timestampToString(l2.time[current], l2.nanos[current], WRITER_ZONE), msg); + assertTrue(l2.isUTC(), EXPECT_UTC.formatted("l2")); assertEquals(longTimestampToString(((r % 128) - offset), READER_ZONE), timestampToString(t1.time[current], t1.nanos[current], READER_ZONE), msg); + assertFalse(t1.isUTC(), EXPECT_LOCAL.formatted("t1")); assertEquals(longTimestampToString((r % 128), WRITER_ZONE), timestampToString(t2.time[current], t2.nanos[current], WRITER_ZONE), msg); + assertTrue(t2.isUTC(), EXPECT_UTC.formatted("t2")); assertEquals(expected1, timestampToString(d1.time[current], d1.nanos[current], READER_ZONE), msg); + assertFalse(d1.isUTC(), EXPECT_LOCAL.formatted("d1")); assertEquals(expected2, timestampToString(d2.time[current], d2.nanos[current], WRITER_ZONE), msg); + assertTrue(d2.isUTC(), EXPECT_UTC.formatted("d2")); assertEquals(expected1, timestampToString(dbl1.time[current], dbl1.nanos[current], READER_ZONE), msg); + assertFalse(dbl1.isUTC(), EXPECT_LOCAL.formatted("dbl1")); assertEquals(expected2, timestampToString(dbl2.time[current], dbl2.nanos[current], WRITER_ZONE), msg); + assertTrue(dbl2.isUTC(), EXPECT_UTC.formatted("dbl2")); assertEquals(expectedDate1, timestampToString(dt1.time[current], dt1.nanos[current], READER_ZONE), msg); + assertFalse(dt1.isUTC(), EXPECT_LOCAL.formatted("dt1")); assertEquals(expectedDate2, timestampToString(dt2.time[current], dt2.nanos[current], UTC), msg); + assertTrue(dt2.isUTC(), EXPECT_UTC.formatted("dt2")); assertEquals(expected1, timestampToString(s1.time[current], s1.nanos[current], READER_ZONE), msg); + 
assertFalse(s1.isUTC(), EXPECT_LOCAL.formatted("s1")); assertEquals(expected2, timestampToString(s2.time[current], s2.nanos[current], WRITER_ZONE), msg); + assertTrue(s2.isUTC(), EXPECT_UTC.formatted("s2")); current += 1; } assertFalse(rows.nextBatch(batch)); @@ -2488,42 +2503,52 @@ public class TestSchemaEvolution implements TestConf { assertEquals(expected1.replace(".1 ", " "), timestampToString(l1.time[current], l1.nanos[current], UTC), msg); + assertTrue(l1.isUTC(), EXPECT_UTC.formatted("l1")); assertEquals(expected2.replace(".1 ", " "), timestampToString(l2.time[current], l2.nanos[current], WRITER_ZONE), msg); + assertTrue(l2.isUTC(), EXPECT_UTC.formatted("l2")); assertEquals(expected1, timestampToString(d1.time[current], d1.nanos[current], UTC), msg); + assertTrue(d1.isUTC(), EXPECT_UTC.formatted("d1")); assertEquals(expected2, timestampToString(d2.time[current], d2.nanos[current], WRITER_ZONE), msg); + assertTrue(d2.isUTC(), EXPECT_UTC.formatted("d2")); assertEquals(expected1, timestampToString(dbl1.time[current], dbl1.nanos[current], UTC), msg); + assertTrue(dbl1.isUTC(), EXPECT_UTC.formatted("dbl1")); assertEquals(expected2, timestampToString(dbl2.time[current], dbl2.nanos[current], WRITER_ZONE), msg); + assertTrue(dbl2.isUTC(), EXPECT_UTC.formatted("dbl2")); assertEquals(expectedDate, timestampToString(dt1.time[current], dt1.nanos[current], UTC), msg); + assertTrue(dt1.isUTC(), EXPECT_UTC.formatted("dt1")); assertEquals(expectedDate, timestampToString(dt2.time[current], dt2.nanos[current], UTC), msg); + assertTrue(dt2.isUTC(), EXPECT_UTC.formatted("dt2")); assertEquals(expected1, timestampToString(s1.time[current], s1.nanos[current], UTC), msg); + assertTrue(s1.isUTC(), EXPECT_UTC.formatted("s1")); assertEquals(expected2, timestampToString(s2.time[current], s2.nanos[current], WRITER_ZONE), msg); + assertTrue(s2.isUTC(), EXPECT_UTC.formatted("s2")); current += 1; } assertFalse(rows.nextBatch(batch));