This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 9b64bee64 ORC-1939: set TimestampColumnVector isUTC flag in TimestampFromXXXTreeReader
9b64bee64 is described below

commit 9b64bee64b80a3cb72e1e709e1010537b9280925
Author: Vlad Rozov <vro...@amazon.com>
AuthorDate: Fri Jul 11 14:06:57 2025 -0700

    ORC-1939: set TimestampColumnVector isUTC flag in TimestampFromXXXTreeReader
    
    ### What changes were proposed in this pull request?
    The `isUTC` flag of `TimestampColumnVector` should be set from the reader's `useUtc` flag, similar to how `TimestampTreeReader` sets it based on `context.getUseUTCTimestamp()`.
    
    ### Why are the changes needed?
    The `isUTC` flag then correctly reflects whether a `TimestampColumnVector` represents time in the local time zone or in UTC.
    
    ### How was this patch tested?
    Added new assertions to the existing tests (`TestOrcTimezone4` and `TestSchemaEvolution`).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #2300 from vrozov/ORC-1939.
    
    Authored-by: Vlad Rozov <vro...@amazon.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../apache/orc/impl/ConvertTreeReaderFactory.java  |  7 ++++++
 .../src/test/org/apache/orc/TestOrcTimezone4.java  |  2 ++
 .../org/apache/orc/impl/TestSchemaEvolution.java   | 25 ++++++++++++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 6886b551e..4861aa61f 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -1533,6 +1533,7 @@ public class ConvertTreeReaderFactory extends 
TreeReaderFactory {
         // Allocate column vector for file; cast column vector for reader.
         longColVector = new LongColumnVector(batchSize);
         timestampColVector = (TimestampColumnVector) previousVector;
+        timestampColVector.setIsUTC(useUtc);
       } else {
         longColVector.ensureSize(batchSize, false);
       }
@@ -1597,6 +1598,7 @@ public class ConvertTreeReaderFactory extends 
TreeReaderFactory {
         // Allocate column vector for file; cast column vector for reader.
         doubleColVector = new DoubleColumnVector(batchSize);
         timestampColVector = (TimestampColumnVector) previousVector;
+        timestampColVector.setIsUTC(useUtc);
       } else {
         doubleColVector.ensureSize(batchSize, false);
       }
@@ -1661,6 +1663,7 @@ public class ConvertTreeReaderFactory extends 
TreeReaderFactory {
         // Allocate column vector for file; cast column vector for reader.
         decimalColVector = new DecimalColumnVector(batchSize, precision, 
scale);
         timestampColVector = (TimestampColumnVector) previousVector;
+        timestampColVector.setIsUTC(useUtc);
       } else {
         decimalColVector.ensureSize(batchSize, false);
       }
@@ -1676,6 +1679,7 @@ public class ConvertTreeReaderFactory extends 
TreeReaderFactory {
   public static class TimestampFromStringGroupTreeReader extends 
ConvertTreeReader {
     private BytesColumnVector bytesColVector;
     private TimestampColumnVector timestampColVector;
+    private final boolean useUtc;
     private final DateTimeFormatter formatter;
     private final boolean useProlepticGregorian;
 
@@ -1683,6 +1687,7 @@ public class ConvertTreeReaderFactory extends 
TreeReaderFactory {
                                        Context context, boolean isInstant)
         throws IOException {
       super(columnId, getStringGroupTreeReader(columnId, fileType, context), 
context);
+      useUtc = isInstant || context.getUseUTCTimestamp();
       useProlepticGregorian = context.useProlepticGregorian();
       Chronology chronology = useProlepticGregorian
           ? IsoChronology.INSTANCE
@@ -1722,6 +1727,7 @@ public class ConvertTreeReaderFactory extends 
TreeReaderFactory {
         // Allocate column vector for file; cast column vector for reader.
         bytesColVector = new BytesColumnVector(batchSize);
         timestampColVector = (TimestampColumnVector) previousVector;
+        timestampColVector.setIsUTC(useUtc);
       } else {
         bytesColVector.ensureSize(batchSize, false);
       }
@@ -1768,6 +1774,7 @@ public class ConvertTreeReaderFactory extends 
TreeReaderFactory {
         // Allocate column vector for file; cast column vector for reader.
         longColVector = new DateColumnVector(batchSize);
         timestampColVector = (TimestampColumnVector) previousVector;
+        timestampColVector.setIsUTC(useUtc);
       } else {
         longColVector.ensureSize(batchSize, false);
       }
diff --git a/java/core/src/test/org/apache/orc/TestOrcTimezone4.java b/java/core/src/test/org/apache/orc/TestOrcTimezone4.java
index 78892a926..cb03e1821 100644
--- a/java/core/src/test/org/apache/orc/TestOrcTimezone4.java
+++ b/java/core/src/test/org/apache/orc/TestOrcTimezone4.java
@@ -34,6 +34,7 @@ import java.util.List;
 import java.util.TimeZone;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
  *
@@ -92,6 +93,7 @@ public class TestOrcTimezone4 implements TestConf {
     times = (TimestampColumnVector) batch.cols[0];
     int idx = 0;
     while (rows.nextBatch(batch)) {
+      assertTrue(times.isUTC());
       for(int r=0; r < batch.size; ++r) {
         Timestamp timestamp = times.asScratchTimestamp(r);
         assertEquals(ts.get(idx++), formatter.format(timestamp));
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index dc7cc4ba2..fde63021f 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -2356,6 +2356,9 @@ public class TestSchemaEvolution implements TestConf {
     final ZoneId WRITER_ZONE = ZoneId.of("America/New_York");
     final ZoneId READER_ZONE = ZoneId.of("Australia/Sydney");
 
+    final String EXPECT_LOCAL = "expected %s in local time zone";
+    final String EXPECT_UTC = "expected %s in UTC time zone";
+
     final TimeZone oldDefault = TimeZone.getDefault();
     final ZoneId UTC = ZoneId.of("UTC");
 
@@ -2420,50 +2423,62 @@ public class TestSchemaEvolution implements TestConf {
           assertEquals(expected1.replace(".1 ", " "),
               timestampToString(l1.time[current], l1.nanos[current], 
READER_ZONE),
               msg);
+          assertFalse(l1.isUTC(), EXPECT_LOCAL.formatted("l1"));
 
           assertEquals(expected2.replace(".1 ", " "),
               timestampToString(l2.time[current], l2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(l2.isUTC(), EXPECT_UTC.formatted("l2"));
 
           assertEquals(longTimestampToString(((r % 128) - offset), 
READER_ZONE),
               timestampToString(t1.time[current], t1.nanos[current], 
READER_ZONE),
               msg);
+          assertFalse(t1.isUTC(), EXPECT_LOCAL.formatted("t1"));
 
           assertEquals(longTimestampToString((r % 128), WRITER_ZONE),
               timestampToString(t2.time[current], t2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(t2.isUTC(), EXPECT_UTC.formatted("t2"));
 
           assertEquals(expected1,
               timestampToString(d1.time[current], d1.nanos[current], 
READER_ZONE),
               msg);
+          assertFalse(d1.isUTC(), EXPECT_LOCAL.formatted("d1"));
 
           assertEquals(expected2,
               timestampToString(d2.time[current], d2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(d2.isUTC(), EXPECT_UTC.formatted("d2"));
 
           assertEquals(expected1,
               timestampToString(dbl1.time[current], dbl1.nanos[current], 
READER_ZONE),
               msg);
+          assertFalse(dbl1.isUTC(), EXPECT_LOCAL.formatted("dbl1"));
 
           assertEquals(expected2,
               timestampToString(dbl2.time[current], dbl2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(dbl2.isUTC(), EXPECT_UTC.formatted("dbl2"));
 
           assertEquals(expectedDate1,
               timestampToString(dt1.time[current], dt1.nanos[current], 
READER_ZONE),
               msg);
+          assertFalse(dt1.isUTC(), EXPECT_LOCAL.formatted("dt1"));
 
           assertEquals(expectedDate2,
               timestampToString(dt2.time[current], dt2.nanos[current], UTC),
               msg);
+          assertTrue(dt2.isUTC(), EXPECT_UTC.formatted("dt2"));
 
           assertEquals(expected1,
               timestampToString(s1.time[current], s1.nanos[current], 
READER_ZONE),
               msg);
+          assertFalse(s1.isUTC(), EXPECT_LOCAL.formatted("s1"));
 
           assertEquals(expected2,
               timestampToString(s2.time[current], s2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(s2.isUTC(), EXPECT_UTC.formatted("s2"));
           current += 1;
         }
         assertFalse(rows.nextBatch(batch));
@@ -2488,42 +2503,52 @@ public class TestSchemaEvolution implements TestConf {
           assertEquals(expected1.replace(".1 ", " "),
               timestampToString(l1.time[current], l1.nanos[current], UTC),
               msg);
+          assertTrue(l1.isUTC(), EXPECT_UTC.formatted("l1"));
 
           assertEquals(expected2.replace(".1 ", " "),
               timestampToString(l2.time[current], l2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(l2.isUTC(), EXPECT_UTC.formatted("l2"));
 
           assertEquals(expected1,
               timestampToString(d1.time[current], d1.nanos[current], UTC),
               msg);
+          assertTrue(d1.isUTC(), EXPECT_UTC.formatted("d1"));
 
           assertEquals(expected2,
               timestampToString(d2.time[current], d2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(d2.isUTC(), EXPECT_UTC.formatted("d2"));
 
           assertEquals(expected1,
               timestampToString(dbl1.time[current], dbl1.nanos[current], UTC),
               msg);
+          assertTrue(dbl1.isUTC(), EXPECT_UTC.formatted("dbl1"));
 
           assertEquals(expected2,
               timestampToString(dbl2.time[current], dbl2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(dbl2.isUTC(), EXPECT_UTC.formatted("dbl2"));
 
           assertEquals(expectedDate,
               timestampToString(dt1.time[current], dt1.nanos[current], UTC),
               msg);
+          assertTrue(dt1.isUTC(), EXPECT_UTC.formatted("dt1"));
 
           assertEquals(expectedDate,
               timestampToString(dt2.time[current], dt2.nanos[current], UTC),
               msg);
+          assertTrue(dt2.isUTC(), EXPECT_UTC.formatted("dt2"));
 
           assertEquals(expected1,
               timestampToString(s1.time[current], s1.nanos[current], UTC),
               msg);
+          assertTrue(s1.isUTC(), EXPECT_UTC.formatted("s1"));
 
           assertEquals(expected2,
               timestampToString(s2.time[current], s2.nanos[current], 
WRITER_ZONE),
               msg);
+          assertTrue(s2.isUTC(), EXPECT_UTC.formatted("s2"));
           current += 1;
         }
         assertFalse(rows.nextBatch(batch));

Reply via email to