This is an automated email from the ASF dual-hosted git repository.
sbadhya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d9ec04156d8 HIVE-20889: Support timestamp-micros in AvroSerDe (#5779)
d9ec04156d8 is described below
commit d9ec04156d84bedbaa9f8dc40c27dbb88a3b9f49
Author: NZEC <[email protected]>
AuthorDate: Tue Nov 25 14:48:36 2025 +0530
HIVE-20889: Support timestamp-micros in AvroSerDe (#5779)
Co-authored-by: araika <[email protected]>
---
.../apache/hadoop/hive/common/type/Timestamp.java | 17 +++++
data/files/avro_timestamp.txt | 10 ++-
.../clientpositive/avro_hybrid_mixed_timestamp.q | 7 +-
.../avro_proleptic_mixed_timestamp.q | 7 +-
.../queries/clientpositive/avro_timestamp_micros.q | 9 +++
.../llap/avro_hybrid_mixed_timestamp.q.out | 24 ++++++-
.../llap/avro_proleptic_mixed_timestamp.q.out | 24 ++++++-
.../clientpositive/llap/avro_timestamp.q.out | 32 +++++++++
.../llap/avro_timestamp_micros.q.out | 36 ++++++++++
.../hadoop/hive/serde2/avro/AvroDeserializer.java | 70 +++++++++++++++++--
.../apache/hadoop/hive/serde2/avro/AvroSerDe.java | 3 +-
.../hadoop/hive/serde2/avro/AvroSerializer.java | 33 +++++++--
.../hadoop/hive/serde2/avro/SchemaToTypeInfo.java | 22 +++++-
.../hadoop/hive/serde2/avro/TypeInfoToSchema.java | 2 +-
.../hive/serde2/avro/TestAvroDeserializer.java | 43 ++++++++++++
.../avro/TestAvroObjectInspectorGenerator.java | 12 +++-
.../hive/serde2/avro/TestAvroSerializer.java | 9 ++-
.../hive/serde2/avro/TestTypeInfoToSchema.java | 2 +-
.../hadoop/hive/common/type/CalendarUtils.java | 81 ++++++++++++++++++++++
19 files changed, 420 insertions(+), 23 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
b/common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
index ee36da60b96..35d0e2a8655 100644
--- a/common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
+++ b/common/src/java/org/apache/hadoop/hive/common/type/Timestamp.java
@@ -163,6 +163,11 @@ public long toEpochMilli() {
return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli();
}
+ public long toEpochMicro() {
+ return localDateTime.toEpochSecond(ZoneOffset.UTC) * 1_000_000
+ + localDateTime.getNano() / 1000;
+ }
+
public long toEpochMilli(ZoneId id) {
return localDateTime.atZone(id).toInstant().toEpochMilli();
}
@@ -237,6 +242,18 @@ public static Timestamp ofEpochMilli(long epochMilli, int
nanos) {
.withNano(nanos));
}
+ public static Timestamp ofEpochMicro(long epochMicro) {
+ int nanos = Math.toIntExact((epochMicro % 1000000) * 1000);
+ epochMicro -= nanos / 1_000_000;
+
+ Instant instant = Instant.ofEpochSecond(
+ epochMicro / 1_000_000,
+ nanos
+ );
+
+ return new Timestamp(LocalDateTime.ofInstant(instant, ZoneOffset.UTC));
+ }
+
public void setNanos(int nanos) {
localDateTime = localDateTime.withNano(nanos);
}
diff --git a/data/files/avro_timestamp.txt b/data/files/avro_timestamp.txt
index 6af27ba387c..02d4955c934 100644
--- a/data/files/avro_timestamp.txt
+++ b/data/files/avro_timestamp.txt
@@ -5,4 +5,12 @@
1412-02-21 07:08:09.123|foo:0980-12-16 07:08:09.123,bar:0998-05-07
07:08:09.123|0011-09-04 07:08:09.123,0011-09-05 07:08:09.123
1214-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0011-09-05 07:08:09.123
0847-02-11 07:08:09.123|baz:0921-12-16 07:08:09.123|0011-09-05 07:08:09.123
-0600-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0039-09-05 07:08:09.123
\ No newline at end of file
+0600-02-11 07:08:09.123|baz:0981-12-16 07:08:09.123|0039-09-05 07:08:09.123
+2012-02-21 07:08:09.123456|foo:1980-12-16 07:08:09.123456,bar:1998-05-07
07:08:09.123456|2011-09-04 07:08:09.123456,2011-09-05 07:08:09.123456
+2014-02-11 07:08:09.123456|baz:1981-12-16 07:08:09.123456|2011-09-05
07:08:09.123456
+1947-02-11 07:08:09.123456|baz:1921-12-16 07:08:09.123456|2011-09-05
07:08:09.123456
+8200-02-11 07:08:09.123456|baz:6981-12-16 07:08:09.123456|1039-09-05
07:08:09.123456
+1412-02-21 07:08:09.123456|foo:0980-12-16 07:08:09.123456,bar:0998-05-07
07:08:09.123456|0011-09-04 07:08:09.123456,0011-09-05 07:08:09.123456
+1214-02-11 07:08:09.123456|baz:0981-12-16 07:08:09.123456|0011-09-05
07:08:09.123456
+0847-02-11 07:08:09.123456|baz:0921-12-16 07:08:09.123456|0011-09-05
07:08:09.123456
+0600-02-11 07:08:09.123456|baz:0981-12-16 07:08:09.123456|0039-09-05
07:08:09.123456
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q
b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q
index 28fc99c51bc..bc3cd29d369 100644
--- a/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q
+++ b/ql/src/test/queries/clientpositive/avro_hybrid_mixed_timestamp.q
@@ -6,12 +6,17 @@ stored as avro;
INSERT INTO hybrid_table VALUES
('2012-02-21 07:08:09.123'),
('2014-02-11 07:08:09.123'),
+('2014-02-11 07:08:09.123456'),
('1947-02-11 07:08:09.123'),
('8200-02-11 07:08:09.123'),
+('8200-02-11 07:08:09.123456'),
('1012-02-21 07:15:11.123'),
+('1012-02-21 07:15:11.12345'),
('1014-02-11 07:15:11.123'),
+('1014-02-11 07:15:11.1234'),
('0947-02-11 07:15:11.123'),
-('0200-02-11 07:15:11.123');
+('0200-02-11 07:15:11.123'),
+('0200-02-11 07:15:11.1234');
select * from hybrid_table;
diff --git
a/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q
b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q
index 5a67ab59d2a..429e9f6bb33 100644
--- a/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q
+++ b/ql/src/test/queries/clientpositive/avro_proleptic_mixed_timestamp.q
@@ -8,12 +8,17 @@ stored as avro;
INSERT INTO hybrid_table VALUES
('2012-02-21 07:08:09.123'),
('2014-02-11 07:08:09.123'),
+('2014-02-11 07:08:09.123456'),
('1947-02-11 07:08:09.123'),
+('1947-02-11 07:08:09.1234'),
('8200-02-11 07:08:09.123'),
('1012-02-21 07:15:11.123'),
+('1012-02-21 07:15:11.12345'),
('1014-02-11 07:15:11.123'),
('0947-02-11 07:15:11.123'),
-('0200-02-11 07:15:11.123');
+('0947-02-11 07:15:11.12345'),
+('0200-02-11 07:15:11.123'),
+('0200-02-11 07:15:11.123456');
select * from hybrid_table;
diff --git a/ql/src/test/queries/clientpositive/avro_timestamp_micros.q
b/ql/src/test/queries/clientpositive/avro_timestamp_micros.q
new file mode 100644
index 00000000000..7712c44ad1a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avro_timestamp_micros.q
@@ -0,0 +1,9 @@
+CREATE EXTERNAL TABLE micros_table(`dt` timestamp)
+STORED AS AVRO;
+
+INSERT INTO micros_table VALUES
+(cast('2024-08-09 14:08:26.326107' as timestamp)),
+('2012-02-21 07:08:09.123'),
+('1014-02-11 07:15:11.12345');
+
+SELECT * FROM micros_table;
diff --git
a/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out
b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out
index 5477195ed87..7e417d95686 100644
--- a/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/avro_hybrid_mixed_timestamp.q.out
@@ -15,24 +15,34 @@ POSTHOOK: Output: default@hybrid_table
PREHOOK: query: INSERT INTO hybrid_table VALUES
('2012-02-21 07:08:09.123'),
('2014-02-11 07:08:09.123'),
+('2014-02-11 07:08:09.123456'),
('1947-02-11 07:08:09.123'),
('8200-02-11 07:08:09.123'),
+('8200-02-11 07:08:09.123456'),
('1012-02-21 07:15:11.123'),
+('1012-02-21 07:15:11.12345'),
('1014-02-11 07:15:11.123'),
+('1014-02-11 07:15:11.1234'),
('0947-02-11 07:15:11.123'),
-('0200-02-11 07:15:11.123')
+('0200-02-11 07:15:11.123'),
+('0200-02-11 07:15:11.1234')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@hybrid_table
POSTHOOK: query: INSERT INTO hybrid_table VALUES
('2012-02-21 07:08:09.123'),
('2014-02-11 07:08:09.123'),
+('2014-02-11 07:08:09.123456'),
('1947-02-11 07:08:09.123'),
('8200-02-11 07:08:09.123'),
+('8200-02-11 07:08:09.123456'),
('1012-02-21 07:15:11.123'),
+('1012-02-21 07:15:11.12345'),
('1014-02-11 07:15:11.123'),
+('1014-02-11 07:15:11.1234'),
('0947-02-11 07:15:11.123'),
-('0200-02-11 07:15:11.123')
+('0200-02-11 07:15:11.123'),
+('0200-02-11 07:15:11.1234')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@hybrid_table
@@ -47,12 +57,17 @@ POSTHOOK: Input: default@hybrid_table
#### A masked pattern was here ####
2012-02-21 07:08:09.123
2014-02-11 07:08:09.123
+2014-02-11 07:08:09.123456
1947-02-11 07:08:09.123
8200-02-11 07:08:09.123
+8200-02-11 07:08:09.123456
1012-02-21 07:15:11.123
+1012-02-21 07:15:11.12345
1014-02-11 07:15:11.123
+1014-02-11 07:15:11.1234
0947-02-11 07:15:11.123
0200-02-11 07:15:11.123
+0200-02-11 07:15:11.1234
PREHOOK: query: select * from hybrid_table
PREHOOK: type: QUERY
PREHOOK: Input: default@hybrid_table
@@ -63,12 +78,17 @@ POSTHOOK: Input: default@hybrid_table
#### A masked pattern was here ####
2012-02-21 07:08:09.123
2014-02-11 07:08:09.123
+2014-02-11 07:08:09.123456
1947-02-11 07:08:09.123
8200-02-11 07:08:09.123
+8200-02-11 07:08:09.123456
1012-02-21 07:15:11.123
+1012-02-21 07:15:11.12345
1014-02-11 07:15:11.123
+1014-02-11 07:15:11.1234
0947-02-11 07:15:11.123
0200-02-11 07:15:11.123
+0200-02-11 07:15:11.1234
PREHOOK: query: drop table hybrid_table
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@hybrid_table
diff --git
a/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out
b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out
index 5477195ed87..92cbd405bff 100644
---
a/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out
+++
b/ql/src/test/results/clientpositive/llap/avro_proleptic_mixed_timestamp.q.out
@@ -15,24 +15,34 @@ POSTHOOK: Output: default@hybrid_table
PREHOOK: query: INSERT INTO hybrid_table VALUES
('2012-02-21 07:08:09.123'),
('2014-02-11 07:08:09.123'),
+('2014-02-11 07:08:09.123456'),
('1947-02-11 07:08:09.123'),
+('1947-02-11 07:08:09.1234'),
('8200-02-11 07:08:09.123'),
('1012-02-21 07:15:11.123'),
+('1012-02-21 07:15:11.12345'),
('1014-02-11 07:15:11.123'),
('0947-02-11 07:15:11.123'),
-('0200-02-11 07:15:11.123')
+('0947-02-11 07:15:11.12345'),
+('0200-02-11 07:15:11.123'),
+('0200-02-11 07:15:11.123456')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@hybrid_table
POSTHOOK: query: INSERT INTO hybrid_table VALUES
('2012-02-21 07:08:09.123'),
('2014-02-11 07:08:09.123'),
+('2014-02-11 07:08:09.123456'),
('1947-02-11 07:08:09.123'),
+('1947-02-11 07:08:09.1234'),
('8200-02-11 07:08:09.123'),
('1012-02-21 07:15:11.123'),
+('1012-02-21 07:15:11.12345'),
('1014-02-11 07:15:11.123'),
('0947-02-11 07:15:11.123'),
-('0200-02-11 07:15:11.123')
+('0947-02-11 07:15:11.12345'),
+('0200-02-11 07:15:11.123'),
+('0200-02-11 07:15:11.123456')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@hybrid_table
@@ -47,12 +57,17 @@ POSTHOOK: Input: default@hybrid_table
#### A masked pattern was here ####
2012-02-21 07:08:09.123
2014-02-11 07:08:09.123
+2014-02-11 07:08:09.123456
1947-02-11 07:08:09.123
+1947-02-11 07:08:09.1234
8200-02-11 07:08:09.123
1012-02-21 07:15:11.123
+1012-02-21 07:15:11.12345
1014-02-11 07:15:11.123
0947-02-11 07:15:11.123
+0947-02-11 07:15:11.12345
0200-02-11 07:15:11.123
+0200-02-11 07:15:11.123456
PREHOOK: query: select * from hybrid_table
PREHOOK: type: QUERY
PREHOOK: Input: default@hybrid_table
@@ -63,12 +78,17 @@ POSTHOOK: Input: default@hybrid_table
#### A masked pattern was here ####
2012-02-21 07:08:09.123
2014-02-11 07:08:09.123
+2014-02-11 07:08:09.123456
1947-02-11 07:08:09.123
+1947-02-11 07:08:09.1234
8200-02-11 07:08:09.123
1012-02-21 07:15:11.123
+1012-02-21 07:15:11.12345
1014-02-11 07:15:11.123
0947-02-11 07:15:11.123
+0947-02-11 07:15:11.12345
0200-02-11 07:15:11.123
+0200-02-11 07:15:11.123456
PREHOOK: query: drop table hybrid_table
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@hybrid_table
diff --git a/ql/src/test/results/clientpositive/llap/avro_timestamp.q.out
b/ql/src/test/results/clientpositive/llap/avro_timestamp.q.out
index 923f045644d..ccd3ff83f7a 100644
--- a/ql/src/test/results/clientpositive/llap/avro_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/avro_timestamp.q.out
@@ -83,6 +83,14 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26
07%3A08%3A09.123
1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"}
["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"}
["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"}
["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
+2012-02-21 07:08:09.123456 {"bar":"1998-05-07
07:08:09.123456","foo":"1980-12-16 07:08:09.123456"} ["2011-09-04
07:08:09.123456","2011-09-05 07:08:09.123456"] 2 2014-09-26
07:08:09.123
+2014-02-11 07:08:09.123456 {"baz":"1981-12-16 07:08:09.123456"}
["2011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+1947-02-11 07:08:09.123456 {"baz":"1921-12-16 07:08:09.123456"}
["2011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+8200-02-11 07:08:09.123456 {"baz":"6981-12-16 07:08:09.123456"}
["1039-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+1412-02-21 07:08:09.123456 {"bar":"0998-05-07
07:08:09.123456","foo":"0980-12-16 07:08:09.123456"} ["0011-09-04
07:08:09.123456","0011-09-05 07:08:09.123456"] 2 2014-09-26
07:08:09.123
+1214-02-11 07:08:09.123456 {"baz":"0981-12-16 07:08:09.123456"}
["0011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+0847-02-11 07:08:09.123456 {"baz":"0921-12-16 07:08:09.123456"}
["0011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+0600-02-11 07:08:09.123456 {"baz":"0981-12-16 07:08:09.123456"}
["0039-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
PREHOOK: query: SELECT d, COUNT(d) FROM avro_timestamp GROUP BY d
PREHOOK: type: QUERY
PREHOOK: Input: default@avro_timestamp
@@ -97,10 +105,18 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26
07%3A08%3A09.123
1214-02-11 07:08:09.123 1
1947-02-11 07:08:09.123 1
2014-02-11 07:08:09.123 1
+0600-02-11 07:08:09.123456 1
0847-02-11 07:08:09.123 1
+0847-02-11 07:08:09.123456 1
+1214-02-11 07:08:09.123456 1
1412-02-21 07:08:09.123 1
+1412-02-21 07:08:09.123456 1
+1947-02-11 07:08:09.123456 1
2012-02-21 07:08:09.123 1
+2012-02-21 07:08:09.123456 1
+2014-02-11 07:08:09.123456 1
8200-02-11 07:08:09.123 1
+8200-02-11 07:08:09.123456 1
PREHOOK: query: SELECT * FROM avro_timestamp WHERE d!='1947-02-11 07:08:09.123'
PREHOOK: type: QUERY
PREHOOK: Input: default@avro_timestamp
@@ -118,6 +134,14 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26
07%3A08%3A09.123
1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"}
["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"}
["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"}
["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
+2012-02-21 07:08:09.123456 {"bar":"1998-05-07
07:08:09.123456","foo":"1980-12-16 07:08:09.123456"} ["2011-09-04
07:08:09.123456","2011-09-05 07:08:09.123456"] 2 2014-09-26
07:08:09.123
+2014-02-11 07:08:09.123456 {"baz":"1981-12-16 07:08:09.123456"}
["2011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+1947-02-11 07:08:09.123456 {"baz":"1921-12-16 07:08:09.123456"}
["2011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+8200-02-11 07:08:09.123456 {"baz":"6981-12-16 07:08:09.123456"}
["1039-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+1412-02-21 07:08:09.123456 {"bar":"0998-05-07
07:08:09.123456","foo":"0980-12-16 07:08:09.123456"} ["0011-09-04
07:08:09.123456","0011-09-05 07:08:09.123456"] 2 2014-09-26
07:08:09.123
+1214-02-11 07:08:09.123456 {"baz":"0981-12-16 07:08:09.123456"}
["0011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+0847-02-11 07:08:09.123456 {"baz":"0921-12-16 07:08:09.123456"}
["0011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+0600-02-11 07:08:09.123456 {"baz":"0981-12-16 07:08:09.123456"}
["0039-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
PREHOOK: query: SELECT * FROM avro_timestamp WHERE d<'2014-12-21 07:08:09.123'
PREHOOK: type: QUERY
PREHOOK: Input: default@avro_timestamp
@@ -135,6 +159,13 @@ POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26
07%3A08%3A09.123
1214-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"}
["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
0847-02-11 07:08:09.123 {"baz":"0921-12-16 07:08:09.123"}
["0011-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
0600-02-11 07:08:09.123 {"baz":"0981-12-16 07:08:09.123"}
["0039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
+2012-02-21 07:08:09.123456 {"bar":"1998-05-07
07:08:09.123456","foo":"1980-12-16 07:08:09.123456"} ["2011-09-04
07:08:09.123456","2011-09-05 07:08:09.123456"] 2 2014-09-26
07:08:09.123
+2014-02-11 07:08:09.123456 {"baz":"1981-12-16 07:08:09.123456"}
["2011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+1947-02-11 07:08:09.123456 {"baz":"1921-12-16 07:08:09.123456"}
["2011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+1412-02-21 07:08:09.123456 {"bar":"0998-05-07
07:08:09.123456","foo":"0980-12-16 07:08:09.123456"} ["0011-09-04
07:08:09.123456","0011-09-05 07:08:09.123456"] 2 2014-09-26
07:08:09.123
+1214-02-11 07:08:09.123456 {"baz":"0981-12-16 07:08:09.123456"}
["0011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+0847-02-11 07:08:09.123456 {"baz":"0921-12-16 07:08:09.123456"}
["0011-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
+0600-02-11 07:08:09.123456 {"baz":"0981-12-16 07:08:09.123456"}
["0039-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
PREHOOK: query: SELECT * FROM avro_timestamp WHERE d>'8000-12-01 07:08:09.123'
PREHOOK: type: QUERY
PREHOOK: Input: default@avro_timestamp
@@ -146,3 +177,4 @@ POSTHOOK: Input: default@avro_timestamp
POSTHOOK: Input: default@avro_timestamp@p1=2/p2=2014-09-26 07%3A08%3A09.123
#### A masked pattern was here ####
8200-02-11 07:08:09.123 {"baz":"6981-12-16 07:08:09.123"}
["1039-09-05 07:08:09.123"] 2 2014-09-26 07:08:09.123
+8200-02-11 07:08:09.123456 {"baz":"6981-12-16 07:08:09.123456"}
["1039-09-05 07:08:09.123456"] 2 2014-09-26 07:08:09.123
diff --git
a/ql/src/test/results/clientpositive/llap/avro_timestamp_micros.q.out
b/ql/src/test/results/clientpositive/llap/avro_timestamp_micros.q.out
new file mode 100644
index 00000000000..95c43135f5b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/avro_timestamp_micros.q.out
@@ -0,0 +1,36 @@
+PREHOOK: query: CREATE EXTERNAL TABLE micros_table(`dt` timestamp)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@micros_table
+POSTHOOK: query: CREATE EXTERNAL TABLE micros_table(`dt` timestamp)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@micros_table
+PREHOOK: query: INSERT INTO micros_table VALUES
+(cast('2024-08-09 14:08:26.326107' as timestamp)),
+('2012-02-21 07:08:09.123'),
+('1014-02-11 07:15:11.12345')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@micros_table
+POSTHOOK: query: INSERT INTO micros_table VALUES
+(cast('2024-08-09 14:08:26.326107' as timestamp)),
+('2012-02-21 07:08:09.123'),
+('1014-02-11 07:15:11.12345')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@micros_table
+POSTHOOK: Lineage: micros_table.dt SCRIPT []
+PREHOOK: query: SELECT * FROM micros_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@micros_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM micros_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@micros_table
+#### A masked pattern was here ####
+2024-08-09 14:08:26.326107
+2012-02-21 07:08:09.123
+1014-02-11 07:15:11.12345
diff --git
a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
index 11d66277ef9..1fe99085a21 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
@@ -29,8 +29,10 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.TimeZone;
+import org.apache.avro.LogicalType;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData;
@@ -96,6 +98,7 @@ class AvroDeserializer {
private Boolean writerZoneConversionLegacy = null;
private Configuration configuration = null;
+ private LogicalType logicalType = null;
AvroDeserializer() {}
@@ -220,12 +223,60 @@ private List<Object> workerBase(List<Object> objectRow,
Schema fileSchema, List<
Object datum = record.get(columnName);
Schema datumSchema = record.getSchema().getField(columnName).schema();
Schema.Field field =
AvroSerdeUtils.isNullableType(fileSchema)?AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema).getField(columnName):fileSchema.getField(columnName);
+ if (field != null) {
+ // This single call handles all cases: direct, union, map, and array.
+ logicalType = findNestedLogicalType(field, field.schema());
+ }
objectRow.add(worker(datum, field == null ? null : field.schema(),
datumSchema, columnType));
}
return objectRow;
}
+ /**
+ * Recursively traverses a schema to find the first LogicalType.
+ * This handles direct logical types, and logical types nested within
+ * UNIONS, MAPS, or ARRAYS.
+ *
+ * @param field
+ * @param schema The schema to inspect.
+ * @return The found LogicalType, or null if none is found.
+ */
+ private LogicalType findNestedLogicalType(Schema.Field field, Schema schema)
{
+ // Base Case 1: The schema is null, so we can't proceed.
+ if (schema == null) {
+ return null;
+ }
+
+ // Base Case 2: The schema itself has the logical type. We found it.
+ if (schema.getLogicalType() != null) {
+ return schema.getLogicalType();
+ }
+
+ // Recursive Step: The logical type is deeper. Check the container type.
+ switch (schema.getType()) {
+ case UNION:
+ // Find the first non-null branch and search within it.
+ return schema.getTypes().stream()
+ .filter(s -> s.getType() != Schema.Type.NULL)
+ .map(s -> findNestedLogicalType(field, s)) // Recurse on the
branch
+ .filter(java.util.Objects::nonNull)
+ .findFirst()
+ .orElse(null);
+ case MAP:
+ // Search within the map's value schema.
+ return findNestedLogicalType(field, schema.getValueType());
+ case ARRAY:
+ // Search within the array's element schema.
+ return findNestedLogicalType(field, schema.getElementType());
+ default:
+ // This type (e.g., STRING, INT) doesn't contain a nested schema.
+ return field.getProp("logicalType") != null
+ ? new LogicalType(field.getProp("logicalType"))
+ : null;
+ }
+ }
+
private Object worker(Object datum, Schema fileSchema, Schema recordSchema,
TypeInfo columnType)
throws AvroSerdeException {
if (datum == null) {
@@ -388,11 +439,22 @@ private Object deserializePrimitive(Object datum, Schema
fileSchema, Schema reco
skipProlepticConversion =
HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN_DEFAULT.defaultBoolVal;
}
}
- Timestamp timestamp = TimestampTZUtil.convertTimestampToZone(
- Timestamp.ofEpochMilli((Long) datum), ZoneOffset.UTC,
convertToTimeZone, legacyConversion);
+ Timestamp timestamp;
+ if (logicalType != null &&
logicalType.getName().equalsIgnoreCase(AvroSerDe.TIMESTAMP_TYPE_NAME_MICROS)) {
+ timestamp = Timestamp.ofEpochMicro((Long) datum);
+ } else {
+ timestamp = Timestamp.ofEpochMilli((Long) datum);
+ }
+ timestamp = TimestampTZUtil.convertTimestampToZone(
+ timestamp, ZoneOffset.UTC, convertToTimeZone, legacyConversion);
if (!skipProlepticConversion) {
- timestamp = Timestamp.ofEpochMilli(
- CalendarUtils.convertTimeToProleptic(timestamp.toEpochMilli()));
+ if (logicalType != null &&
logicalType.getName().equalsIgnoreCase(AvroSerDe.TIMESTAMP_TYPE_NAME_MICROS)) {
+ timestamp = Timestamp.ofEpochMicro(
+
CalendarUtils.convertTimeToProlepticMicros(timestamp.toEpochMicro()));
+ } else {
+ timestamp = Timestamp.ofEpochMilli(
+
CalendarUtils.convertTimeToProleptic(timestamp.toEpochMilli()));
+ }
}
return timestamp;
}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
index 100475f06d7..e6c42687c8b 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
@@ -57,7 +57,8 @@ public class AvroSerDe extends AbstractSerDe {
public static final String CHAR_TYPE_NAME = "char";
public static final String VARCHAR_TYPE_NAME = "varchar";
public static final String DATE_TYPE_NAME = "date";
- public static final String TIMESTAMP_TYPE_NAME = "timestamp-millis";
+ public static final String TIMESTAMP_TYPE_NAME_MILLIS = "timestamp-millis";
+ public static final String TIMESTAMP_TYPE_NAME_MICROS = "timestamp-micros";
public static final String WRITER_TIME_ZONE = "writer.time.zone";
public static final String WRITER_PROLEPTIC = "writer.proleptic";
public static final String WRITER_ZONE_CONVERSION_LEGACY =
"writer.zone.conversion.legacy";
diff --git
a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
index 409ad9d23fd..0abb6d09846 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
@@ -24,6 +24,7 @@
import java.util.Set;
import java.util.TimeZone;
+import org.apache.avro.LogicalType;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
@@ -231,11 +232,8 @@ private Object serializePrimitive(TypeInfo typeInfo,
PrimitiveObjectInspector fi
case TIMESTAMP:
Timestamp timestamp =
((TimestampObjectInspector)
fieldOI).getPrimitiveJavaObject(structFieldData);
- long millis = defaultProleptic ? timestamp.toEpochMilli() :
- CalendarUtils.convertTimeToHybrid(timestamp.toEpochMilli());
- timestamp = TimestampTZUtil.convertTimestampToZone(
- Timestamp.ofEpochMilli(millis), TimeZone.getDefault().toZoneId(),
ZoneOffset.UTC, legacyConversion);
- return timestamp.toEpochMilli();
+ LogicalType logicalType = schema.getLogicalType();
+ return getEpochTimestamp(timestamp, logicalType);
case UNKNOWN:
throw new AvroSerdeException("Received UNKNOWN primitive category.");
case VOID:
@@ -245,6 +243,31 @@ private Object serializePrimitive(TypeInfo typeInfo,
PrimitiveObjectInspector fi
}
}
+ /**
+ * @param timestamp
+ * @param logicalType
+ * @return Object
+ * The following has TIMESTAMP MILLIS as a default in case there is no
+ * logicalType present. This is done to uphold backward compatibility.
+ */
+ private Object getEpochTimestamp(Timestamp timestamp, LogicalType
logicalType) {
+ switch (logicalType.getName()) {
+ case AvroSerDe.TIMESTAMP_TYPE_NAME_MICROS:
+ long micros = defaultProleptic ? timestamp.toEpochMicro() :
+
CalendarUtils.convertTimeToHybridMicros(timestamp.toEpochMicro());
+ timestamp = TimestampTZUtil.convertTimestampToZone(
+ Timestamp.ofEpochMicro(micros),
TimeZone.getDefault().toZoneId(), ZoneOffset.UTC, legacyConversion);
+ return timestamp.toEpochMicro();
+
+ default:
+ long millis = defaultProleptic ? timestamp.toEpochMilli() :
+ CalendarUtils.convertTimeToHybrid(timestamp.toEpochMilli());
+ timestamp = TimestampTZUtil.convertTimestampToZone(
+ Timestamp.ofEpochMilli(millis),
TimeZone.getDefault().toZoneId(), ZoneOffset.UTC, legacyConversion);
+ return timestamp.toEpochMilli();
+ }
+ }
+
private Object serializeUnion(UnionTypeInfo typeInfo, UnionObjectInspector
fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
byte tag = fieldOI.getTag(structFieldData);
diff --git
a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
index aaf9f2f7a4f..e944041ca38 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
@@ -179,8 +179,7 @@ public static TypeInfo generateTypeInfo(Schema schema,
return TypeInfoFactory.dateTypeInfo;
}
- if (type == LONG &&
-
AvroSerDe.TIMESTAMP_TYPE_NAME.equals(schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE)))
{
+ if (type == LONG && isTimestampType(schema)) {
return TypeInfoFactory.timestampTypeInfo;
}
@@ -197,6 +196,25 @@ private static int getIntValue(Object obj) {
return value;
}
+ /**
+ * Checks if the given Avro schema represents a timestamp type
+ * based on its logical type property.
+ */
+ public static boolean isTimestampType(Schema schema) {
+ if (schema == null) {
+ return false;
+ }
+
+ String logicalType = schema.getProp(AvroSerDe.AVRO_PROP_LOGICAL_TYPE);
+ if (logicalType == null) {
+ return false;
+ }
+
+ // Supported timestamp logical types (extend this set as needed)
+ return AvroSerDe.TIMESTAMP_TYPE_NAME_MILLIS.equalsIgnoreCase(logicalType)
||
+ AvroSerDe.TIMESTAMP_TYPE_NAME_MICROS.equalsIgnoreCase(logicalType);
+ }
+
private static TypeInfo generateTypeInfoWorker(Schema schema,
Set<Schema> seenSchemas) throws AvroSerdeException {
// Avro requires NULLable types to be defined as unions of some type T
diff --git
a/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
b/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
index 7b6af3b8930..e8d05aacdd9 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
@@ -159,7 +159,7 @@ private Schema createAvroPrimitive(TypeInfo typeInfo) {
case TIMESTAMP:
schema = AvroSerdeUtils.getSchemaFor("{" +
"\"type\":\"" + AvroSerDe.AVRO_LONG_TYPE_NAME + "\"," +
- "\"logicalType\":\"" + AvroSerDe.TIMESTAMP_TYPE_NAME + "\"}");
+ "\"logicalType\":\"" + AvroSerDe.TIMESTAMP_TYPE_NAME_MICROS + "\"}");
break;
case VOID:
schema = Schema.create(Schema.Type.NULL);
diff --git
a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
index 24800dad5af..291f65ed2d1 100644
---
a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
+++
b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
@@ -322,6 +322,49 @@ public void canDeserializeTimestamps() throws
SerDeException, IOException {
assertEquals(Timestamp.valueOf("2019-01-01 16:00:00.999"),
resultTimestamp);
}
+ @Test
+ public void canDeserializeTimestampsMicros() throws SerDeException,
IOException {
+ List<String> columnNames = new ArrayList<>();
+ columnNames.add("timestampField");
+ List<TypeInfo> columnTypes = new ArrayList<>();
+ columnTypes.add(TypeInfoFactory.getPrimitiveTypeInfo("timestamp"));
+ Schema readerSchema =
+
AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.TIMESTAMP_SCHEMA_MICROS);
+
+ // 2019-01-02 00:00:00.123456 GMT is 1546387200123456 microseconds after
epoch
+ GenericData.Record record = new GenericData.Record(readerSchema);
+ record.put("timestampField", 1546387200123456L);
+ assertTrue(GENERIC_DATA.validate(readerSchema, record));
+
+ AvroGenericRecordWritable agrw = new
AvroGenericRecordWritable(ZoneId.of("America/New_York"), false, false);
+ agrw.setRecord(record);
+ agrw.setFileSchema(readerSchema);
+ agrw.setRecordReaderID(new UID());
+
+ AvroDeserializer deserializer = new AvroDeserializer();
+ ArrayList<Object> row =
+ (ArrayList<Object>) deserializer.deserialize(columnNames,
columnTypes, agrw, readerSchema);
+ Timestamp resultTimestamp = (Timestamp) row.get(0);
+
+ // 2019-01-02 00:00:00.123456 GMT is 2019-01-01 19:00:00.123456 GMT-0500
(America/New_York / EST)
+ assertEquals(Timestamp.valueOf("2019-01-01 19:00:00.123456"),
resultTimestamp);
+
+ // Do the same without specifying writer time zone. This tests
deserialization of older records
+ // which should be interpreted in Instant semantics
+ AvroGenericRecordWritable agrw2 = new AvroGenericRecordWritable();
+ agrw2.setRecord(record);
+ agrw2.setFileSchema(readerSchema);
+ agrw2.setRecordReaderID(new UID());
+
+ row =
+ (ArrayList<Object>) deserializer.deserialize(columnNames,
columnTypes, agrw2, readerSchema);
+ resultTimestamp = (Timestamp) row.get(0);
+
+ // 2019-01-02 00:00:00.123456 GMT is 2019-01-01 16:00:00.123456 in zone
GMT-0800 (PST)
+ // This is the time zone for VM in test.
+ assertEquals(Timestamp.valueOf("2019-01-01 16:00:00.123456"),
resultTimestamp);
+ }
+
@Test
public void canDeserializeUnions() throws SerDeException, IOException {
Schema s =
AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.UNION_SCHEMA);
diff --git
a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
index 048d3d970d6..cdf75598f7a 100644
---
a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
+++
b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
@@ -227,10 +227,20 @@ public class TestAvroObjectInspectorGenerator {
" \"fields\" : [\n" +
" {\"name\":\"timestampField\", " +
" \"type\":\"" + AvroSerDe.AVRO_LONG_TYPE_NAME + "\", " +
- " \"logicalType\":\"" + AvroSerDe.TIMESTAMP_TYPE_NAME + "\"}" +
+ " \"logicalType\":\"" + AvroSerDe.TIMESTAMP_TYPE_NAME_MILLIS +
"\"}" +
" ]\n" +
"}";
+ public static final String TIMESTAMP_SCHEMA_MICROS = "{\n" +
+ " \"type\": \"record\", \n" +
+ " \"name\": \"timestampTest\",\n" +
+ " \"fields\" : [\n" +
+ " {\"name\":\"timestampField\", " +
+ " \"type\":\"" + AvroSerDe.AVRO_LONG_TYPE_NAME + "\", " +
+ " \"logicalType\":\"" + AvroSerDe.TIMESTAMP_TYPE_NAME_MICROS
+ "\"}" +
+ " ]\n" +
+ "}";
+
public static final String KITCHEN_SINK_SCHEMA = "{\n" +
" \"namespace\": \"org.apache.hadoop.hive\",\n" +
" \"name\": \"kitchsink\",\n" +
diff --git
a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
index bcd0fd1acf9..0908a4b2b91 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
@@ -125,7 +125,14 @@ public void canSerializeDoubles() throws SerDeException,
IOException {
public void canSerializeTimestamps() throws SerDeException, IOException {
singleFieldTest("timestamp1", Timestamp.valueOf("2011-01-01
00:00:00").toEpochMilli(),
"\"" + AvroSerDe.AVRO_LONG_TYPE_NAME + "\"," +
- "\"logicalType\":\"" + AvroSerDe.TIMESTAMP_TYPE_NAME + "\"");
+ "\"logicalType\":\"" + AvroSerDe.TIMESTAMP_TYPE_NAME_MILLIS + "\"");
+ }
+
+ @Test
+ public void canSerializeTimestampsMicros() throws SerDeException,
IOException {
+ singleFieldTest("timestamp1", Timestamp.valueOf("2011-01-01
00:00:00").toEpochMicro(),
+ "\"" + AvroSerDe.AVRO_LONG_TYPE_NAME + "\"," +
+ "\"logicalType\":\"" +
AvroSerDe.TIMESTAMP_TYPE_NAME_MICROS + "\"");
}
@Test
diff --git
a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
index ac0a8ee46dd..029144d6378 100644
---
a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
+++
b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java
@@ -261,7 +261,7 @@ public void createAvroDateSchema() {
public void createAvroTimestampSchema() {
final String specificSchema = "{" +
"\"type\":\"long\"," +
- "\"logicalType\":\"timestamp-millis\"}";
+ "\"logicalType\":\"timestamp-micros\"}";
String expectedSchema = genSchema(specificSchema);
Assert.assertEquals("Test for timestamp in avro schema failed",
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
b/storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
index 9214d3cd137..e45bcaa3669 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
@@ -19,6 +19,12 @@
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.format.DateTimeFormatter;
+import java.time.Instant;
+import java.time.format.DateTimeParseException;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.TimeZone;
@@ -43,6 +49,8 @@ public class CalendarUtils {
private static final Logger LOG =
LoggerFactory.getLogger(CalendarUtils.class);
public static final long SWITCHOVER_MILLIS;
public static final long SWITCHOVER_DAYS;
+ public static final long SWITCHOVER_MICROS;
+ public static final long SWITCHOVER_DAYS_MICROS;
private static SimpleDateFormat createFormatter(String fmt, boolean
proleptic) {
SimpleDateFormat result = new SimpleDateFormat(fmt);
@@ -56,22 +64,40 @@ private static SimpleDateFormat createFormatter(String fmt,
boolean proleptic) {
private static final String DATE = "yyyy-MM-dd";
private static final String TIME = DATE + " HH:mm:ss.SSS";
+ // Microsecond-precision pattern (6 digits)
+ private static final String TIME_MICROS = DATE + " HH:mm:ss.SSSSSS";
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
private static final ThreadLocal<SimpleDateFormat> HYBRID_DATE_FORMAT =
ThreadLocal.withInitial(() -> createFormatter(DATE, false));
+ private static final DateTimeFormatter HYBRID_DATE_FORMAT_MICROS =
+ DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneOffset.UTC);
private static final ThreadLocal<SimpleDateFormat> HYBRID_TIME_FORMAT =
ThreadLocal.withInitial(() -> createFormatter(TIME, false));
+ private static final DateTimeFormatter HYBRID_TIME_FORMAT_MICROS =
+ DateTimeFormatter.ofPattern(TIME_MICROS).withZone(ZoneOffset.UTC);
private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_DATE_FORMAT =
ThreadLocal.withInitial(() -> createFormatter(DATE, true));
+ private static final DateTimeFormatter PROLEPTIC_DATE_FORMAT_MICROS =
+ DateTimeFormatter.ofPattern(DATE).withZone(ZoneOffset.UTC);
private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_TIME_FORMAT =
ThreadLocal.withInitial(() -> createFormatter(TIME, true));
+ private static final DateTimeFormatter PROLEPTIC_TIME_FORMAT_MICROS =
+ DateTimeFormatter.ofPattern(TIME_MICROS).withZone(ZoneOffset.UTC);
static {
// Get the last day where the two calendars agree with each other.
try {
SWITCHOVER_MILLIS =
HYBRID_DATE_FORMAT.get().parse("1582-10-15").getTime();
SWITCHOVER_DAYS = TimeUnit.MILLISECONDS.toDays(SWITCHOVER_MILLIS);
+
+ // Microsecond-precision equivalents of the switchover constants above
+ Instant switchoverInstant = LocalDate.parse("1582-10-15",
HYBRID_DATE_FORMAT_MICROS)
+ .atStartOfDay(ZoneOffset.UTC)
+ .toInstant();
+ SWITCHOVER_MICROS = switchoverInstant.getEpochSecond() * 1_000_000L +
+ switchoverInstant.getNano() / 1000L;
+ SWITCHOVER_DAYS_MICROS = TimeUnit.MICROSECONDS.toDays(SWITCHOVER_MICROS);
} catch (ParseException e) {
throw new IllegalArgumentException("Can't parse switch over date", e);
}
@@ -161,6 +187,51 @@ public static long convertTimeToProleptic(long hybrid) {
return proleptic;
}
+ /**
+ * Converts epoch microseconds between the hybrid Julian/Gregorian and proleptic Gregorian calendars.
+ *
+ * This method preserves the string representation of the timestamp by formatting the input
+ * microseconds using the provided {@code fromFormatter}, then parsing it back using the
+ * {@code toFormatter}. This effectively shifts the epoch offset between the two calendar
+ * systems for dates before the switchover threshold.
+ *
+ * NOTE(review): {@code DateTimeFormatter} uses the ISO (proleptic Gregorian) chronology by
+ * default — confirm the "hybrid" formatters actually model the Julian/Gregorian switchover
+ * (cf. the {@code SimpleDateFormat}-based millis path, which sets the Gregorian change date);
+ * otherwise this format/parse round-trip is a no-op for pre-1582 dates.
+ *
+ * @param micros Epoch microseconds to convert.
+ * @param threshold Switchover threshold in microseconds; conversion only occurs if micros < threshold.
+ * @param fromFormatter Formatter used to format the instant (source calendar system).
+ * @param toFormatter Formatter used to parse the formatted string (target calendar system).
+ * @return Converted epoch microseconds in the target calendar system.
+ * @throws IllegalArgumentException if the formatted date string cannot be parsed.
+ */
+ private static long convertMicros(long micros, long threshold,
DateTimeFormatter fromFormatter, DateTimeFormatter toFormatter) {
+ long result = micros;
+ if (micros < threshold) {
+ Instant instant = Instant.ofEpochSecond(
+ micros / 1_000_000L,
+ (micros % 1_000_000L) * 1_000L
+ );
+ String dateStr = fromFormatter.format(instant.atZone(ZoneOffset.UTC));
+ try {
+ Instant parsedInstant = LocalDateTime.parse(dateStr, toFormatter)
+ .atZone(ZoneOffset.UTC)
+ .toInstant();
+ result = parsedInstant.getEpochSecond() * 1_000_000L +
+ parsedInstant.getNano() / 1000L;
+ } catch (DateTimeParseException e) {
+ throw new IllegalArgumentException("Can't parse " + dateStr, e);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Convert epoch microseconds from the hybrid Julian/Gregorian calendar to
the
+ * proleptic Gregorian.
+ * @param hybridMicros Microseconds of epoch in the hybrid Julian/Gregorian
+ * @return Microseconds of epoch in the proleptic Gregorian
+ */
+ public static long convertTimeToProlepticMicros(long hybridMicros) {
+ return convertMicros(hybridMicros, SWITCHOVER_MICROS,
HYBRID_TIME_FORMAT_MICROS, PROLEPTIC_TIME_FORMAT_MICROS);
+ }
+
/**
* Convert epoch millis from the proleptic Gregorian calendar to the hybrid
* Julian/Gregorian.
@@ -180,6 +251,16 @@ public static long convertTimeToHybrid(long proleptic) {
return hybrid;
}
+ /**
+ * Convert epoch microseconds from the proleptic Gregorian calendar to the
+ * hybrid Julian/Gregorian.
+ * @param prolepticMicros Microseconds of epoch in the proleptic Gregorian
+ * @return Microseconds of epoch in the hybrid Julian/Gregorian
+ */
+ public static long convertTimeToHybridMicros(long prolepticMicros) {
+ return convertMicros(prolepticMicros, SWITCHOVER_MICROS,
PROLEPTIC_TIME_FORMAT_MICROS, HYBRID_TIME_FORMAT_MICROS);
+ }
+
/**
*
* Formats epoch day to date according to proleptic or hybrid calendar