This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1f71014435 GH-38648: [Java] Regenerate Flatbuffers (#38650)
1f71014435 is described below
commit 1f71014435fd56c915aebe0a9ac982f6e8de6f94
Author: Dane Pitkin <[email protected]>
AuthorDate: Thu Nov 9 10:53:55 2023 -0500
GH-38648: [Java] Regenerate Flatbuffers (#38650)
### Rationale for this change
Regenerate the Flatbuffers to include new formats. Flatbuffers are always
forwards/backwards compatible as long as the code is generated from the same
`flatc` compiler version.
### What changes are included in this PR?
* Flatbuffers regenerated with `flatc` v1.12.0
### Are these changes tested?
Yes, via unit tests.
### Are there any user-facing changes?
Yes, RecordBatch.java was modified.
* Closes: #38648
Authored-by: Dane Pitkin <[email protected]>
Signed-off-by: David Li <[email protected]>
---
.../arrow/flatbuf/{Date.java => BinaryView.java} | 36 ++---
.../org/apache/arrow/flatbuf/BodyCompression.java | 3 +-
.../main/java/org/apache/arrow/flatbuf/Date.java | 4 +-
.../flatbuf/{Date.java => LargeListView.java} | 33 ++---
.../arrow/flatbuf/{Date.java => ListView.java} | 34 ++---
.../java/org/apache/arrow/flatbuf/RecordBatch.java | 33 ++++-
.../flatbuf/{Date.java => RunEndEncoded.java} | 36 ++---
.../main/java/org/apache/arrow/flatbuf/Time.java | 17 ++-
.../java/org/apache/arrow/flatbuf/Timestamp.java | 148 +++++++++++++++------
.../main/java/org/apache/arrow/flatbuf/Type.java | 7 +-
.../arrow/flatbuf/{Date.java => Utf8View.java} | 36 ++---
11 files changed, 223 insertions(+), 164 deletions(-)
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java
index b2fcc9e39e..56a8d32953 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java
@@ -25,32 +25,24 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
+ * Logically the same as Binary, but the internal representation uses a view
+ * struct that contains the string length and either the string's entire data
+ * inline (for small strings) or an inlined prefix, an index of another buffer,
+ * and an offset pointing to a slice in that buffer (for non-small strings).
*
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- * leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Since it uses a variable number of data buffers, each Field with this type
+ * must have a corresponding entry in `variadicBufferCounts`.
*/
-public final class Date extends Table {
+public final class BinaryView extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
- public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb,
new Date()); }
- public static Date getRootAsDate(ByteBuffer _bb, Date obj) {
_bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public static BinaryView getRootAsBinaryView(ByteBuffer _bb) { return
getRootAsBinaryView(_bb, new BinaryView()); }
+ public static BinaryView getRootAsBinaryView(ByteBuffer _bb, BinaryView obj)
{ _bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
- public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this;
}
+ public BinaryView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return
this; }
- public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o +
bb_pos) : 1; }
- public static int createDate(FlatBufferBuilder builder,
- short unit) {
- builder.startTable(1);
- Date.addUnit(builder, unit);
- return Date.endDate(builder);
- }
-
- public static void startDate(FlatBufferBuilder builder) {
builder.startTable(1); }
- public static void addUnit(FlatBufferBuilder builder, short unit) {
builder.addShort(0, unit, 1); }
- public static int endDate(FlatBufferBuilder builder) {
+ public static void startBinaryView(FlatBufferBuilder builder) {
builder.startTable(0); }
+ public static int endBinaryView(FlatBufferBuilder builder) {
int o = builder.endTable();
return o;
}
@@ -58,8 +50,8 @@ public final class Date extends Table {
public static final class Vector extends BaseVector {
public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) {
__reset(_vector, _element_size, _bb); return this; }
- public Date get(int j) { return get(new Date(), j); }
- public Date get(Date obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
+ public BinaryView get(int j) { return get(new BinaryView(), j); }
+ public BinaryView get(BinaryView obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
}
}
diff --git
a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
index 650454eb15..ed8ce0939a 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
@@ -37,7 +37,8 @@ public final class BodyCompression extends Table {
public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb);
return this; }
/**
- * Compressor library
+ * Compressor library.
+ * For LZ4_FRAME, each compressed buffer must consist of a single frame.
*/
public byte codec() { int o = __offset(4); return o != 0 ? bb.get(o +
bb_pos) : 0; }
/**
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
index b2fcc9e39e..ac6e389835 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
@@ -25,8 +25,8 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
+ * Date is either a 32-bit or 64-bit signed integer type representing an
+ * elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
*
* * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
* leap seconds), where the values are evenly divisible by 86400000
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java
index b2fcc9e39e..08c31c23a9 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java
@@ -25,32 +25,19 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
- *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- * leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Same as ListView, but with 64-bit offsets and sizes, allowing to represent
+ * extremely large data values.
*/
-public final class Date extends Table {
+public final class LargeListView extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
- public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb,
new Date()); }
- public static Date getRootAsDate(ByteBuffer _bb, Date obj) {
_bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public static LargeListView getRootAsLargeListView(ByteBuffer _bb) { return
getRootAsLargeListView(_bb, new LargeListView()); }
+ public static LargeListView getRootAsLargeListView(ByteBuffer _bb,
LargeListView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
- public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this;
}
+ public LargeListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb);
return this; }
- public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o +
bb_pos) : 1; }
- public static int createDate(FlatBufferBuilder builder,
- short unit) {
- builder.startTable(1);
- Date.addUnit(builder, unit);
- return Date.endDate(builder);
- }
-
- public static void startDate(FlatBufferBuilder builder) {
builder.startTable(1); }
- public static void addUnit(FlatBufferBuilder builder, short unit) {
builder.addShort(0, unit, 1); }
- public static int endDate(FlatBufferBuilder builder) {
+ public static void startLargeListView(FlatBufferBuilder builder) {
builder.startTable(0); }
+ public static int endLargeListView(FlatBufferBuilder builder) {
int o = builder.endTable();
return o;
}
@@ -58,8 +45,8 @@ public final class Date extends Table {
public static final class Vector extends BaseVector {
public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) {
__reset(_vector, _element_size, _bb); return this; }
- public Date get(int j) { return get(new Date(), j); }
- public Date get(Date obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
+ public LargeListView get(int j) { return get(new LargeListView(), j); }
+ public LargeListView get(LargeListView obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
}
}
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java
index b2fcc9e39e..2c9ad4c13d 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java
@@ -25,32 +25,20 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
- *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- * leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Represents the same logical types that List can, but contains offsets and
+ * sizes allowing for writes in any order and sharing of child values among
+ * list values.
*/
-public final class Date extends Table {
+public final class ListView extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
- public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb,
new Date()); }
- public static Date getRootAsDate(ByteBuffer _bb, Date obj) {
_bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public static ListView getRootAsListView(ByteBuffer _bb) { return
getRootAsListView(_bb, new ListView()); }
+ public static ListView getRootAsListView(ByteBuffer _bb, ListView obj) {
_bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
- public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this;
}
+ public ListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return
this; }
- public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o +
bb_pos) : 1; }
- public static int createDate(FlatBufferBuilder builder,
- short unit) {
- builder.startTable(1);
- Date.addUnit(builder, unit);
- return Date.endDate(builder);
- }
-
- public static void startDate(FlatBufferBuilder builder) {
builder.startTable(1); }
- public static void addUnit(FlatBufferBuilder builder, short unit) {
builder.addShort(0, unit, 1); }
- public static int endDate(FlatBufferBuilder builder) {
+ public static void startListView(FlatBufferBuilder builder) {
builder.startTable(0); }
+ public static int endListView(FlatBufferBuilder builder) {
int o = builder.endTable();
return o;
}
@@ -58,8 +46,8 @@ public final class Date extends Table {
public static final class Vector extends BaseVector {
public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) {
__reset(_vector, _element_size, _bb); return this; }
- public Date get(int j) { return get(new Date(), j); }
- public Date get(Date obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
+ public ListView get(int j) { return get(new ListView(), j); }
+ public ListView get(ListView obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
}
}
diff --git
a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
index eb814e07dc..ce907ee0fd 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
@@ -67,27 +67,54 @@ public final class RecordBatch extends Table {
*/
public org.apache.arrow.flatbuf.BodyCompression compression() { return
compression(new org.apache.arrow.flatbuf.BodyCompression()); }
public org.apache.arrow.flatbuf.BodyCompression
compression(org.apache.arrow.flatbuf.BodyCompression obj) { int o =
__offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+ /**
+ * Some types such as Utf8View are represented using a variable number of
buffers.
+ * For each such Field in the pre-ordered flattened logical schema, there
will be
+ * an entry in variadicBufferCounts to indicate the number of
variadic
+ * buffers which belong to that Field in the current RecordBatch.
+ *
+ * For example, the schema
+ * col1: Struct<alpha: Int32, beta: BinaryView, gamma: Float64>
+ * col2: Utf8View
+ * contains two Fields with variadic buffers so variadicBufferCounts will
have
+ * two entries, the first counting the variadic buffers of `col1.beta` and
the
+ * second counting `col2`'s.
+ *
+ * This field may be omitted if and only if the schema contains no Fields
with
+ * a variable number of buffers, such as BinaryView and Utf8View.
+ */
+ public long variadicBufferCounts(int j) { int o = __offset(12); return o !=
0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+ public int variadicBufferCountsLength() { int o = __offset(12); return o !=
0 ? __vector_len(o) : 0; }
+ public LongVector variadicBufferCountsVector() { return
variadicBufferCountsVector(new LongVector()); }
+ public LongVector variadicBufferCountsVector(LongVector obj) { int o =
__offset(12); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+ public ByteBuffer variadicBufferCountsAsByteBuffer() { return
__vector_as_bytebuffer(12, 8); }
+ public ByteBuffer variadicBufferCountsInByteBuffer(ByteBuffer _bb) { return
__vector_in_bytebuffer(_bb, 12, 8); }
public static int createRecordBatch(FlatBufferBuilder builder,
long length,
int nodesOffset,
int buffersOffset,
- int compressionOffset) {
- builder.startTable(4);
+ int compressionOffset,
+ int variadicBufferCountsOffset) {
+ builder.startTable(5);
RecordBatch.addLength(builder, length);
+ RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset);
RecordBatch.addCompression(builder, compressionOffset);
RecordBatch.addBuffers(builder, buffersOffset);
RecordBatch.addNodes(builder, nodesOffset);
return RecordBatch.endRecordBatch(builder);
}
- public static void startRecordBatch(FlatBufferBuilder builder) {
builder.startTable(4); }
+ public static void startRecordBatch(FlatBufferBuilder builder) {
builder.startTable(5); }
public static void addLength(FlatBufferBuilder builder, long length) {
builder.addLong(0, length, 0L); }
public static void addNodes(FlatBufferBuilder builder, int nodesOffset) {
builder.addOffset(1, nodesOffset, 0); }
public static void startNodesVector(FlatBufferBuilder builder, int numElems)
{ builder.startVector(16, numElems, 8); }
public static void addBuffers(FlatBufferBuilder builder, int buffersOffset)
{ builder.addOffset(2, buffersOffset, 0); }
public static void startBuffersVector(FlatBufferBuilder builder, int
numElems) { builder.startVector(16, numElems, 8); }
public static void addCompression(FlatBufferBuilder builder, int
compressionOffset) { builder.addOffset(3, compressionOffset, 0); }
+ public static void addVariadicBufferCounts(FlatBufferBuilder builder, int
variadicBufferCountsOffset) { builder.addOffset(4, variadicBufferCountsOffset,
0); }
+ public static int createVariadicBufferCountsVector(FlatBufferBuilder
builder, long[] data) { builder.startVector(8, data.length, 8); for (int i =
data.length - 1; i >= 0; i--) builder.addLong(data[i]); return
builder.endVector(); }
+ public static void startVariadicBufferCountsVector(FlatBufferBuilder
builder, int numElems) { builder.startVector(8, numElems, 8); }
public static int endRecordBatch(FlatBufferBuilder builder) {
int o = builder.endTable();
return o;
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java
index b2fcc9e39e..d48733ef0c 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java
@@ -25,32 +25,22 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
- *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- * leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Contains two child arrays, run_ends and values.
+ * The run_ends child array must be a 16/32/64-bit integer array
+ * which encodes the indices at which the run with the value in
+ * each corresponding index in the values child array ends.
+ * Like list/struct types, the value array can be of any type.
*/
-public final class Date extends Table {
+public final class RunEndEncoded extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
- public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb,
new Date()); }
- public static Date getRootAsDate(ByteBuffer _bb, Date obj) {
_bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb) { return
getRootAsRunEndEncoded(_bb, new RunEndEncoded()); }
+ public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb,
RunEndEncoded obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
- public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this;
}
+ public RunEndEncoded __assign(int _i, ByteBuffer _bb) { __init(_i, _bb);
return this; }
- public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o +
bb_pos) : 1; }
- public static int createDate(FlatBufferBuilder builder,
- short unit) {
- builder.startTable(1);
- Date.addUnit(builder, unit);
- return Date.endDate(builder);
- }
-
- public static void startDate(FlatBufferBuilder builder) {
builder.startTable(1); }
- public static void addUnit(FlatBufferBuilder builder, short unit) {
builder.addShort(0, unit, 1); }
- public static int endDate(FlatBufferBuilder builder) {
+ public static void startRunEndEncoded(FlatBufferBuilder builder) {
builder.startTable(0); }
+ public static int endRunEndEncoded(FlatBufferBuilder builder) {
int o = builder.endTable();
return o;
}
@@ -58,8 +48,8 @@ public final class Date extends Table {
public static final class Vector extends BaseVector {
public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) {
__reset(_vector, _element_size, _bb); return this; }
- public Date get(int j) { return get(new Date(), j); }
- public Date get(Date obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
+ public RunEndEncoded get(int j) { return get(new RunEndEncoded(), j); }
+ public RunEndEncoded get(RunEndEncoded obj, int j) { return
obj.__assign(__indirect(__element(j), bb), bb); }
}
}
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
index 596d403a3e..9acc3fc7a5 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
@@ -25,9 +25,20 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Time type. The physical storage type depends on the unit
- * - SECOND and MILLISECOND: 32 bits
- * - MICROSECOND and NANOSECOND: 64 bits
+ * Time is either a 32-bit or 64-bit signed integer type representing an
+ * elapsed time since midnight, stored in either of four units: seconds,
+ * milliseconds, microseconds or nanoseconds.
+ *
+ * The integer `bitWidth` depends on the `unit` and must be one of the
following:
+ * * SECOND and MILLISECOND: 32 bits
+ * * MICROSECOND and NANOSECOND: 64 bits
+ *
+ * The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
+ * (exclusive), adjusted for the time unit (for example, up to 86400000
+ * exclusive for the MILLISECOND unit).
+ * This definition doesn't allow for leap seconds. Time values from
+ * measurements with leap seconds will need to be corrected when ingesting
+ * into Arrow (for example by replacing the value 86400 with 86399).
*/
public final class Time extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
index 041452607c..fe0c6aaea2 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
@@ -25,37 +25,111 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
- * leap seconds, as a 64-bit integer. Note that UNIX time does not include
- * leap seconds.
+ * Timestamp is a 64-bit signed integer representing an elapsed time since a
+ * fixed epoch, stored in either of four units: seconds, milliseconds,
+ * microseconds or nanoseconds, and is optionally annotated with a timezone.
+ *
+ * Timestamp values do not include any leap seconds (in other words, all
+ * days are considered 86400 seconds long).
+ *
+ * Timestamps with a non-empty timezone
+ * ------------------------------------
+ *
+ * If a Timestamp column has a non-empty timezone value, its epoch is
+ * 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
+ * (the Unix epoch), regardless of the Timestamp's own timezone.
+ *
+ * Therefore, timestamp values with a non-empty timezone correspond to
+ * physical points in time together with some additional information about
+ * how the data was obtained and/or how to display it (the timezone).
+ *
+ * For example, the timestamp value 0 with the timezone string "Europe/Paris"
+ * corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
+ * application may prefer to display it as "January 1st 1970, 01h00" in
+ * the Europe/Paris timezone (which is the same physical point in time).
+ *
+ * One consequence is that timestamp values with a non-empty timezone
+ * can be compared and ordered directly, since they all share the same
+ * well-known point of reference (the Unix epoch).
+ *
+ * Timestamps with an unset / empty timezone
+ * -----------------------------------------
+ *
+ * If a Timestamp column has no timezone value, its epoch is
+ * 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
+ *
+ * Therefore, timestamp values without a timezone cannot be meaningfully
+ * interpreted as physical points in time, but only as calendar / clock
+ * indications ("wall clock time") in an unspecified timezone.
+ *
+ * For example, the timestamp value 0 with an empty timezone string
+ * corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
+ * is not enough information to interpret it as a well-defined physical
+ * point in time.
+ *
+ * One consequence is that timestamp values without a timezone cannot
+ * be reliably compared or ordered, since they may have different points of
+ * reference. In particular, it is *not* possible to interpret an unset
+ * or empty timezone as the same as "UTC".
+ *
+ * Conversion between timezones
+ * ----------------------------
+ *
+ * If a Timestamp column has a non-empty timezone, changing the timezone
+ * to a different non-empty value is a metadata-only operation:
+ * the timestamp values need not change as their point of reference remains
+ * the same (the Unix epoch).
+ *
+ * However, if a Timestamp column has no timezone value, changing it to a
+ * non-empty value requires thinking about the desired semantics.
+ * One possibility is to assume that the original timestamp values are
+ * relative to the epoch of the timezone being set; timestamp values should
+ * then be adjusted to the Unix epoch (for example, changing the timezone from
+ * empty to "Europe/Paris" would require converting the timestamp values
+ * from "Europe/Paris" to "UTC", which seems counter-intuitive but is
+ * nevertheless correct).
+ *
+ * Guidelines for encoding data from external libraries
+ * ----------------------------------------------------
*
* Date & time libraries often have multiple different data types for temporal
- * data. In order to ease interoperability between different implementations
the
+ * data. In order to ease interoperability between different implementations
the
* Arrow project has some recommendations for encoding these types into a
Timestamp
* column.
*
- * An "instant" represents a single moment in time that has no meaningful time
zone
- * or the time zone is unknown. A column of instants can also contain values
from
- * multiple time zones. To encode an instant set the timezone string to "UTC".
- *
- * A "zoned date-time" represents a single moment in time that has a meaningful
- * reference time zone. To encode a zoned date-time as a Timestamp set the
timezone
- * string to the name of the timezone. There is some ambiguity between an
instant
- * and a zoned date-time with the UTC time zone. Both of these are stored the
same.
- * Typically, this distinction does not matter. If it does, then an
application should
- * use custom metadata or an extension type to distinguish between the two
cases.
- *
- * An "offset date-time" represents a single moment in time combined with a
meaningful
- * offset from UTC. To encode an offset date-time as a Timestamp set the
timezone string
- * to the numeric time zone offset string (e.g. "+03:00").
- *
- * A "local date-time" does not represent a single moment in time. It
represents a wall
- * clock time combined with a date. Because of daylight savings time there
may multiple
- * instants that correspond to a single local date-time in any given time
zone. A
- * local date-time is often stored as a struct or a Date32/Time64 pair.
However, it can
- * also be encoded into a Timestamp column. To do so the value should be the
the time
- * elapsed from the Unix epoch so that a wall clock in UTC would display the
desired time.
- * The timezone string should be set to null or the empty string.
+ * An "instant" represents a physical point in time that has no relevant
timezone
+ * (for example, astronomical data). To encode an instant, use a Timestamp with
+ * the timezone string set to "UTC", and make sure the Timestamp values
+ * are relative to the UTC epoch (January 1st 1970, midnight).
+ *
+ * A "zoned date-time" represents a physical point in time annotated with an
+ * informative timezone (for example, the timezone in which the data was
+ * recorded). To encode a zoned date-time, use a Timestamp with the timezone
+ * string set to the name of the timezone, and make sure the Timestamp values
+ * are relative to the UTC epoch (January 1st 1970, midnight).
+ *
+ * (There is some ambiguity between an instant and a zoned date-time with the
+ * UTC timezone. Both of these are stored the same in Arrow. Typically,
+ * this distinction does not matter. If it does, then an application should
+ * use custom metadata or an extension type to distinguish between the two
cases.)
+ *
+ * An "offset date-time" represents a physical point in time combined with an
+ * explicit offset from UTC. To encode an offset date-time, use a Timestamp
+ * with the timezone string set to the numeric timezone offset string
+ * (e.g. "+03:00"), and make sure the Timestamp values are relative to
+ * the UTC epoch (January 1st 1970, midnight).
+ *
+ * A "naive date-time" (also called "local date-time" in some libraries)
+ * represents a wall clock time combined with a calendar date, but with
+ * no indication of how to map this information to a physical point in time.
+ * Naive date-times must be handled with care because of this missing
+ * information, and also because daylight saving time (DST) may make
+ * some values ambiguous or non-existent. A naive date-time may be
+ * stored as a struct with Date and Time fields. However, it may also be
+ * encoded into a Timestamp column with an empty timezone. The timestamp
+ * values should be computed "as if" the timezone of the date-time values
+ * was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
+ * be encoded as timestamp value 0.
*/
public final class Timestamp extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
@@ -66,24 +140,16 @@ public final class Timestamp extends Table {
public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o +
bb_pos) : 0; }
/**
- * The time zone is a string indicating the name of a time zone, one of:
+ * The timezone is an optional string indicating the name of a timezone,
+ * one of:
*
- * * As used in the Olson time zone database (the "tz database" or
- * "tzdata"), such as "America/New_York"
- * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as
+07:30
+ * * As used in the Olson timezone database (the "tz database" or
+ * "tzdata"), such as "America/New_York".
+ * * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
+ * such as "+07:30".
*
* Whether a timezone string is present indicates different semantics about
- * the data:
- *
- * * If the time zone is null or an empty string, the data is a local
date-time
- * and does not represent a single moment in time. Instead it represents
a wall clock
- * time and care should be taken to avoid interpreting it semantically as
an instant.
- *
- * * If the time zone is set to a valid value, values can be displayed as
- * "localized" to that time zone, even though the underlying 64-bit
- * integers are identical to the same data stored in UTC. Converting
- * between time zones is a metadata-only operation and does not change the
- * underlying values
+ * the data (see above).
*/
public String timezone() { int o = __offset(6); return o != 0 ? __string(o + bb_pos) : null; }
public ByteBuffer timezoneAsByteBuffer() { return __vector_as_bytebuffer(6, 1); }
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
index 5f1a550cff..29248bb23c 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
@@ -47,8 +47,13 @@ public final class Type {
public static final byte LargeBinary = 19;
public static final byte LargeUtf8 = 20;
public static final byte LargeList = 21;
+ public static final byte RunEndEncoded = 22;
+ public static final byte BinaryView = 23;
+ public static final byte Utf8View = 24;
+ public static final byte ListView = 25;
+ public static final byte LargeListView = 26;
- public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", };
+ public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", "Utf8View", "ListView", "LargeListView", };
public static String name(int e) { return names[e]; }
}
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java
index b2fcc9e39e..035c977576 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java
@@ -25,32 +25,24 @@ import com.google.flatbuffers.*;
@SuppressWarnings("unused")
/**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
+ * Logically the same as Utf8, but the internal representation uses a view
+ * struct that contains the string length and either the string's entire data
+ * inline (for small strings) or an inlined prefix, an index of another buffer,
+ * and an offset pointing to a slice in that buffer (for non-small strings).
*
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- * leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Since it uses a variable number of data buffers, each Field with this type
+ * must have a corresponding entry in `variadicBufferCounts`.
*/
-public final class Date extends Table {
+public final class Utf8View extends Table {
public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
- public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, new Date()); }
- public static Date getRootAsDate(ByteBuffer _bb, Date obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+ public static Utf8View getRootAsUtf8View(ByteBuffer _bb) { return getRootAsUtf8View(_bb, new Utf8View()); }
+ public static Utf8View getRootAsUtf8View(ByteBuffer _bb, Utf8View obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
- public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
+ public Utf8View __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; }
- public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; }
- public static int createDate(FlatBufferBuilder builder,
- short unit) {
- builder.startTable(1);
- Date.addUnit(builder, unit);
- return Date.endDate(builder);
- }
-
- public static void startDate(FlatBufferBuilder builder) { builder.startTable(1); }
- public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); }
- public static int endDate(FlatBufferBuilder builder) {
+ public static void startUtf8View(FlatBufferBuilder builder) { builder.startTable(0); }
+ public static int endUtf8View(FlatBufferBuilder builder) {
int o = builder.endTable();
return o;
}
@@ -58,8 +50,8 @@ public final class Date extends Table {
public static final class Vector extends BaseVector {
public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; }
- public Date get(int j) { return get(new Date(), j); }
- public Date get(Date obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
+ public Utf8View get(int j) { return get(new Utf8View(), j); }
+ public Utf8View get(Utf8View obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); }
}
}