This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1f71014435 GH-38648: [Java] Regenerate Flatbuffers (#38650)
1f71014435 is described below

commit 1f71014435fd56c915aebe0a9ac982f6e8de6f94
Author: Dane Pitkin <[email protected]>
AuthorDate: Thu Nov 9 10:53:55 2023 -0500

    GH-38648: [Java] Regenerate Flatbuffers (#38650)
    
    ### Rationale for this change
    
    Regenerate the Flatbuffers to include new formats. Flatbuffers are always 
forwards/backwards compatible as long as the code is generated from the same 
`flatc` compiler version.
    
    ### What changes are included in this PR?
    
    * Flatbuffers regenerated with `flatc` v1.12.0
    
    ### Are these changes tested?
    
    Yes, via unit tests.
    
    ### Are there any user-facing changes?
    
    Yes, RecordBatch.java was modified.
    * Closes: #38648
    
    Authored-by: Dane Pitkin <[email protected]>
    Signed-off-by: David Li <[email protected]>
---
 .../arrow/flatbuf/{Date.java => BinaryView.java}   |  36 ++---
 .../org/apache/arrow/flatbuf/BodyCompression.java  |   3 +-
 .../main/java/org/apache/arrow/flatbuf/Date.java   |   4 +-
 .../flatbuf/{Date.java => LargeListView.java}      |  33 ++---
 .../arrow/flatbuf/{Date.java => ListView.java}     |  34 ++---
 .../java/org/apache/arrow/flatbuf/RecordBatch.java |  33 ++++-
 .../flatbuf/{Date.java => RunEndEncoded.java}      |  36 ++---
 .../main/java/org/apache/arrow/flatbuf/Time.java   |  17 ++-
 .../java/org/apache/arrow/flatbuf/Timestamp.java   | 148 +++++++++++++++------
 .../main/java/org/apache/arrow/flatbuf/Type.java   |   7 +-
 .../arrow/flatbuf/{Date.java => Utf8View.java}     |  36 ++---
 11 files changed, 223 insertions(+), 164 deletions(-)

diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java
index b2fcc9e39e..56a8d32953 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java
@@ -25,32 +25,24 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
+ * Logically the same as Binary, but the internal representation uses a view
+ * struct that contains the string length and either the string's entire data
+ * inline (for small strings) or an inlined prefix, an index of another buffer,
+ * and an offset pointing to a slice in that buffer (for non-small strings).
  *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- *   leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Since it uses a variable number of data buffers, each Field with this type
+ * must have a corresponding entry in `variadicBufferCounts`.
  */
-public final class Date extends Table {
+public final class BinaryView extends Table {
   public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
-  public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, 
new Date()); }
-  public static Date getRootAsDate(ByteBuffer _bb, Date obj) { 
_bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public static BinaryView getRootAsBinaryView(ByteBuffer _bb) { return 
getRootAsBinaryView(_bb, new BinaryView()); }
+  public static BinaryView getRootAsBinaryView(ByteBuffer _bb, BinaryView obj) 
{ _bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
   public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
-  public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; 
}
+  public BinaryView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return 
this; }
 
-  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + 
bb_pos) : 1; }
 
-  public static int createDate(FlatBufferBuilder builder,
-      short unit) {
-    builder.startTable(1);
-    Date.addUnit(builder, unit);
-    return Date.endDate(builder);
-  }
-
-  public static void startDate(FlatBufferBuilder builder) { 
builder.startTable(1); }
-  public static void addUnit(FlatBufferBuilder builder, short unit) { 
builder.addShort(0, unit, 1); }
-  public static int endDate(FlatBufferBuilder builder) {
+  public static void startBinaryView(FlatBufferBuilder builder) { 
builder.startTable(0); }
+  public static int endBinaryView(FlatBufferBuilder builder) {
     int o = builder.endTable();
     return o;
   }
@@ -58,8 +50,8 @@ public final class Date extends Table {
   public static final class Vector extends BaseVector {
     public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { 
__reset(_vector, _element_size, _bb); return this; }
 
-    public Date get(int j) { return get(new Date(), j); }
-    public Date get(Date obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
+    public BinaryView get(int j) { return get(new BinaryView(), j); }
+    public BinaryView get(BinaryView obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
   }
 }
 
diff --git 
a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
index 650454eb15..ed8ce0939a 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java
@@ -37,7 +37,8 @@ public final class BodyCompression extends Table {
   public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); 
return this; }
 
   /**
-   * Compressor library
+   * Compressor library.
+   * For LZ4_FRAME, each compressed buffer must consist of a single frame.
    */
   public byte codec() { int o = __offset(4); return o != 0 ? bb.get(o + 
bb_pos) : 0; }
   /**
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
index b2fcc9e39e..ac6e389835 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
@@ -25,8 +25,8 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
+ * Date is either a 32-bit or 64-bit signed integer type representing an
+ * elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
  *
  * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
  *   leap seconds), where the values are evenly divisible by 86400000
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java
index b2fcc9e39e..08c31c23a9 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java
@@ -25,32 +25,19 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
- *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- *   leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Same as ListView, but with 64-bit offsets and sizes, which allows
+ * extremely large data values to be represented.
  */
-public final class Date extends Table {
+public final class LargeListView extends Table {
   public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
-  public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, 
new Date()); }
-  public static Date getRootAsDate(ByteBuffer _bb, Date obj) { 
_bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public static LargeListView getRootAsLargeListView(ByteBuffer _bb) { return 
getRootAsLargeListView(_bb, new LargeListView()); }
+  public static LargeListView getRootAsLargeListView(ByteBuffer _bb, 
LargeListView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
   public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
-  public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; 
}
+  public LargeListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); 
return this; }
 
-  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + 
bb_pos) : 1; }
 
-  public static int createDate(FlatBufferBuilder builder,
-      short unit) {
-    builder.startTable(1);
-    Date.addUnit(builder, unit);
-    return Date.endDate(builder);
-  }
-
-  public static void startDate(FlatBufferBuilder builder) { 
builder.startTable(1); }
-  public static void addUnit(FlatBufferBuilder builder, short unit) { 
builder.addShort(0, unit, 1); }
-  public static int endDate(FlatBufferBuilder builder) {
+  public static void startLargeListView(FlatBufferBuilder builder) { 
builder.startTable(0); }
+  public static int endLargeListView(FlatBufferBuilder builder) {
     int o = builder.endTable();
     return o;
   }
@@ -58,8 +45,8 @@ public final class Date extends Table {
   public static final class Vector extends BaseVector {
     public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { 
__reset(_vector, _element_size, _bb); return this; }
 
-    public Date get(int j) { return get(new Date(), j); }
-    public Date get(Date obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
+    public LargeListView get(int j) { return get(new LargeListView(), j); }
+    public LargeListView get(LargeListView obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
   }
 }
 
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java
index b2fcc9e39e..2c9ad4c13d 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java
@@ -25,32 +25,20 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
- *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- *   leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Represents the same logical types that List can, but contains offsets and
+ * sizes allowing for writes in any order and sharing of child values among
+ * list values.
  */
-public final class Date extends Table {
+public final class ListView extends Table {
   public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
-  public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, 
new Date()); }
-  public static Date getRootAsDate(ByteBuffer _bb, Date obj) { 
_bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public static ListView getRootAsListView(ByteBuffer _bb) { return 
getRootAsListView(_bb, new ListView()); }
+  public static ListView getRootAsListView(ByteBuffer _bb, ListView obj) { 
_bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
   public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
-  public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; 
}
+  public ListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return 
this; }
 
-  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + 
bb_pos) : 1; }
 
-  public static int createDate(FlatBufferBuilder builder,
-      short unit) {
-    builder.startTable(1);
-    Date.addUnit(builder, unit);
-    return Date.endDate(builder);
-  }
-
-  public static void startDate(FlatBufferBuilder builder) { 
builder.startTable(1); }
-  public static void addUnit(FlatBufferBuilder builder, short unit) { 
builder.addShort(0, unit, 1); }
-  public static int endDate(FlatBufferBuilder builder) {
+  public static void startListView(FlatBufferBuilder builder) { 
builder.startTable(0); }
+  public static int endListView(FlatBufferBuilder builder) {
     int o = builder.endTable();
     return o;
   }
@@ -58,8 +46,8 @@ public final class Date extends Table {
   public static final class Vector extends BaseVector {
     public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { 
__reset(_vector, _element_size, _bb); return this; }
 
-    public Date get(int j) { return get(new Date(), j); }
-    public Date get(Date obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
+    public ListView get(int j) { return get(new ListView(), j); }
+    public ListView get(ListView obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
   }
 }
 
diff --git 
a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
index eb814e07dc..ce907ee0fd 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java
@@ -67,27 +67,54 @@ public final class RecordBatch extends Table {
    */
   public org.apache.arrow.flatbuf.BodyCompression compression() { return 
compression(new org.apache.arrow.flatbuf.BodyCompression()); }
   public org.apache.arrow.flatbuf.BodyCompression 
compression(org.apache.arrow.flatbuf.BodyCompression obj) { int o = 
__offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; }
+  /**
+   * Some types such as Utf8View are represented using a variable number of 
buffers.
+   * For each such Field in the pre-ordered flattened logical schema, there 
will be
+   * an entry in variadicBufferCounts to indicate the number of variadic
+   * buffers which belong to that Field in the current RecordBatch.
+   *
+   * For example, the schema
+   *     col1: Struct<alpha: Int32, beta: BinaryView, gamma: Float64>
+   *     col2: Utf8View
+   * contains two Fields with variadic buffers so variadicBufferCounts will 
have
+   * two entries, the first counting the variadic buffers of `col1.beta` and 
the
+   * second counting `col2`'s.
+   *
+   * This field may be omitted if and only if the schema contains no Fields 
with
+   * a variable number of buffers, such as BinaryView and Utf8View.
+   */
+  public long variadicBufferCounts(int j) { int o = __offset(12); return o != 
0 ? bb.getLong(__vector(o) + j * 8) : 0; }
+  public int variadicBufferCountsLength() { int o = __offset(12); return o != 
0 ? __vector_len(o) : 0; }
+  public LongVector variadicBufferCountsVector() { return 
variadicBufferCountsVector(new LongVector()); }
+  public LongVector variadicBufferCountsVector(LongVector obj) { int o = 
__offset(12); return o != 0 ? obj.__assign(__vector(o), bb) : null; }
+  public ByteBuffer variadicBufferCountsAsByteBuffer() { return 
__vector_as_bytebuffer(12, 8); }
+  public ByteBuffer variadicBufferCountsInByteBuffer(ByteBuffer _bb) { return 
__vector_in_bytebuffer(_bb, 12, 8); }
 
   public static int createRecordBatch(FlatBufferBuilder builder,
       long length,
       int nodesOffset,
       int buffersOffset,
-      int compressionOffset) {
-    builder.startTable(4);
+      int compressionOffset,
+      int variadicBufferCountsOffset) {
+    builder.startTable(5);
     RecordBatch.addLength(builder, length);
+    RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset);
     RecordBatch.addCompression(builder, compressionOffset);
     RecordBatch.addBuffers(builder, buffersOffset);
     RecordBatch.addNodes(builder, nodesOffset);
     return RecordBatch.endRecordBatch(builder);
   }
 
-  public static void startRecordBatch(FlatBufferBuilder builder) { 
builder.startTable(4); }
+  public static void startRecordBatch(FlatBufferBuilder builder) { 
builder.startTable(5); }
   public static void addLength(FlatBufferBuilder builder, long length) { 
builder.addLong(0, length, 0L); }
   public static void addNodes(FlatBufferBuilder builder, int nodesOffset) { 
builder.addOffset(1, nodesOffset, 0); }
   public static void startNodesVector(FlatBufferBuilder builder, int numElems) 
{ builder.startVector(16, numElems, 8); }
   public static void addBuffers(FlatBufferBuilder builder, int buffersOffset) 
{ builder.addOffset(2, buffersOffset, 0); }
   public static void startBuffersVector(FlatBufferBuilder builder, int 
numElems) { builder.startVector(16, numElems, 8); }
   public static void addCompression(FlatBufferBuilder builder, int 
compressionOffset) { builder.addOffset(3, compressionOffset, 0); }
+  public static void addVariadicBufferCounts(FlatBufferBuilder builder, int 
variadicBufferCountsOffset) { builder.addOffset(4, variadicBufferCountsOffset, 
0); }
+  public static int createVariadicBufferCountsVector(FlatBufferBuilder 
builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = 
data.length - 1; i >= 0; i--) builder.addLong(data[i]); return 
builder.endVector(); }
+  public static void startVariadicBufferCountsVector(FlatBufferBuilder 
builder, int numElems) { builder.startVector(8, numElems, 8); }
   public static int endRecordBatch(FlatBufferBuilder builder) {
     int o = builder.endTable();
     return o;
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java
index b2fcc9e39e..d48733ef0c 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java
@@ -25,32 +25,22 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
- *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- *   leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Contains two child arrays, run_ends and values.
+ * The run_ends child array must be a 16/32/64-bit integer array
+ * which encodes the indices at which the run with the value in 
+ * each corresponding index in the values child array ends.
+ * Like list/struct types, the value array can be of any type.
  */
-public final class Date extends Table {
+public final class RunEndEncoded extends Table {
   public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
-  public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, 
new Date()); }
-  public static Date getRootAsDate(ByteBuffer _bb, Date obj) { 
_bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb) { return 
getRootAsRunEndEncoded(_bb, new RunEndEncoded()); }
+  public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb, 
RunEndEncoded obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
   public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
-  public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; 
}
+  public RunEndEncoded __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); 
return this; }
 
-  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + 
bb_pos) : 1; }
 
-  public static int createDate(FlatBufferBuilder builder,
-      short unit) {
-    builder.startTable(1);
-    Date.addUnit(builder, unit);
-    return Date.endDate(builder);
-  }
-
-  public static void startDate(FlatBufferBuilder builder) { 
builder.startTable(1); }
-  public static void addUnit(FlatBufferBuilder builder, short unit) { 
builder.addShort(0, unit, 1); }
-  public static int endDate(FlatBufferBuilder builder) {
+  public static void startRunEndEncoded(FlatBufferBuilder builder) { 
builder.startTable(0); }
+  public static int endRunEndEncoded(FlatBufferBuilder builder) {
     int o = builder.endTable();
     return o;
   }
@@ -58,8 +48,8 @@ public final class Date extends Table {
   public static final class Vector extends BaseVector {
     public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { 
__reset(_vector, _element_size, _bb); return this; }
 
-    public Date get(int j) { return get(new Date(), j); }
-    public Date get(Date obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
+    public RunEndEncoded get(int j) { return get(new RunEndEncoded(), j); }
+    public RunEndEncoded get(RunEndEncoded obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
   }
 }
 
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
index 596d403a3e..9acc3fc7a5 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java
@@ -25,9 +25,20 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Time type. The physical storage type depends on the unit
- * - SECOND and MILLISECOND: 32 bits
- * - MICROSECOND and NANOSECOND: 64 bits
+ * Time is either a 32-bit or 64-bit signed integer type representing an
+ * elapsed time since midnight, stored in one of four units: seconds,
+ * milliseconds, microseconds or nanoseconds.
+ *
+ * The integer `bitWidth` depends on the `unit` and must be one of the 
following:
+ * * SECOND and MILLISECOND: 32 bits
+ * * MICROSECOND and NANOSECOND: 64 bits
+ *
+ * The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
+ * (exclusive), adjusted for the time unit (for example, up to 86400000
+ * exclusive for the MILLISECOND unit).
+ * This definition doesn't allow for leap seconds. Time values from
+ * measurements with leap seconds will need to be corrected when ingesting
+ * into Arrow (for example by replacing the value 86400 with 86399).
  */
 public final class Time extends Table {
   public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
index 041452607c..fe0c6aaea2 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java
@@ -25,37 +25,111 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
- * leap seconds, as a 64-bit integer. Note that UNIX time does not include
- * leap seconds.
+ * Timestamp is a 64-bit signed integer representing an elapsed time since a
+ * fixed epoch, stored in one of four units: seconds, milliseconds,
+ * microseconds or nanoseconds, and is optionally annotated with a timezone.
+ *
+ * Timestamp values do not include any leap seconds (in other words, all
+ * days are considered 86400 seconds long).
+ *
+ * Timestamps with a non-empty timezone
+ * ------------------------------------
+ *
+ * If a Timestamp column has a non-empty timezone value, its epoch is
+ * 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
+ * (the Unix epoch), regardless of the Timestamp's own timezone.
+ *
+ * Therefore, timestamp values with a non-empty timezone correspond to
+ * physical points in time together with some additional information about
+ * how the data was obtained and/or how to display it (the timezone).
+ *
+ *   For example, the timestamp value 0 with the timezone string "Europe/Paris"
+ *   corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
+ *   application may prefer to display it as "January 1st 1970, 01h00" in
+ *   the Europe/Paris timezone (which is the same physical point in time).
+ *
+ * One consequence is that timestamp values with a non-empty timezone
+ * can be compared and ordered directly, since they all share the same
+ * well-known point of reference (the Unix epoch).
+ *
+ * Timestamps with an unset / empty timezone
+ * -----------------------------------------
+ *
+ * If a Timestamp column has no timezone value, its epoch is
+ * 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
+ *
+ * Therefore, timestamp values without a timezone cannot be meaningfully
+ * interpreted as physical points in time, but only as calendar / clock
+ * indications ("wall clock time") in an unspecified timezone.
+ *
+ *   For example, the timestamp value 0 with an empty timezone string
+ *   corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
+ *   is not enough information to interpret it as a well-defined physical
+ *   point in time.
+ *
+ * One consequence is that timestamp values without a timezone cannot
+ * be reliably compared or ordered, since they may have different points of
+ * reference.  In particular, it is *not* possible to interpret an unset
+ * or empty timezone as the same as "UTC".
+ *
+ * Conversion between timezones
+ * ----------------------------
+ *
+ * If a Timestamp column has a non-empty timezone, changing the timezone
+ * to a different non-empty value is a metadata-only operation:
+ * the timestamp values need not change as their point of reference remains
+ * the same (the Unix epoch).
+ *
+ * However, if a Timestamp column has no timezone value, changing it to a
+ * non-empty value requires thinking about the desired semantics.
+ * One possibility is to assume that the original timestamp values are
+ * relative to the epoch of the timezone being set; timestamp values should
+ * then be adjusted to the Unix epoch (for example, changing the timezone from
+ * empty to "Europe/Paris" would require converting the timestamp values
+ * from "Europe/Paris" to "UTC", which seems counter-intuitive but is
+ * nevertheless correct).
+ *
+ * Guidelines for encoding data from external libraries
+ * ----------------------------------------------------
  *
  * Date & time libraries often have multiple different data types for temporal
- * data.  In order to ease interoperability between different implementations 
the
+ * data. In order to ease interoperability between different implementations 
the
  * Arrow project has some recommendations for encoding these types into a 
Timestamp
  * column.
  *
- * An "instant" represents a single moment in time that has no meaningful time 
zone
- * or the time zone is unknown.  A column of instants can also contain values 
from
- * multiple time zones.  To encode an instant set the timezone string to "UTC".
- *
- * A "zoned date-time" represents a single moment in time that has a meaningful
- * reference time zone.  To encode a zoned date-time as a Timestamp set the 
timezone
- * string to the name of the timezone.  There is some ambiguity between an 
instant
- * and a zoned date-time with the UTC time zone.  Both of these are stored the 
same.
- * Typically, this distinction does not matter.  If it does, then an 
application should
- * use custom metadata or an extension type to distinguish between the two 
cases.
- *
- * An "offset date-time" represents a single moment in time combined with a 
meaningful
- * offset from UTC.  To encode an offset date-time as a Timestamp set the 
timezone string
- * to the numeric time zone offset string (e.g. "+03:00").
- *
- * A "local date-time" does not represent a single moment in time.  It 
represents a wall
- * clock time combined with a date.  Because of daylight savings time there 
may multiple
- * instants that correspond to a single local date-time in any given time 
zone.  A
- * local date-time is often stored as a struct or a Date32/Time64 pair.  
However, it can
- * also be encoded into a Timestamp column.  To do so the value should be the 
the time
- * elapsed from the Unix epoch so that a wall clock in UTC would display the 
desired time.
- * The timezone string should be set to null or the empty string.
+ * An "instant" represents a physical point in time that has no relevant 
timezone
+ * (for example, astronomical data). To encode an instant, use a Timestamp with
+ * the timezone string set to "UTC", and make sure the Timestamp values
+ * are relative to the UTC epoch (January 1st 1970, midnight).
+ *
+ * A "zoned date-time" represents a physical point in time annotated with an
+ * informative timezone (for example, the timezone in which the data was
+ * recorded).  To encode a zoned date-time, use a Timestamp with the timezone
+ * string set to the name of the timezone, and make sure the Timestamp values
+ * are relative to the UTC epoch (January 1st 1970, midnight).
+ *
+ *  (There is some ambiguity between an instant and a zoned date-time with the
+ *   UTC timezone.  Both of these are stored the same in Arrow.  Typically,
+ *   this distinction does not matter.  If it does, then an application should
+ *   use custom metadata or an extension type to distinguish between the two 
cases.)
+ *
+ * An "offset date-time" represents a physical point in time combined with an
+ * explicit offset from UTC.  To encode an offset date-time, use a Timestamp
+ * with the timezone string set to the numeric timezone offset string
+ * (e.g. "+03:00"), and make sure the Timestamp values are relative to
+ * the UTC epoch (January 1st 1970, midnight).
+ *
+ * A "naive date-time" (also called "local date-time" in some libraries)
+ * represents a wall clock time combined with a calendar date, but with
+ * no indication of how to map this information to a physical point in time.
+ * Naive date-times must be handled with care because of this missing
+ * information, and also because daylight saving time (DST) may make
+ * some values ambiguous or non-existent. A naive date-time may be
+ * stored as a struct with Date and Time fields. However, it may also be
+ * encoded into a Timestamp column with an empty timezone. The timestamp
+ * values should be computed "as if" the timezone of the date-time values
+ * was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
+ * be encoded as timestamp value 0.
  */
 public final class Timestamp extends Table {
   public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
@@ -66,24 +140,16 @@ public final class Timestamp extends Table {
 
   public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + 
bb_pos) : 0; }
   /**
-   * The time zone is a string indicating the name of a time zone, one of:
+   * The timezone is an optional string indicating the name of a timezone,
+   * one of:
    *
-   * * As used in the Olson time zone database (the "tz database" or
-   *   "tzdata"), such as "America/New_York"
-   * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+   * * As used in the Olson timezone database (the "tz database" or
+   *   "tzdata"), such as "America/New_York".
+   * * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
+   *   such as "+07:30".
    *
    * Whether a timezone string is present indicates different semantics about
-   * the data:
-   *
-   * * If the time zone is null or an empty string, the data is a local date-time
-   *   and does not represent a single moment in time.  Instead it represents a wall clock
-   *   time and care should be taken to avoid interpreting it semantically as an instant.
-   *
-   * * If the time zone is set to a valid value, values can be displayed as
-   *   "localized" to that time zone, even though the underlying 64-bit
-   *   integers are identical to the same data stored in UTC. Converting
-   *   between time zones is a metadata-only operation and does not change the
-   *   underlying values
+   * the data (see above).
    */
   public String timezone() { int o = __offset(6); return o != 0 ? __string(o + 
bb_pos) : null; }
   public ByteBuffer timezoneAsByteBuffer() { return __vector_as_bytebuffer(6, 
1); }
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
index 5f1a550cff..29248bb23c 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java
@@ -47,8 +47,13 @@ public final class Type {
   public static final byte LargeBinary = 19;
   public static final byte LargeUtf8 = 20;
   public static final byte LargeList = 21;
+  public static final byte RunEndEncoded = 22;
+  public static final byte BinaryView = 23;
+  public static final byte Utf8View = 24;
+  public static final byte ListView = 25;
+  public static final byte LargeListView = 26;
 
-  public static final String[] names = { "NONE", "Null", "Int", 
"FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", 
"Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", 
"FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", };
+  public static final String[] names = { "NONE", "Null", "Int", 
"FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", 
"Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", 
"FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", 
"RunEndEncoded", "BinaryView", "Utf8View", "ListView", "LargeListView", };
 
   public static String name(int e) { return names[e]; }
 }
diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java
similarity index 50%
copy from java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
copy to java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java
index b2fcc9e39e..035c977576 100644
--- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java
+++ b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java
@@ -25,32 +25,24 @@ import com.google.flatbuffers.*;
 
 @SuppressWarnings("unused")
 /**
- * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
- * epoch (1970-01-01), stored in either of two units:
+ * Logically the same as Utf8, but the internal representation uses a view
+ * struct that contains the string length and either the string's entire data
+ * inline (for small strings) or an inlined prefix, an index of another buffer,
+ * and an offset pointing to a slice in that buffer (for non-small strings).
  *
- * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
- *   leap seconds), where the values are evenly divisible by 86400000
- * * Days (32 bits) since the UNIX epoch
+ * Since it uses a variable number of data buffers, each Field with this type
+ * must have a corresponding entry in `variadicBufferCounts`.
  */
-public final class Date extends Table {
+public final class Utf8View extends Table {
   public static void ValidateVersion() { Constants.FLATBUFFERS_1_12_0(); }
-  public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, 
new Date()); }
-  public static Date getRootAsDate(ByteBuffer _bb, Date obj) { 
_bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
+  public static Utf8View getRootAsUtf8View(ByteBuffer _bb) { return 
getRootAsUtf8View(_bb, new Utf8View()); }
+  public static Utf8View getRootAsUtf8View(ByteBuffer _bb, Utf8View obj) { 
_bb.order(ByteOrder.LITTLE_ENDIAN); return 
(obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); }
   public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); }
-  public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; 
}
+  public Utf8View __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return 
this; }
 
-  public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + 
bb_pos) : 1; }
 
-  public static int createDate(FlatBufferBuilder builder,
-      short unit) {
-    builder.startTable(1);
-    Date.addUnit(builder, unit);
-    return Date.endDate(builder);
-  }
-
-  public static void startDate(FlatBufferBuilder builder) { 
builder.startTable(1); }
-  public static void addUnit(FlatBufferBuilder builder, short unit) { 
builder.addShort(0, unit, 1); }
-  public static int endDate(FlatBufferBuilder builder) {
+  public static void startUtf8View(FlatBufferBuilder builder) { 
builder.startTable(0); }
+  public static int endUtf8View(FlatBufferBuilder builder) {
     int o = builder.endTable();
     return o;
   }
@@ -58,8 +50,8 @@ public final class Date extends Table {
   public static final class Vector extends BaseVector {
     public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { 
__reset(_vector, _element_size, _bb); return this; }
 
-    public Date get(int j) { return get(new Date(), j); }
-    public Date get(Date obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
+    public Utf8View get(int j) { return get(new Utf8View(), j); }
+    public Utf8View get(Utf8View obj, int j) {  return 
obj.__assign(__indirect(__element(j), bb), bb); }
   }
 }
 


Reply via email to