This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2df33de ARROW-4571: [Format] Tensor.fbs file has multiple root_type declarations
2df33de is described below
commit 2df33dec5e8abc07aa6300bbd2ee634c57d8fcfe
Author: Kenta Murata <[email protected]>
AuthorDate: Mon Feb 18 10:20:33 2019 -0600
ARROW-4571: [Format] Tensor.fbs file has multiple root_type declarations
Author: Kenta Murata <[email protected]>
Closes #3651 from mrkn/separate_sparse_tensor_format and squashes the
following commits:
760cefaa9 <Kenta Murata> Add format/SparseTensor.fbs
1f92cfa5e <Kenta Murata> Separate SparseTensor.fbs from Tensor.fbs
---
cpp/src/arrow/ipc/CMakeLists.txt | 1 +
cpp/src/arrow/ipc/metadata-internal.cc | 3 +-
docs/source/format/README.rst | 2 +-
format/Message.fbs | 1 +
format/{Tensor.fbs => SparseTensor.fbs} | 36 ++-----------
format/Tensor.fbs | 93 ---------------------------------
java/format/pom.xml | 1 +
7 files changed, 9 insertions(+), 128 deletions(-)
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index fccd53c..352b2de 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -58,6 +58,7 @@ set(FBS_SRC
${ARROW_SOURCE_DIR}/../format/File.fbs
${ARROW_SOURCE_DIR}/../format/Schema.fbs
${ARROW_SOURCE_DIR}/../format/Tensor.fbs
+ ${ARROW_SOURCE_DIR}/../format/SparseTensor.fbs
${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)
foreach(FIL ${FBS_SRC})
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index da67113..38d8eaa 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -28,7 +28,8 @@
#include "arrow/io/interfaces.h"
#include "arrow/ipc/File_generated.h" // IWYU pragma: keep
#include "arrow/ipc/Message_generated.h"
-#include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep
+#include "arrow/ipc/SparseTensor_generated.h" // IWYU pragma: keep
+#include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep
#include "arrow/ipc/message.h"
#include "arrow/ipc/util.h"
#include "arrow/sparse_tensor.h"
diff --git a/docs/source/format/README.rst b/docs/source/format/README.rst
index f2f770b..4044026 100644
--- a/docs/source/format/README.rst
+++ b/docs/source/format/README.rst
@@ -25,7 +25,7 @@ Currently, the Arrow specification consists of these pieces:
- Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
- Encapsulated Messages (see Message.fbs)
- Mechanics of messaging between Arrow systems (IPC, RPC, etc.) (see
:doc:`IPC`)
-- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs)
+- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs and SparseTensor.fbs)
The metadata currently uses Google's `flatbuffers library`_ for serializing a
couple related pieces of information:
diff --git a/format/Message.fbs b/format/Message.fbs
index e14fdca..10adaaa 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -16,6 +16,7 @@
// under the License.
include "Schema.fbs";
+include "SparseTensor.fbs";
include "Tensor.fbs";
namespace org.apache.arrow.flatbuf;
diff --git a/format/Tensor.fbs b/format/SparseTensor.fbs
similarity index 80%
copy from format/Tensor.fbs
copy to format/SparseTensor.fbs
index e77b353..0a0c6c2 100644
--- a/format/Tensor.fbs
+++ b/format/SparseTensor.fbs
@@ -15,44 +15,14 @@
// specific language governing permissions and limitations
// under the License.
-/// EXPERIMENTAL: Metadata for n-dimensional arrays, aka "tensors" or
-/// "ndarrays". Arrow implementations in general are not required to implement
-/// this type
+/// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors".
+/// Arrow implementations in general are not required to implement this type
-include "Schema.fbs";
+include "Tensor.fbs";
namespace org.apache.arrow.flatbuf;
/// ----------------------------------------------------------------------
-/// Data structures for dense tensors
-
-/// Shape data for a single axis in a tensor
-table TensorDim {
- /// Length of dimension
- size: long;
-
- /// Name of the dimension, optional
- name: string;
-}
-
-table Tensor {
- /// The type of data contained in a value cell. Currently only fixed-width
- /// value types are supported, no strings or nested types
- type: Type;
-
- /// The dimensions of the tensor, optionally named
- shape: [TensorDim];
-
- /// Non-negative byte offsets to advance one value cell along each dimension
- strides: [long];
-
- /// The location and size of the tensor's data
- data: Buffer;
-}
-
-root_type Tensor;
-
-/// ----------------------------------------------------------------------
/// EXPERIMENTAL: Data structures for sparse tensors
/// Coodinate format of sparse tensor index.
diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index e77b353..01a20c3 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -51,96 +51,3 @@ table Tensor {
}
root_type Tensor;
-
-/// ----------------------------------------------------------------------
-/// EXPERIMENTAL: Data structures for sparse tensors
-
-/// Coodinate format of sparse tensor index.
-table SparseTensorIndexCOO {
- /// COO's index list are represented as a NxM matrix,
- /// where N is the number of non-zero values,
- /// and M is the number of dimensions of a sparse tensor.
- /// indicesBuffer stores the location and size of this index matrix.
- /// The type of index value is long, so the stride for the index matrix is unnecessary.
- ///
- /// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
- ///
- /// X[0, 1, 2, 0] := 1
- /// X[1, 1, 2, 3] := 2
- /// X[0, 2, 1, 0] := 3
- /// X[0, 1, 3, 0] := 4
- /// X[0, 1, 2, 1] := 5
- /// X[1, 2, 0, 4] := 6
- ///
- /// In COO format, the index matrix of X is the following 4x6 matrix:
- ///
- /// [[0, 0, 0, 0, 1, 1],
- /// [1, 1, 1, 2, 1, 2],
- /// [2, 2, 3, 1, 2, 0],
- /// [0, 1, 0, 0, 3, 4]]
- ///
- /// Note that the indices are sorted in lexcographical order.
- indicesBuffer: Buffer;
-}
-
-/// Compressed Sparse Row format, that is matrix-specific.
-table SparseMatrixIndexCSR {
- /// indptrBuffer stores the location and size of indptr array that
- /// represents the range of the rows.
- /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
- /// The length of this array is 1 + (the number of rows), and the type
- /// of index value is long.
- ///
- /// For example, let X be the following 6x4 matrix:
- ///
- /// X := [[0, 1, 2, 0],
- /// [0, 0, 3, 0],
- /// [0, 4, 0, 5],
- /// [0, 0, 0, 0],
- /// [6, 0, 7, 8],
- /// [0, 9, 0, 0]].
- ///
- /// The array of non-zero values in X is:
- ///
- /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
- ///
- /// And the indptr of X is:
- ///
- /// indptr(X) = [0, 2, 3, 5, 5, 8, 10].
- indptrBuffer: Buffer;
-
- /// indicesBuffer stores the location and size of the array that
- /// contains the column indices of the corresponding non-zero values.
- /// The type of index value is long.
- ///
- /// For example, the indices of the above X is:
- ///
- /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
- indicesBuffer: Buffer;
-}
-
-union SparseTensorIndex {
- SparseTensorIndexCOO,
- SparseMatrixIndexCSR
-}
-
-table SparseTensor {
- /// The type of data contained in a value cell.
- /// Currently only fixed-width value types are supported,
- /// no strings or nested types.
- type: Type;
-
- /// The dimensions of the tensor, optionally named.
- shape: [TensorDim];
-
- /// The number of non-zero values in a sparse tensor.
- non_zero_length: long;
-
- /// Sparse tensor index
- sparseIndex: SparseTensorIndex;
-
- /// The location and size of the tensor's data
- data: Buffer;
-}
-
-root_type SparseTensor;
diff --git a/java/format/pom.xml b/java/format/pom.xml
index 2c3dc03..5525cd3 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -106,6 +106,7 @@
<argument>${flatc.generated.files}</argument>
<argument>../../format/Schema.fbs</argument>
<argument>../../format/Tensor.fbs</argument>
+ <argument>../../format/SparseTensor.fbs</argument>
<argument>../../format/File.fbs</argument>
<argument>../../format/Message.fbs</argument>
</arguments>