This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 9e8316eb feat(spec): remove polymorphic from type id (#2054)
9e8316eb is described below
commit 9e8316eb106444f04c1f9b11b61bc88466ff1ccb
Author: Shawn Yang <[email protected]>
AuthorDate: Mon Feb 10 11:43:02 2025 +0800
feat(spec): remove polymorphic from type id (#2054)
## What does this PR do?
Polymorphic info is only used when serializing the fields of a struct. It
is not generic type information, so we should not include it in the type id.
## Related issues
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach a benchmark data here.
-->
---
cpp/fury/type/type.h | 135 +++++++-----------
docs/guide/xlang_type_mapping.md | 88 ++++++------
docs/specification/xlang_serialization_spec.md | 27 ++--
.../org/apache/fury/resolver/XtypeResolver.java | 8 --
.../src/main/java/org/apache/fury/type/Types.java | 157 +++++++++------------
python/pyfury/includes/libserialization.pxd | 55 ++++----
python/pyfury/type.py | 84 +++++------
7 files changed, 232 insertions(+), 322 deletions(-)
diff --git a/cpp/fury/type/type.h b/cpp/fury/type/type.h
index d98c0d3f..27fc2fc0 100644
--- a/cpp/fury/type/type.h
+++ b/cpp/fury/type/type.h
@@ -23,19 +23,19 @@ namespace fury {
enum class TypeId : int32_t {
// a boolean value (true or false).
BOOL = 1,
- // a 8-bit signed integer.
+ // an 8-bit signed integer.
INT8 = 2,
// a 16-bit signed integer.
INT16 = 3,
// a 32-bit signed integer.
INT32 = 4,
- // a 32-bit signed integer which use fury var_int32 encoding.
+ // a 32-bit signed integer which uses fury var_int32 encoding.
VAR_INT32 = 5,
// a 64-bit signed integer.
INT64 = 6,
- // a 64-bit signed integer which use fury PVL encoding.
+ // a 64-bit signed integer which uses fury PVL encoding.
VAR_INT64 = 7,
- // a 64-bit signed integer which use fury SLI encoding.
+ // a 64-bit signed integer which uses fury SLI encoding.
SLI_INT64 = 8,
// a 16-bit floating point number.
FLOAT16 = 9,
@@ -45,93 +45,65 @@ enum class TypeId : int32_t {
FLOAT64 = 11,
// a text string encoded using Latin1/UTF16/UTF-8 encoding.
STRING = 12,
- // a data type consisting of a set of named values. Rust enum with
- // non-predefined field values are not supported as an enum
+ // a data type consisting of a set of named values.
ENUM = 13,
// an enum whose value will be serialized as the registered name.
NAMED_ENUM = 14,
- // a morphic(final) type serialized by Fury Struct serializer. i.e. it
doesn't
- // have subclasses. Suppose we're
- // deserializing `List<SomeClass>`, we can save dynamic serializer dispatch
- // since `SomeClass` is morphic(final).
+ // a morphic(final) type serialized by Fury Struct serializer.
STRUCT = 15,
- // a type which is not morphic(not final). i.e. it has subclasses. Suppose
- // we're deserializing
- // `List<SomeClass>`, we must dispatch serializer dynamically since
- // `SomeClass` is polymorphic(non-final).
- POLYMORPHIC_STRUCT = 16,
// a morphic(final) type serialized by Fury compatible Struct serializer.
- COMPATIBLE_STRUCT = 17,
- // a non-morphic(non-final) type serialized by Fury compatible Struct
- // serializer.
- POLYMORPHIC_COMPATIBLE_STRUCT = 18,
+ COMPATIBLE_STRUCT = 16,
// a `struct` whose type mapping will be encoded as a name.
- NAMED_STRUCT = 19,
- // a `polymorphic_struct` whose type mapping will be encoded as a name.
- NAMED_POLYMORPHIC_STRUCT = 20,
+ NAMED_STRUCT = 17,
// a `compatible_struct` whose type mapping will be encoded as a name.
- NAMED_COMPATIBLE_STRUCT = 21,
- // a `polymorphic_compatible_struct` whose type mapping will be encoded as a
- // name.
- NAMED_POLYMORPHIC_COMPATIBLE_STRUCT = 22,
+ NAMED_COMPATIBLE_STRUCT = 18,
// a type which will be serialized by a customized serializer.
- EXT = 23,
- // an `ext` type which is not morphic(not final).
- POLYMORPHIC_EXT = 24,
+ EXT = 19,
// an `ext` type whose type mapping will be encoded as a name.
- NAMED_EXT = 25,
- // an `polymorphic_ext` type whose type mapping will be encoded as a name.
- NAMED_POLYMORPHIC_EXT = 26,
+ NAMED_EXT = 20,
// a sequence of objects.
- LIST = 27,
+ LIST = 21,
// an unordered set of unique elements.
- SET = 28,
- // a map of key-value pairs. Mutable types such as
- // `list/map/set/array/tensor/arrow` are not allowed as key of map.
- MAP = 29,
- // an absolute length of time, independent of any calendar/timezone, as a
- // count of nanoseconds.
- DURATION = 30,
- // a point in time, independent of any calendar/timezone, as a count of
- // nanoseconds. The count is relative
- // to an epoch at UTC midnight on January 1, 1970.
- TIMESTAMP = 31,
- // a naive date without timezone. The count is days relative to an epoch at
- // UTC midnight on Jan 1, 1970.
- LOCAL_DATE = 32,
- // exact decimal value represented as an integer value in two's complement.
- DECIMAL = 33,
- // an variable-length array of bytes.
- BINARY = 34,
- // a multidimensional array which every sub-array can have different sizes
but
- // all have same type.
- // only allow numeric components. Other arrays will be taken as List. The
- // implementation should support the
- // interoperability between array and list.
- ARRAY = 35,
- // one dimensional bool array.
- BOOL_ARRAY = 36,
- // one dimensional int16 array.
- INT8_ARRAY = 37,
- // one dimensional int16 array.
- INT16_ARRAY = 38,
- // one dimensional int32 array.
- INT32_ARRAY = 39,
- // one dimensional int64 array.
- INT64_ARRAY = 40,
- // one dimensional half_float_16 array.
- FLOAT16_ARRAY = 41,
- // one dimensional float32 array.
- FLOAT32_ARRAY = 42,
- // one dimensional float64 array.
- FLOAT64_ARRAY = 43,
- // an arrow [record
- // batch](https://arrow.apache.org/docs/cpp/tables.html#record-batches)
- // object.
- ARROW_RECORD_BATCH = 44,
- // an arrow [table](https://arrow.apache.org/docs/cpp/tables.html#tables)
- // object.
- ARROW_TABLE = 45,
+ SET = 22,
+ // a map of key-value pairs.
+ MAP = 23,
+ // an absolute length of time, independent of any calendar/timezone,
+ // as a count of nanoseconds.
+ DURATION = 24,
+ // a point in time, independent of any calendar/timezone, as a count
+ // of nanoseconds.
+ TIMESTAMP = 25,
+ // a naive date without timezone. The count is days relative to an
+ // epoch at UTC midnight on Jan 1, 1970.
+ LOCAL_DATE = 26,
+ // exact decimal value represented as an integer value in two's
+ // complement.
+ DECIMAL = 27,
+ // a variable-length array of bytes.
+ BINARY = 28,
+ // a multidimensional array with varying sub-array sizes but same type.
+ ARRAY = 29,
+ // one-dimensional boolean array.
+ BOOL_ARRAY = 30,
+ // one-dimensional int8 array.
+ INT8_ARRAY = 31,
+ // one-dimensional int16 array.
+ INT16_ARRAY = 32,
+ // one-dimensional int32 array.
+ INT32_ARRAY = 33,
+ // one-dimensional int64 array.
+ INT64_ARRAY = 34,
+ // one-dimensional float16 array.
+ FLOAT16_ARRAY = 35,
+ // one-dimensional float32 array.
+ FLOAT32_ARRAY = 36,
+ // one-dimensional float64 array.
+ FLOAT64_ARRAY = 37,
+ // an arrow record batch object.
+ ARROW_RECORD_BATCH = 38,
+ // an arrow table object.
+ ARROW_TABLE = 39,
+ // Bound value, typically used as a sentinel value.
BOUND = 64
};
@@ -139,11 +111,8 @@ inline bool IsNamespacedType(int32_t type_id) {
switch (static_cast<TypeId>(type_id)) {
case TypeId::NAMED_ENUM:
case TypeId::NAMED_STRUCT:
- case TypeId::NAMED_POLYMORPHIC_STRUCT:
case TypeId::NAMED_COMPATIBLE_STRUCT:
- case TypeId::NAMED_POLYMORPHIC_COMPATIBLE_STRUCT:
case TypeId::NAMED_EXT:
- case TypeId::NAMED_POLYMORPHIC_EXT:
return true;
default:
return false;
diff --git a/docs/guide/xlang_type_mapping.md b/docs/guide/xlang_type_mapping.md
index 13a6d05c..93898456 100644
--- a/docs/guide/xlang_type_mapping.md
+++ b/docs/guide/xlang_type_mapping.md
@@ -12,53 +12,47 @@ Note:
## Type Mapping
-| Fury Type | Fury Type ID | Java |
Python | Javascript | C++
| Golang | Rust |
-|-------------------------------------|--------------|-----------------|-----------------------------------|-----------------|--------------------------------|------------------|------------------|
-| bool | 1 | bool/Boolean | bool
| Boolean | bool
| bool | bool |
-| int8 | 2 | byte/Byte |
int/pyfury.Int8 | Type.int8() | int8_t
| int8 | i8 |
-| int16 | 3 | short/Short |
int/pyfury.Int16 | Type.int16() | int16_t
| int16 | i6 |
-| int32 | 4 | int/Integer |
int/pyfury.Int32 | Type.int32() | int32_t
| int32 | i32 |
-| var_int32 | 5 | int/Integer |
int/pyfury.VarInt32 | Type.varint32() | fury::varint32_t
| fury.varint32 | fury::varint32 |
-| int64 | 6 | long/Long |
int/pyfury.Int64 | Type.int64() | int64_t
| int64 | i64 |
-| var_int64 | 7 | long/Long |
int/pyfury.VarInt64 | Type.varint64() | fury::varint64_t
| fury.varint64 | fury::varint64 |
-| sli_int64 | 8 | long/Long |
int/pyfury.SliInt64 | Type.sliint64() | fury::sliint64_t
| fury.sliint64 | fury::sliint64 |
-| float16 | 9 | float/Float |
float/pyfury.Float16 | Type.float16() | fury::float16_t
| fury.float16 | fury::f16 |
-| float32 | 10 | float/Float |
float/pyfury.Float32 | Type.float32() | float
| float32 | f32 |
-| float64 | 11 | double/Double |
float/pyfury.Float64 | Type.float64() | double
| float64 | f64 |
-| string | 12 | String | str
| String | string
| string | String/str |
-| enum | 13 | Enum subclasses | enum
subclasses | / | enum
| / | enum |
-| named_enum | 14 | Enum subclasses | enum
subclasses | / | enum
| / | enum |
-| struct | 15 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| polymorphic_struct | 16 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| compatible_struct | 17 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| polymorphic_compatible_struct | 18 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| named_struct | 19 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| named_polymorphic_struct | 20 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| named_compatible_struct | 21 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| named_polymorphic_compatible_struct | 22 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| ext | 23 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| polymorphic_ext | 24 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| named_ext | 25 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| named_polymorphic_ext | 26 | pojo/record | data
class / type with type hints | object | struct/class
| struct | struct |
-| list | 27 | List/Collection |
list/tuple | array | vector
| slice | Vec |
-| set | 28 | Set | set
| / | set
| fury.Set | Set |
-| map | 29 | Map | dict
| Map | unordered_map
| map | HashMap |
-| duration | 30 | Duration |
timedelta | Number | duration
| Duration | Duration |
-| timestamp | 31 | Instant |
datetime | Number | std::chrono::nanoseconds
| Time | DateTime |
-| local_date | 32 | Date |
datetime | Number | std::chrono::nanoseconds
| Time | DateTime |
-| decimal | 33 | BigDecimal |
Decimal | bigint | /
| / | / |
-| binary | 34 | byte[] | bytes
| / | `uint8_t[n]/vector<T>`
| `[n]uint8/[]T` | `Vec<uint8_t>` |
-| array | 35 | array |
np.ndarray | / | /
| array/slice | Vec |
-| bool_array | 36 | bool[] |
ndarray(np.bool_) | / | `bool[n]`
| `[n]bool/[]T` | `Vec<bool>` |
-| int8_array | 37 | byte[] |
ndarray(int8) | / | `int8_t[n]/vector<T>`
| `[n]int8/[]T` | `Vec<i18>` |
-| int16_array | 38 | short[] |
ndarray(int16) | / | `int16_t[n]/vector<T>`
| `[n]int16/[]T` | `Vec<i16>` |
-| int32_array | 39 | int[] |
ndarray(int32) | / | `int32_t[n]/vector<T>`
| `[n]int32/[]T` | `Vec<i32>` |
-| int64_array | 40 | long[] |
ndarray(int64) | / | `int64_t[n]/vector<T>`
| `[n]int64/[]T` | `Vec<i64>` |
-| float16_array | 41 | float[] |
ndarray(float16) | / |
`fury::float16_t[n]/vector<T>` | `[n]float16/[]T` | `Vec<fury::f16>` |
-| float32_array | 42 | float[] |
ndarray(float32) | / | `float[n]/vector<T>`
| `[n]float32/[]T` | `Vec<f32>` |
-| float64_array | 43 | double[] |
ndarray(float64) | / | `double[n]/vector<T>`
| `[n]float64/[]T` | `Vec<f64>` |
-| arrow record batch | 44 | / | /
| / | /
| / | / |
-| arrow table | 45 | / | /
| / | /
| / | / |
+| Fury Type | Fury Type ID | Java | Python
| Javascript | C++ | Golang
| Rust |
+|-------------------------|--------------|-----------------|-----------------------------------|-----------------|--------------------------------|------------------|------------------|
+| bool | 1 | bool/Boolean | bool
| Boolean | bool | bool
| bool |
+| int8 | 2 | byte/Byte | int/pyfury.Int8
| Type.int8() | int8_t | int8
| i8 |
+| int16 | 3 | short/Short | int/pyfury.Int16
| Type.int16() | int16_t | int16
| i16 |
+| int32 | 4 | int/Integer | int/pyfury.Int32
| Type.int32() | int32_t | int32
| i32 |
+| var_int32 | 5 | int/Integer |
int/pyfury.VarInt32 | Type.varint32() | fury::varint32_t
| fury.varint32 | fury::varint32 |
+| int64 | 6 | long/Long | int/pyfury.Int64
| Type.int64() | int64_t | int64
| i64 |
+| var_int64 | 7 | long/Long |
int/pyfury.VarInt64 | Type.varint64() | fury::varint64_t
| fury.varint64 | fury::varint64 |
+| sli_int64 | 8 | long/Long |
int/pyfury.SliInt64 | Type.sliint64() | fury::sliint64_t
| fury.sliint64 | fury::sliint64 |
+| float16 | 9 | float/Float |
float/pyfury.Float16 | Type.float16() | fury::float16_t
| fury.float16 | fury::f16 |
+| float32 | 10 | float/Float |
float/pyfury.Float32 | Type.float32() | float
| float32 | f32 |
+| float64 | 11 | double/Double |
float/pyfury.Float64 | Type.float64() | double
| float64 | f64 |
+| string | 12 | String | str
| String | string | string
| String/str |
+| enum | 13 | Enum subclasses | enum subclasses
| / | enum | /
| enum |
+| named_enum | 14 | Enum subclasses | enum subclasses
| / | enum | /
| enum |
+| struct | 15 | pojo/record | data class / type
with type hints | object | struct/class | struct
| struct |
+| compatible_struct | 16 | pojo/record | data class / type
with type hints | object | struct/class | struct
| struct |
+| named_struct | 17 | pojo/record | data class / type
with type hints | object | struct/class | struct
| struct |
+| named_compatible_struct | 18 | pojo/record | data class / type
with type hints | object | struct/class | struct
| struct |
+| ext | 19 | pojo/record | data class / type
with type hints | object | struct/class | struct
| struct |
+| named_ext | 20 | pojo/record | data class / type
with type hints | object | struct/class | struct
| struct |
+| list | 21 | List/Collection | list/tuple
| array | vector | slice
| Vec |
+| set | 22 | Set | set
| / | set | fury.Set
| Set |
+| map | 23 | Map | dict
| Map | unordered_map | map
| HashMap |
+| duration | 24 | Duration | timedelta
| Number | duration | Duration
| Duration |
+| timestamp | 25 | Instant | datetime
| Number | std::chrono::nanoseconds | Time
| DateTime |
+| local_date | 26 | Date | datetime
| Number | std::chrono::nanoseconds | Time
| DateTime |
+| decimal | 27 | BigDecimal | Decimal
| bigint | / | /
| / |
+| binary | 28 | byte[] | bytes
| / | `uint8_t[n]/vector<T>` |
`[n]uint8/[]T` | `Vec<u8>` |
+| array | 29 | array | np.ndarray
| / | / |
array/slice | Vec |
+| bool_array | 30 | bool[] | ndarray(np.bool_)
| / | `bool[n]` |
`[n]bool/[]T` | `Vec<bool>` |
+| int8_array | 31 | byte[] | ndarray(int8)
| / | `int8_t[n]/vector<T>` |
`[n]int8/[]T` | `Vec<i8>` |
+| int16_array | 32 | short[] | ndarray(int16)
| / | `int16_t[n]/vector<T>` |
`[n]int16/[]T` | `Vec<i16>` |
+| int32_array | 33 | int[] | ndarray(int32)
| / | `int32_t[n]/vector<T>` |
`[n]int32/[]T` | `Vec<i32>` |
+| int64_array | 34 | long[] | ndarray(int64)
| / | `int64_t[n]/vector<T>` |
`[n]int64/[]T` | `Vec<i64>` |
+| float16_array | 35 | float[] | ndarray(float16)
| / | `fury::float16_t[n]/vector<T>` |
`[n]float16/[]T` | `Vec<fury::f16>` |
+| float32_array | 36 | float[] | ndarray(float32)
| / | `float[n]/vector<T>` |
`[n]float32/[]T` | `Vec<f32>` |
+| float64_array | 37 | double[] | ndarray(float64)
| / | `double[n]/vector<T>` |
`[n]float64/[]T` | `Vec<f64>` |
+| arrow record batch | 38 | / | /
| / | / | /
| / |
+| arrow table | 39 | / | /
| / | / | /
| / |
## Type info(not implemented currently)
diff --git a/docs/specification/xlang_serialization_spec.md
b/docs/specification/xlang_serialization_spec.md
index 1e8a5a35..5d5f710f 100644
--- a/docs/specification/xlang_serialization_spec.md
+++ b/docs/specification/xlang_serialization_spec.md
@@ -42,18 +42,11 @@ also introduce more complexities compared to static
serialization frameworks. So
- named_enum: an enum whose value will be serialized as the registered name.
- struct: a morphic(final) type serialized by Fury Struct serializer. i.e. it
doesn't have subclasses. Suppose we're
deserializing `List<SomeClass>`, we can save dynamic serializer dispatch
since `SomeClass` is morphic(final).
-- polymorphic_struct: a type which is not morphic(not final). i.e. it has
subclasses. Suppose we're deserializing
- `List<SomeClass>`, we must dispatch serializer dynamically since `SomeClass`
is morphic(final).
- compatible_struct: a morphic(final) type serialized by Fury compatible
Struct serializer.
-- polymorphic_compatible_struct: a non-morphic(non-final) type serialized by
Fury compatible Struct serializer.
- named_struct: a `struct` whose type mapping will be encoded as a name.
-- named_polymorphic_struct: a `polymorphic_struct` whose type mapping will be
encoded as a name.
- named_compatible_struct: a `compatible_struct` whose type mapping will be
encoded as a name.
-- named_polymorphic_compatible_struct: a `polymorphic_compatible_struct` whose
type mapping will be encoded as a name.
- ext: a type which will be serialized by a customized serializer.
-- polymorphic_ext: an `ext` type which is not morphic(not final).
- named_ext: an `ext` type whose type mapping will be encoded as a name.
-- named_polymorphic_ext: an `polymorphic_ext` type whose type mapping will be
encoded as a name.
- list: a sequence of objects.
- set: an unordered set of unique elements.
- map: a map of key-value pairs. Mutable types such as
`list/map/set/array/tensor/arrow` are not allowed as key of map.
@@ -118,14 +111,13 @@ Users can also provide meta hints for fields of a type,
or the type whole. Here
annotation to provide such information.
```java
-
-@TypeInfo(fieldsNullable = false, trackingRef = false, polymorphic = false)
+@FuryObject(fieldsNullable = false, trackingRef = false)
class Foo {
- @FieldInfo(trackingRef = false)
+ @FuryField(trackingRef = false)
int[] intArray;
- @FieldInfo(polymorphic = true)
+ @FuryField(polymorphic = true)
Object object;
- @FieldInfo(tagId = 1, nullable = true)
+ @FuryField(tagId = 1, nullable = true)
List<Object> objectList;
}
```
@@ -334,10 +326,15 @@ Meta header is a 64 bits number value encoded in little
endian order.
- field name: If tag id is set, tag id will be used instead. Otherwise meta
string encoding `[length]` and data will
be written instead.
- type id:
+ - Format: `id << 1 | polymorphic flag`. If field type is polymorphic, this
flag is set to `0b1`, otherwise it's
+ `0b0`
- For registered type-consistent classes, it will be the registered type
id.
- - Otherwise it will be encoded as `OBJECT_ID` if it isn't `final` and
`FINAL_OBJECT_ID` if it's `final`. The
- meta for such types is written separately instead of inlining here
is to reduce meta space cost if object of
- this type is serialized in current object graph multiple times, and
the field value may be null too.
+ - For struct type it will be written as `STRUCT`.
+ - The meta for a struct type is written separately instead of being inlined here
to reduce meta space cost if an object of
+ this type is serialized in the current object graph multiple times, and the
field value may be null too.
+ - For enum type, it will be written as `ENUM`.
+ - For collection type, it will be written as `COLLECTION`, then write
element type recursively.
+ - For map type, it will be written as `MAP`, then write key and value type
recursively.
Field order are left as implementation details, which is not exposed to
specification, the deserialization need to
resort fields based on Fury field comparator. In this way, fury can compute
statistics for field names or types and
diff --git
a/java/fury-core/src/main/java/org/apache/fury/resolver/XtypeResolver.java
b/java/fury-core/src/main/java/org/apache/fury/resolver/XtypeResolver.java
index f74a0491..b0ef00f0 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/XtypeResolver.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/XtypeResolver.java
@@ -361,10 +361,7 @@ public class XtypeResolver {
case Types.NAMED_ENUM:
case Types.NAMED_STRUCT:
case Types.NAMED_COMPATIBLE_STRUCT:
- case Types.NAMED_POLYMORPHIC_STRUCT:
- case Types.NAMED_POLYMORPHIC_COMPATIBLE_STRUCT:
case Types.NAMED_EXT:
- case Types.NAMED_POLYMORPHIC_EXT:
assert classInfo.namespaceBytes != null;
metaStringResolver.writeMetaStringBytes(buffer,
classInfo.namespaceBytes);
assert classInfo.typeNameBytes != null;
@@ -383,10 +380,7 @@ public class XtypeResolver {
case Types.NAMED_ENUM:
case Types.NAMED_STRUCT:
case Types.NAMED_COMPATIBLE_STRUCT:
- case Types.NAMED_POLYMORPHIC_STRUCT:
- case Types.NAMED_POLYMORPHIC_COMPATIBLE_STRUCT:
case Types.NAMED_EXT:
- case Types.NAMED_POLYMORPHIC_EXT:
MetaStringBytes packageBytes =
metaStringResolver.readMetaStringBytes(buffer);
MetaStringBytes simpleClassNameBytes =
metaStringResolver.readMetaStringBytes(buffer);
return loadBytesToClassInfo(internalTypeId, packageBytes,
simpleClassNameBytes);
@@ -453,8 +447,6 @@ public class XtypeResolver {
case Types.NAMED_ENUM:
case Types.NAMED_STRUCT:
case Types.NAMED_COMPATIBLE_STRUCT:
- case Types.NAMED_POLYMORPHIC_STRUCT:
- case Types.NAMED_POLYMORPHIC_COMPATIBLE_STRUCT:
type =
NonexistentClass.getNonexistentClass(
qualifiedName, isEnum(typeId), 0,
config.isMetaShareEnabled());
diff --git a/java/fury-core/src/main/java/org/apache/fury/type/Types.java
b/java/fury-core/src/main/java/org/apache/fury/type/Types.java
index 87272a3a..a2da4ac4 100644
--- a/java/fury-core/src/main/java/org/apache/fury/type/Types.java
+++ b/java/fury-core/src/main/java/org/apache/fury/type/Types.java
@@ -33,16 +33,16 @@ public class Types {
/** int32: a 32-bit signed integer. */
public static final int INT32 = 4;
- /** var_int32: a 32-bit signed integer which use fury var_int32 encoding. */
+ /** var_int32: a 32-bit signed integer which uses fury var_int32 encoding. */
public static final int VAR_INT32 = 5;
/** int64: a 64-bit signed integer. */
public static final int INT64 = 6;
- /** var_int64: a 64-bit signed integer which use fury PVL encoding. */
+ /** var_int64: a 64-bit signed integer which uses fury PVL encoding. */
public static final int VAR_INT64 = 7;
- /** sli_int64: a 64-bit signed integer which use fury SLI encoding. */
+ /** sli_int64: a 64-bit signed integer which uses fury SLI encoding. */
public static final int SLI_INT64 = 8;
/** float16: a 16-bit floating point number. */
@@ -57,142 +57,119 @@ public class Types {
/** string: a text string encoded using Latin1/UTF16/UTF-8 encoding. */
public static final int STRING = 12;
- /**
- * enum: a data type consisting of a set of named values. Rust enum with
non-predefined field
- * values are not \ supported as an enum.
- */
+ /** enum: a data type consisting of a set of named values. */
public static final int ENUM = 13;
/** named_enum: an enum whose value will be serialized as the registered
name. */
public static final int NAMED_ENUM = 14;
/**
- * a morphic(final) type serialized by Fury Struct serializer. i.e. it
doesn't have subclasses.
- * Suppose we're deserializing {@code List<SomeClass>}`, we can save dynamic
serializer dispatch
+ * A morphic(final) type serialized by Fury Struct serializer. i.e. it
doesn't have subclasses.
+ * Suppose we're deserializing {@code List<SomeClass>}, we can save dynamic
serializer dispatch
* since `SomeClass` is morphic(final).
*/
public static final int STRUCT = 15;
- /**
- * a type which is polymorphic(not final). i.e. it has subclasses. Suppose
we're deserializing
- * {@code List<SomeClass>}`, we must dispatch serializer dynamically since
`SomeClass` is
- * polymorphic(non-final).
- */
- public static final int POLYMORPHIC_STRUCT = 16;
-
- /** a morphic(final) type serialized by Fury compatible Struct serializer. */
- public static final int COMPATIBLE_STRUCT = 17;
-
- /** a non-morphic(non-final) type serialized by Fury compatible Struct
serializer. */
- public static final int POLYMORPHIC_COMPATIBLE_STRUCT = 18;
-
- /** a `struct` whose type mapping will be encoded as a name. */
- public static final int NAMED_STRUCT = 19;
-
- /** a `polymorphic_struct` whose type mapping will be encoded as a name. */
- public static final int NAMED_POLYMORPHIC_STRUCT = 20;
+ /** A morphic(final) type serialized by Fury compatible Struct serializer. */
+ public static final int COMPATIBLE_STRUCT = 16;
- /** a `compatible_struct` whose type mapping will be encoded as a name. */
- public static final int NAMED_COMPATIBLE_STRUCT = 21;
+ /** A `struct` whose type mapping will be encoded as a name. */
+ public static final int NAMED_STRUCT = 17;
- /** a `polymorphic_compatible_struct` whose type mapping will be encoded as
a name. */
- public static final int NAMED_POLYMORPHIC_COMPATIBLE_STRUCT = 22;
+ /** A `compatible_struct` whose type mapping will be encoded as a name. */
+ public static final int NAMED_COMPATIBLE_STRUCT = 18;
- /** a type which will be serialized by a customized serializer. */
- public static final int EXT = 23;
+ /** A type which will be serialized by a customized serializer. */
+ public static final int EXT = 19;
- /** an `ext` type which is not morphic(not final). */
- public static final int POLYMORPHIC_EXT = 24;
+ /** An `ext` type whose type mapping will be encoded as a name. */
+ public static final int NAMED_EXT = 20;
- /** an `ext` type whose type mapping will be encoded as a name. */
- public static final int NAMED_EXT = 25;
+ /** A sequence of objects. */
+ public static final int LIST = 21;
- /** an `polymorphic_ext` type whose type mapping will be encoded as a name.
*/
- public static final int NAMED_POLYMORPHIC_EXT = 26;
-
- /** a sequence of objects. */
- public static final int LIST = 27;
-
- /** an unordered set of unique elements. */
- public static final int SET = 28;
+ /** An unordered set of unique elements. */
+ public static final int SET = 22;
/**
- * a map of key-value pairs. Mutable types such as
`list/map/set/array/tensor/arrow` are not
+ * A map of key-value pairs. Mutable types such as
`list/map/set/array/tensor/arrow` are not
* allowed as key of map.
*/
- public static final int MAP = 29;
+ public static final int MAP = 23;
/**
- * an absolute length of time, independent of any calendar/timezone, as a
count of nanoseconds.
+ * An absolute length of time, independent of any calendar/timezone, as a
count of nanoseconds.
*/
- public static final int DURATION = 30;
+ public static final int DURATION = 24;
/**
- * timestamp: a point in time, independent of any calendar/timezone, as a
count of nanoseconds.
- * The count is relative to an epoch at UTC midnight on January 1, 1970.
+ * A point in time, independent of any calendar/timezone, as a count of
nanoseconds. The count is
+ * relative to an epoch at UTC midnight on January 1, 1970.
*/
- public static final int TIMESTAMP = 31;
+ public static final int TIMESTAMP = 25;
/**
- * a naive date without timezone. The count is days relative to an epoch at
UTC midnight on Jan 1,
+ * A naive date without timezone. The count is days relative to an epoch at
UTC midnight on Jan 1,
* 1970.
*/
- public static final int LOCAL_DATE = 32;
+ public static final int LOCAL_DATE = 26;
- /** exact decimal value represented as an integer value in two's complement.
*/
- public static final int DECIMAL = 33;
+ /** Exact decimal value represented as an integer value in two's complement.
*/
+ public static final int DECIMAL = 27;
- /** an variable-length array of bytes. */
- public static final int BINARY = 34;
+ /** A variable-length array of bytes. */
+ public static final int BINARY = 28;
/**
- * a multidimensional array which every sub-array can have different sizes
but all have same type.
- * only allow numeric components. Other arrays will be taken as List. The
implementation should
- * support the interoperability between array and list.
+ * A multidimensional array where every sub-array can have different sizes
but all have the same
+ * type. Only numeric components allowed. Other arrays will be taken as
List. The implementation
+ * should support interoperability between array and list.
*/
- public static final int ARRAY = 35;
+ public static final int ARRAY = 29;
- /** one dimensional int16 array. */
- public static final int BOOL_ARRAY = 36;
+ /** One dimensional bool array. */
+ public static final int BOOL_ARRAY = 30;
- /** one dimensional int8 array. */
- public static final int INT8_ARRAY = 37;
+ /** One dimensional int8 array. */
+ public static final int INT8_ARRAY = 31;
- /** one dimensional int16 array. */
- public static final int INT16_ARRAY = 38;
+ /** One dimensional int16 array. */
+ public static final int INT16_ARRAY = 32;
- /** one dimensional int32 array. */
- public static final int INT32_ARRAY = 39;
+ /** One dimensional int32 array. */
+ public static final int INT32_ARRAY = 33;
- /** one dimensional int64 array. */
- public static final int INT64_ARRAY = 40;
+ /** One dimensional int64 array. */
+ public static final int INT64_ARRAY = 34;
- /** one dimensional half_float_16 array. */
- public static final int FLOAT16_ARRAY = 41;
+ /** One dimensional half_float_16 array. */
+ public static final int FLOAT16_ARRAY = 35;
- /** one dimensional float32 array. */
- public static final int FLOAT32_ARRAY = 42;
+ /** One dimensional float32 array. */
+ public static final int FLOAT32_ARRAY = 36;
- /** one dimensional float64 array. */
- public static final int FLOAT64_ARRAY = 43;
+ /** One dimensional float64 array. */
+ public static final int FLOAT64_ARRAY = 37;
- /**
- * an (<a
href="https://arrow.apache.org/docs/cpp/tables.html#record-batches">arrow record
- * batch</a>) object.
- */
- public static final int ARROW_RECORD_BATCH = 44;
+ /** An arrow record batch object. */
+ public static final int ARROW_RECORD_BATCH = 38;
- /** an (<a href="https://arrow.apache.org/docs/cpp/tables.html#tables">arrow
table</a>) object. */
- public static final int ARROW_TABLE = 45;
+ /** An arrow table object. */
+ public static final int ARROW_TABLE = 39;
+ // Helper methods
public static boolean isStructType(int value) {
return value == STRUCT
- || value == POLYMORPHIC_STRUCT
|| value == COMPATIBLE_STRUCT
- || value == POLYMORPHIC_COMPATIBLE_STRUCT
|| value == NAMED_STRUCT
- || value == NAMED_POLYMORPHIC_STRUCT
- || value == NAMED_COMPATIBLE_STRUCT
- || value == NAMED_POLYMORPHIC_COMPATIBLE_STRUCT;
+ || value == NAMED_COMPATIBLE_STRUCT;
+ }
+
+ public static boolean isExtType(int value) {
+ return value == EXT || value == NAMED_EXT;
+ }
+
+ public static boolean isEnumType(int value) {
+ return value == ENUM || value == NAMED_ENUM;
}
}
diff --git a/python/pyfury/includes/libserialization.pxd
b/python/pyfury/includes/libserialization.pxd
index c5f31631..3cb69bc1 100644
--- a/python/pyfury/includes/libserialization.pxd
+++ b/python/pyfury/includes/libserialization.pxd
@@ -37,35 +37,30 @@ cdef extern from "fury/type/type.h" namespace "fury" nogil:
ENUM = 13
NAMED_ENUM = 14
STRUCT = 15
- POLYMORPHIC_STRUCT = 16
- COMPATIBLE_STRUCT = 17
- POLYMORPHIC_COMPATIBLE_STRUCT = 18
- NAMED_STRUCT = 19
- NAMED_POLYMORPHIC_STRUCT = 20
- NAMED_COMPATIBLE_STRUCT = 21
- NAMED_POLYMORPHIC_COMPATIBLE_STRUCT = 22
- EXT = 23
- POLYMORPHIC_EXT = 24
- NAMED_EXT = 25
- NAMED_POLYMORPHIC_EXT = 26
- LIST = 27
- SET = 28
- MAP = 29
- DURATION = 30
- TIMESTAMP = 31
- LOCAL_DATE = 32
- DECIMAL = 33
- BINARY = 34
- ARRAY = 35
- BOOL_ARRAY = 36
- INT8_ARRAY = 37
- INT16_ARRAY = 38
- INT32_ARRAY = 39
- INT64_ARRAY = 40
- FLOAT16_ARRAY = 41
- FLOAT32_ARRAY = 42
- FLOAT64_ARRAY = 43
- ARROW_RECORD_BATCH = 44
- ARROW_TABLE = 45
+ COMPATIBLE_STRUCT = 16
+ NAMED_STRUCT = 17
+ NAMED_COMPATIBLE_STRUCT = 18
+ EXT = 19
+ NAMED_EXT = 20
+ LIST = 21
+ SET = 22
+ MAP = 23
+ DURATION = 24
+ TIMESTAMP = 25
+ LOCAL_DATE = 26
+ DECIMAL = 27
+ BINARY = 28
+ ARRAY = 29
+ BOOL_ARRAY = 30
+ INT8_ARRAY = 31
+ INT16_ARRAY = 32
+ INT32_ARRAY = 33
+ INT64_ARRAY = 34
+ FLOAT16_ARRAY = 35
+ FLOAT32_ARRAY = 36
+ FLOAT64_ARRAY = 37
+ ARROW_RECORD_BATCH = 38
+ ARROW_TABLE = 39
+ BOUND = 64
cdef c_bool IsNamespacedType(int32_t type_id)
diff --git a/python/pyfury/type.py b/python/pyfury/type.py
index dc318174..4ff2a39e 100644
--- a/python/pyfury/type.py
+++ b/python/pyfury/type.py
@@ -138,94 +138,83 @@ class TypeId:
INT16 = 3
# a 32-bit signed integer.
INT32 = 4
- # a 32-bit signed integer which use fury var_int32 encoding.
+ # a 32-bit signed integer which uses fury var_int32 encoding.
VAR_INT32 = 5
# a 64-bit signed integer.
INT64 = 6
- # a 64-bit signed integer which use fury PVL encoding.
+ # a 64-bit signed integer which uses fury PVL encoding.
VAR_INT64 = 7
- # a 64-bit signed integer which use fury SLI encoding.
+ # a 64-bit signed integer which uses fury SLI encoding.
SLI_INT64 = 8
# a 16-bit floating point number.
FLOAT16 = 9
- # a 32-bit floating point number.
+ # a 32-bit floating point number.
FLOAT32 = 10
# a 64-bit floating point number including NaN and Infinity.
FLOAT64 = 11
# a text string encoded using Latin1/UTF16/UTF-8 encoding.
STRING = 12
# a data type consisting of a set of named values. Rust enum with non-predefined field values are not supported as
- # an enum
+ # an enum.
ENUM = 13
# an enum whose value will be serialized as the registered name.
NAMED_ENUM = 14
- # a morphic(final) type serialized by Fury Struct serializer. i.e. it doesn't have subclasses. Suppose we're
+ # a morphic(final) type serialized by Fury Struct serializer. i.e., it doesn't have subclasses. Suppose we're
# deserializing `List[SomeClass]`, we can save dynamic serializer dispatch since `SomeClass` is morphic(final).
STRUCT = 15
- # a type which is not morphic(not final). i.e. it have subclasses. Suppose we're deserializing
- # `List[SomeClass]`, we must dispatch serializer dynamically since `SomeClass` is polymorphic(non-final).
- POLYMORPHIC_STRUCT = 16
# a morphic(final) type serialized by Fury compatible Struct serializer.
- COMPATIBLE_STRUCT = 17
- # a non-morphic(non-final) type serialized by Fury compatible Struct serializer.
- POLYMORPHIC_COMPATIBLE_STRUCT = 18
+ COMPATIBLE_STRUCT = 16
# a `struct` whose type mapping will be encoded as a name.
- NAMED_STRUCT = 19
- # a `polymorphic_struct` whose type mapping will be encoded as a name.
- NAMED_POLYMORPHIC_STRUCT = 20
+ NAMED_STRUCT = 17
# a `compatible_struct` whose type mapping will be encoded as a name.
- NAMED_COMPATIBLE_STRUCT = 21
- # a `polymorphic_compatible_struct` whose type mapping will be encoded as a name.
- NAMED_POLYMORPHIC_COMPATIBLE_STRUCT = 22
+ NAMED_COMPATIBLE_STRUCT = 18
# a type which will be serialized by a customized serializer.
- EXT = 23
- # an `ext` type which is not morphic(not final).
- POLYMORPHIC_EXT = 24
+ EXT = 19
# an `ext` type whose type mapping will be encoded as a name.
- NAMED_EXT = 25
- # an `polymorphic_ext` type whose type mapping will be encoded as a name.
- NAMED_POLYMORPHIC_EXT = 26
+ NAMED_EXT = 20
# a sequence of objects.
- LIST = 27
+ LIST = 21
# an unordered set of unique elements.
- SET = 28
+ SET = 22
# a map of key-value pairs. Mutable types such as `list/map/set/array/tensor/arrow` are not allowed as key of map.
- MAP = 29
+ MAP = 23
# an absolute length of time, independent of any calendar/timezone, as a count of nanoseconds.
- DURATION = 30
+ DURATION = 24
# a point in time, independent of any calendar/timezone, as a count of nanoseconds. The count is relative
# to an epoch at UTC midnight on January 1, 1970.
- TIMESTAMP = 31
+ TIMESTAMP = 25
# a naive date without timezone. The count is days relative to an epoch at UTC midnight on Jan 1, 1970.
- LOCAL_DATE = 32
+ LOCAL_DATE = 26
# exact decimal value represented as an integer value in two's complement.
- DECIMAL = 33
- # an variable-length array of bytes.
- BINARY = 34
- # a multidimensional array which every sub-array can have different sizes but all have same type.
+ DECIMAL = 27
+ # a variable-length array of bytes.
+ BINARY = 28
+ # a multidimensional array which every sub-array can have different sizes but all have the same type.
# only allow numeric components. Other arrays will be taken as List. The implementation should support the
# interoperability between array and list.
- ARRAY = 35
+ ARRAY = 29
# one dimensional bool array.
- BOOL_ARRAY = 36
+ BOOL_ARRAY = 30
+ # one dimensional int8 array.
+ INT8_ARRAY = 31
# one dimensional int16 array.
- INT8_ARRAY = 37
- # one dimensional int16 array.
- INT16_ARRAY = 38
+ INT16_ARRAY = 32
# one dimensional int32 array.
- INT32_ARRAY = 39
+ INT32_ARRAY = 33
# one dimensional int64 array.
- INT64_ARRAY = 40
+ INT64_ARRAY = 34
# one dimensional half_float_16 array.
- FLOAT16_ARRAY = 41
+ FLOAT16_ARRAY = 35
# one dimensional float32 array.
- FLOAT32_ARRAY = 42
+ FLOAT32_ARRAY = 36
# one dimensional float64 array.
- FLOAT64_ARRAY = 43
+ FLOAT64_ARRAY = 37
# an arrow [record batch](https://arrow.apache.org/docs/cpp/tables.html#record-batches) object.
- ARROW_RECORD_BATCH = 44
+ ARROW_RECORD_BATCH = 38
# an arrow [table](https://arrow.apache.org/docs/cpp/tables.html#tables) object.
- ARROW_TABLE = 45
+ ARROW_TABLE = 39
+
+ # BOUND id remains at 64
BOUND = 64
@staticmethod
@@ -235,12 +224,9 @@ class TypeId:
__NAMESPACED_TYPES__ = {
TypeId.NAMED_EXT,
- TypeId.NAMED_POLYMORPHIC_EXT,
TypeId.NAMED_ENUM,
TypeId.NAMED_STRUCT,
- TypeId.NAMED_POLYMORPHIC_STRUCT,
TypeId.NAMED_COMPATIBLE_STRUCT,
- TypeId.NAMED_POLYMORPHIC_COMPATIBLE_STRUCT,
}
Int8Type = TypeVar("Int8Type", bound=int)
Int16Type = TypeVar("Int16Type", bound=int)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]