This is an automated email from the ASF dual-hosted git repository. comphead pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push: new 62b3c9100 fix: split expr.proto file (new) (#2267) 62b3c9100 is described below commit 62b3c9100aa7d288a59181e093d05fe51b55629a Author: K.I. (Dennis) Jung <kinatio...@gmail.com> AuthorDate: Sun Aug 31 07:19:32 2025 +0900 fix: split expr.proto file (new) (#2267) * split expr.proto file --- native/proto/src/proto/expr.proto | 78 +--------------------- .../proto/src/proto/{types.proto => literal.proto} | 38 ++++++----- native/proto/src/proto/operator.proto | 1 + native/proto/src/proto/types.proto | 58 +++++++++++++++- .../apache/comet/parquet/SourceFilterSerde.scala | 9 ++- .../org/apache/comet/serde/QueryPlanSerde.scala | 9 +-- .../main/scala/org/apache/comet/serde/hash.scala | 4 +- 7 files changed, 94 insertions(+), 103 deletions(-) diff --git a/native/proto/src/proto/expr.proto b/native/proto/src/proto/expr.proto index 1152d7a1b..04d9376ac 100644 --- a/native/proto/src/proto/expr.proto +++ b/native/proto/src/proto/expr.proto @@ -21,6 +21,7 @@ syntax = "proto3"; package spark.spark_expression; +import "literal.proto"; import "types.proto"; option java_package = "org.apache.comet.serde"; @@ -203,27 +204,6 @@ message BloomFilterAgg { DataType datatype = 4; } -message Literal { - oneof value { - bool bool_val = 1; - // Protobuf doesn't provide int8 and int16, we put them into int32 and convert - // to int8 and int16 when deserializing. - int32 byte_val = 2; - int32 short_val = 3; - int32 int_val = 4; - int64 long_val = 5; - float float_val = 6; - double double_val = 7; - string string_val = 8; - bytes bytes_val = 9; - bytes decimal_val = 10; - ListLiteral list_val = 11; - } - - DataType datatype = 12; - bool is_null = 13; -} - enum EvalMode { LEGACY = 0; TRY = 1; @@ -426,59 +406,3 @@ message ArrayJoin { message Rand { int64 seed = 1; } - -message DataType { - enum DataTypeId { - BOOL = 0; - INT8 = 1; - INT16 = 2; - INT32 = 3; - INT64 = 4; - FLOAT = 5; - DOUBLE = 6; - STRING = 7; - BYTES = 8; - TIMESTAMP = 9; - DECIMAL = 10; - TIMESTAMP_NTZ = 11; - DATE = 12; - NULL = 13; - LIST = 14; - MAP = 15; - STRUCT = 16; - } - DataTypeId type_id = 1; - - message DataTypeInfo { - oneof datatype_struct { - DecimalInfo decimal = 2; - ListInfo list = 3; - MapInfo map = 4; - StructInfo struct = 5; - } - } - - message DecimalInfo { - int32 precision = 1; - int32 scale = 2; - } - - message ListInfo { - DataType element_type = 1; - bool contains_null = 2; - } - - message MapInfo { - DataType key_type = 1; - DataType value_type = 2; - bool value_contains_null = 3; - } - - message StructInfo { - repeated string field_names = 1; - repeated DataType field_datatypes = 2; - repeated bool field_nullable = 3; - } - - DataTypeInfo type_info = 2; -} \ No newline at end of file diff --git a/native/proto/src/proto/types.proto b/native/proto/src/proto/literal.proto similarity index 63% copy from native/proto/src/proto/types.proto copy to native/proto/src/proto/literal.proto index cc163522b..b086b1bd1 100644 --- a/native/proto/src/proto/types.proto +++ b/native/proto/src/proto/literal.proto @@ -21,21 +21,27 @@ syntax = "proto3"; package spark.spark_expression; +import "types.proto"; + option java_package = "org.apache.comet.serde"; -message ListLiteral { - // Only one of these fields should be populated based on the array type - repeated bool boolean_values = 1; - repeated int32 byte_values = 2; - repeated int32 short_values = 3; - repeated int32 int_values = 4; - repeated int64 long_values = 5; - repeated float float_values = 6; - repeated double double_values = 7; - repeated string string_values = 8; - repeated bytes bytes_values = 9; - repeated bytes decimal_values = 10; - repeated ListLiteral list_values = 11; - - repeated bool null_mask = 12; -} \ No newline at end of file +message Literal { + oneof value { + bool bool_val = 1; + // Protobuf doesn't provide int8 and int16, we put them into int32 and convert + // to int8 and int16 when deserializing. + int32 byte_val = 2; + int32 short_val = 3; + int32 int_val = 4; + int64 long_val = 5; + float float_val = 6; + double double_val = 7; + string string_val = 8; + bytes bytes_val = 9; + bytes decimal_val = 10; + ListLiteral list_val = 11; + } + + DataType datatype = 12; + bool is_null = 13; +} diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto index 5cb332ef0..77e02e6f4 100644 --- a/native/proto/src/proto/operator.proto +++ b/native/proto/src/proto/operator.proto @@ -23,6 +23,7 @@ package spark.spark_operator; import "expr.proto"; import "partitioning.proto"; +import "types.proto"; option java_package = "org.apache.comet.serde"; diff --git a/native/proto/src/proto/types.proto b/native/proto/src/proto/types.proto index cc163522b..2fd3d59a7 100644 --- a/native/proto/src/proto/types.proto +++ b/native/proto/src/proto/types.proto @@ -38,4 +38,60 @@ message ListLiteral { repeated ListLiteral list_values = 11; repeated bool null_mask = 12; -} \ No newline at end of file +} + +message DataType { + enum DataTypeId { + BOOL = 0; + INT8 = 1; + INT16 = 2; + INT32 = 3; + INT64 = 4; + FLOAT = 5; + DOUBLE = 6; + STRING = 7; + BYTES = 8; + TIMESTAMP = 9; + DECIMAL = 10; + TIMESTAMP_NTZ = 11; + DATE = 12; + NULL = 13; + LIST = 14; + MAP = 15; + STRUCT = 16; + } + DataTypeId type_id = 1; + + message DataTypeInfo { + oneof datatype_struct { + DecimalInfo decimal = 2; + ListInfo list = 3; + MapInfo map = 4; + StructInfo struct = 5; + } + } + + message DecimalInfo { + int32 precision = 1; + int32 scale = 2; + } + + message ListInfo { + DataType element_type = 1; + bool contains_null = 2; + } + + message MapInfo { + DataType key_type = 1; + DataType value_type = 2; + bool value_contains_null = 3; + } + + message StructInfo { + repeated string field_names = 1; + repeated DataType field_datatypes = 2; + repeated bool field_nullable = 3; + } + + DataTypeInfo type_info = 2; +} diff --git a/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala b/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala index 4ad467cd8..ac6a89ca3 100644 --- a/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala +++ b/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala @@ -29,13 +29,14 @@ import org.apache.spark.sql.types._ import org.apache.comet.serde.ExprOuterClass import org.apache.comet.serde.ExprOuterClass.Expr +import org.apache.comet.serde.LiteralOuterClass import org.apache.comet.serde.QueryPlanSerde.serializeDataType object SourceFilterSerde extends Logging { def createNameExpr( name: String, - schema: StructType): Option[(DataType, ExprOuterClass.Expr)] = { + schema: StructType): Option[(org.apache.spark.sql.types.DataType, ExprOuterClass.Expr)] = { val filedWithIndex = schema.fields.zipWithIndex.find { case (field, _) => field.name == name } @@ -66,8 +67,10 @@ object SourceFilterSerde extends Logging { /** * create a literal value native expression for source filter value, the value is a scala value */ - def createValueExpr(value: Any, dataType: DataType): Option[ExprOuterClass.Expr] = { - val exprBuilder = ExprOuterClass.Literal.newBuilder() + def createValueExpr( + value: Any, + dataType: org.apache.spark.sql.types.DataType): Option[ExprOuterClass.Expr] = { + val exprBuilder = LiteralOuterClass.Literal.newBuilder() var valueIsSet = true if (value == null) { exprBuilder.setIsNull(true) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index ad9be300f..2a5b6d075 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -52,10 +52,11 @@ import org.apache.comet.CometSparkSessionExtensions.{isCometScan, withInfo} import org.apache.comet.DataTypeSupport.isComplexType import org.apache.comet.expressions._ import org.apache.comet.objectstore.NativeConfig -import org.apache.comet.serde.ExprOuterClass.{AggExpr, DataType => ProtoDataType, Expr, ScalarFunc} -import org.apache.comet.serde.ExprOuterClass.DataType._ +import org.apache.comet.serde.ExprOuterClass.{AggExpr, Expr, ScalarFunc} import org.apache.comet.serde.OperatorOuterClass.{AggregateMode => CometAggregateMode, BuildSide, JoinType, Operator} import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} +import org.apache.comet.serde.Types.{DataType => ProtoDataType} +import org.apache.comet.serde.Types.DataType._ import org.apache.comet.serde.Types.ListLiteral import org.apache.comet.shims.CometExprShim @@ -228,7 +229,7 @@ object QueryPlanSerde extends Logging with CometExprShim { * doesn't mean it is supported by Comet native execution, i.e., `supportedDataType` may return * false for it. */ - def serializeDataType(dt: DataType): Option[ExprOuterClass.DataType] = { + def serializeDataType(dt: org.apache.spark.sql.types.DataType): Option[Types.DataType] = { val typeId = dt match { case _: BooleanType => 0 case _: ByteType => 1 @@ -762,7 +763,7 @@ object QueryPlanSerde extends Logging with CometExprShim { .contains(CometConf.COMET_NATIVE_SCAN_IMPL.get()) && dataType .isInstanceOf[ArrayType]) && !isComplexType( dataType.asInstanceOf[ArrayType].elementType)) => - val exprBuilder = ExprOuterClass.Literal.newBuilder() + val exprBuilder = LiteralOuterClass.Literal.newBuilder() if (value == null) { exprBuilder.setIsNull(true) diff --git a/spark/src/main/scala/org/apache/comet/serde/hash.scala b/spark/src/main/scala/org/apache/comet/serde/hash.scala index 53f99ea7c..5c45a2593 100644 --- a/spark/src/main/scala/org/apache/comet/serde/hash.scala +++ b/spark/src/main/scala/org/apache/comet/serde/hash.scala @@ -34,7 +34,7 @@ object CometXxHash64 extends CometExpressionSerde[XxHash64] { return None } val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) - val seedBuilder = ExprOuterClass.Literal + val seedBuilder = LiteralOuterClass.Literal .newBuilder() .setDatatype(serializeDataType(LongType).get) .setLongVal(expr.seed) @@ -53,7 +53,7 @@ object CometMurmur3Hash extends CometExpressionSerde[Murmur3Hash] { return None } val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding)) - val seedBuilder = ExprOuterClass.Literal + val seedBuilder = LiteralOuterClass.Literal .newBuilder() .setDatatype(serializeDataType(IntegerType).get) .setIntVal(expr.seed) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org