This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 62b3c9100 fix: split expr.proto file (new) (#2267)
62b3c9100 is described below

commit 62b3c9100aa7d288a59181e093d05fe51b55629a
Author: K.I. (Dennis) Jung <kinatio...@gmail.com>
AuthorDate: Sun Aug 31 07:19:32 2025 +0900

    fix: split expr.proto file (new) (#2267)
    
    * split expr.proto file
---
 native/proto/src/proto/expr.proto                  | 78 +---------------------
 .../proto/src/proto/{types.proto => literal.proto} | 38 ++++++-----
 native/proto/src/proto/operator.proto              |  1 +
 native/proto/src/proto/types.proto                 | 58 +++++++++++++++-
 .../apache/comet/parquet/SourceFilterSerde.scala   |  9 ++-
 .../org/apache/comet/serde/QueryPlanSerde.scala    |  9 +--
 .../main/scala/org/apache/comet/serde/hash.scala   |  4 +-
 7 files changed, 94 insertions(+), 103 deletions(-)
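
For downstream Scala code, the practical effect of the split is that the
protoc-generated host classes move: Literal is now nested in LiteralOuterClass
(generated from literal.proto) and DataType in Types (generated from
types.proto), both still in the org.apache.comet.serde package. A minimal
sketch of the relocated builders, assuming the standard protobuf-java setters
generated from the fields shown in the diffs below:

    import org.apache.comet.serde.{LiteralOuterClass, Types}

    // DataType now lives in types.proto's generated host class, Types.
    val longType: Types.DataType = Types.DataType
      .newBuilder()
      .setTypeId(Types.DataType.DataTypeId.INT64)
      .build()

    // Literal now lives in literal.proto's generated host class,
    // LiteralOuterClass; the setters themselves are unchanged.
    val lit: LiteralOuterClass.Literal = LiteralOuterClass.Literal
      .newBuilder()
      .setDatatype(longType)
      .setLongVal(1L)
      .build()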

diff --git a/native/proto/src/proto/expr.proto b/native/proto/src/proto/expr.proto
index 1152d7a1b..04d9376ac 100644
--- a/native/proto/src/proto/expr.proto
+++ b/native/proto/src/proto/expr.proto
@@ -21,6 +21,7 @@ syntax = "proto3";
 
 package spark.spark_expression;
 
+import "literal.proto";
 import "types.proto";
 
 option java_package = "org.apache.comet.serde";
@@ -203,27 +204,6 @@ message BloomFilterAgg {
   DataType datatype = 4;
 }
 
-message Literal {
-  oneof value {
-    bool bool_val = 1;
-    // Protobuf doesn't provide int8 and int16, we put them into int32 and convert
-    // to int8 and int16 when deserializing.
-    int32 byte_val = 2;
-    int32 short_val = 3;
-    int32 int_val = 4;
-    int64 long_val = 5;
-    float float_val = 6;
-    double double_val = 7;
-    string string_val = 8;
-    bytes bytes_val = 9;
-    bytes decimal_val = 10;
-    ListLiteral list_val = 11;
-  }
-
-  DataType datatype = 12;
-  bool is_null = 13;
-}
-
 enum EvalMode {
   LEGACY = 0;
   TRY = 1;
@@ -426,59 +406,3 @@ message ArrayJoin {
 message Rand {
   int64 seed = 1;
 }
-
-message DataType {
-  enum DataTypeId {
-    BOOL = 0;
-    INT8 = 1;
-    INT16 = 2;
-    INT32 = 3;
-    INT64 = 4;
-    FLOAT = 5;
-    DOUBLE = 6;
-    STRING = 7;
-    BYTES = 8;
-    TIMESTAMP = 9;
-    DECIMAL = 10;
-    TIMESTAMP_NTZ = 11;
-    DATE = 12;
-    NULL = 13;
-    LIST = 14;
-    MAP = 15;
-    STRUCT = 16;
-  }
-  DataTypeId type_id = 1;
-
-  message DataTypeInfo {
-    oneof datatype_struct {
-      DecimalInfo decimal = 2;
-      ListInfo list = 3;
-      MapInfo map = 4;
-      StructInfo struct = 5;
-    }
-  }
-
-  message DecimalInfo {
-    int32 precision = 1;
-    int32 scale = 2;
-  }
-
-  message ListInfo {
-    DataType element_type = 1;
-    bool contains_null = 2;
-  }
-
-  message MapInfo {
-    DataType key_type = 1;
-    DataType value_type = 2;
-    bool value_contains_null = 3;
-  }
-
-  message StructInfo {
-    repeated string field_names = 1;
-    repeated DataType field_datatypes = 2;
-    repeated bool field_nullable = 3;
-  }
-
-  DataTypeInfo type_info = 2;
-}
\ No newline at end of file
diff --git a/native/proto/src/proto/types.proto b/native/proto/src/proto/literal.proto
similarity index 63%
copy from native/proto/src/proto/types.proto
copy to native/proto/src/proto/literal.proto
index cc163522b..b086b1bd1 100644
--- a/native/proto/src/proto/types.proto
+++ b/native/proto/src/proto/literal.proto
@@ -21,21 +21,27 @@ syntax = "proto3";
 
 package spark.spark_expression;
 
+import "types.proto";
+
 option java_package = "org.apache.comet.serde";
 
-message ListLiteral {
-  // Only one of these fields should be populated based on the array type
-  repeated bool boolean_values = 1;
-  repeated int32 byte_values = 2;
-  repeated int32 short_values = 3;
-  repeated int32 int_values = 4;
-  repeated int64 long_values = 5;
-  repeated float float_values = 6;
-  repeated double double_values = 7;
-  repeated string string_values = 8;
-  repeated bytes bytes_values = 9;
-  repeated bytes decimal_values = 10;
-  repeated ListLiteral list_values = 11;
-
-  repeated bool null_mask = 12;
-}
\ No newline at end of file
+message Literal {
+  oneof value {
+    bool bool_val = 1;
+    // Protobuf doesn't provide int8 and int16, we put them into int32 and convert
+    // to int8 and int16 when deserializing.
+    int32 byte_val = 2;
+    int32 short_val = 3;
+    int32 int_val = 4;
+    int64 long_val = 5;
+    float float_val = 6;
+    double double_val = 7;
+    string string_val = 8;
+    bytes bytes_val = 9;
+    bytes decimal_val = 10;
+    ListLiteral list_val = 11;
+  }
+
+  DataType datatype = 12;
+  bool is_null = 13;
+}
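
The comment carried over into literal.proto is the key contract for consumers:
proto3 has no int8/int16, so byte and short literals travel as int32 and are
narrowed back when deserializing. A hedged sketch of that narrowing (the
helper names are illustrative, not from this commit):

    import org.apache.comet.serde.LiteralOuterClass.Literal

    // Narrow the widened int32 wire values back to JVM byte/short.
    def byteOf(lit: Literal): Byte = lit.getByteVal.toByte
    def shortOf(lit: Literal): Short = lit.getShortVal.toShort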
diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto
index 5cb332ef0..77e02e6f4 100644
--- a/native/proto/src/proto/operator.proto
+++ b/native/proto/src/proto/operator.proto
@@ -23,6 +23,7 @@ package spark.spark_operator;
 
 import "expr.proto";
 import "partitioning.proto";
+import "types.proto";
 
 option java_package = "org.apache.comet.serde";
 
diff --git a/native/proto/src/proto/types.proto b/native/proto/src/proto/types.proto
index cc163522b..2fd3d59a7 100644
--- a/native/proto/src/proto/types.proto
+++ b/native/proto/src/proto/types.proto
@@ -38,4 +38,60 @@ message ListLiteral {
   repeated ListLiteral list_values = 11;
 
   repeated bool null_mask = 12;
-}
\ No newline at end of file
+}
+
+message DataType {
+  enum DataTypeId {
+    BOOL = 0;
+    INT8 = 1;
+    INT16 = 2;
+    INT32 = 3;
+    INT64 = 4;
+    FLOAT = 5;
+    DOUBLE = 6;
+    STRING = 7;
+    BYTES = 8;
+    TIMESTAMP = 9;
+    DECIMAL = 10;
+    TIMESTAMP_NTZ = 11;
+    DATE = 12;
+    NULL = 13;
+    LIST = 14;
+    MAP = 15;
+    STRUCT = 16;
+  }
+  DataTypeId type_id = 1;
+
+  message DataTypeInfo {
+    oneof datatype_struct {
+      DecimalInfo decimal = 2;
+      ListInfo list = 3;
+      MapInfo map = 4;
+      StructInfo struct = 5;
+    }
+  }
+
+  message DecimalInfo {
+    int32 precision = 1;
+    int32 scale = 2;
+  }
+
+  message ListInfo {
+    DataType element_type = 1;
+    bool contains_null = 2;
+  }
+
+  message MapInfo {
+    DataType key_type = 1;
+    DataType value_type = 2;
+    bool value_contains_null = 3;
+  }
+
+  message StructInfo {
+    repeated string field_names = 1;
+    repeated DataType field_datatypes = 2;
+    repeated bool field_nullable = 3;
+  }
+
+  DataTypeInfo type_info = 2;
+}
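
DataType is recursive: ListInfo, MapInfo and StructInfo all embed DataType, so
nested Spark types serialize as nested messages. A minimal sketch of building
a LIST-of-INT32 DataType with the generated builders, assuming standard
protobuf-java accessors for the fields above:

    import org.apache.comet.serde.Types

    val int32 = Types.DataType
      .newBuilder()
      .setTypeId(Types.DataType.DataTypeId.INT32)
      .build()

    // For LIST, type_info carries the element type and its nullability.
    val listOfInt = Types.DataType
      .newBuilder()
      .setTypeId(Types.DataType.DataTypeId.LIST)
      .setTypeInfo(
        Types.DataType.DataTypeInfo
          .newBuilder()
          .setList(
            Types.DataType.ListInfo
              .newBuilder()
              .setElementType(int32)
              .setContainsNull(true)))
      .build()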
diff --git a/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala b/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala
index 4ad467cd8..ac6a89ca3 100644
--- a/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala
@@ -29,13 +29,14 @@ import org.apache.spark.sql.types._
 
 import org.apache.comet.serde.ExprOuterClass
 import org.apache.comet.serde.ExprOuterClass.Expr
+import org.apache.comet.serde.LiteralOuterClass
 import org.apache.comet.serde.QueryPlanSerde.serializeDataType
 
 object SourceFilterSerde extends Logging {
 
   def createNameExpr(
       name: String,
-      schema: StructType): Option[(DataType, ExprOuterClass.Expr)] = {
+      schema: StructType): Option[(org.apache.spark.sql.types.DataType, ExprOuterClass.Expr)] = {
     val filedWithIndex = schema.fields.zipWithIndex.find { case (field, _) =>
       field.name == name
     }
@@ -66,8 +67,10 @@ object SourceFilterSerde extends Logging {
   /**
   * create a literal value native expression for source filter value, the value is a scala value
    */
-  def createValueExpr(value: Any, dataType: DataType): Option[ExprOuterClass.Expr] = {
-    val exprBuilder = ExprOuterClass.Literal.newBuilder()
+  def createValueExpr(
+      value: Any,
+      dataType: org.apache.spark.sql.types.DataType): Option[ExprOuterClass.Expr] = {
+    val exprBuilder = LiteralOuterClass.Literal.newBuilder()
     var valueIsSet = true
     if (value == null) {
       exprBuilder.setIsNull(true)
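
Fully qualifying org.apache.spark.sql.types.DataType in these signatures
disambiguates the Spark type from the proto DataType that used to come along
with ExprOuterClass; QueryPlanSerde below resolves the same clash with a
rename-on-import instead. A hedged sketch of one arm of the createValueExpr
pattern after the split, assuming Expr exposes Literal through its oneof
setter as used elsewhere in the serde layer:

    import org.apache.spark.sql.types.LongType
    import org.apache.comet.serde.{ExprOuterClass, LiteralOuterClass}
    import org.apache.comet.serde.QueryPlanSerde.serializeDataType

    // Build the relocated Literal, then wrap it in an Expr.
    def longLiteralExpr(value: Long): Option[ExprOuterClass.Expr] =
      serializeDataType(LongType).map { dt =>
        val lit = LiteralOuterClass.Literal
          .newBuilder()
          .setDatatype(dt)
          .setLongVal(value)
        ExprOuterClass.Expr.newBuilder().setLiteral(lit).build()
      }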
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index ad9be300f..2a5b6d075 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -52,10 +52,11 @@ import org.apache.comet.CometSparkSessionExtensions.{isCometScan, withInfo}
 import org.apache.comet.DataTypeSupport.isComplexType
 import org.apache.comet.expressions._
 import org.apache.comet.objectstore.NativeConfig
-import org.apache.comet.serde.ExprOuterClass.{AggExpr, DataType => ProtoDataType, Expr, ScalarFunc}
-import org.apache.comet.serde.ExprOuterClass.DataType._
+import org.apache.comet.serde.ExprOuterClass.{AggExpr, Expr, ScalarFunc}
 import org.apache.comet.serde.OperatorOuterClass.{AggregateMode => CometAggregateMode, BuildSide, JoinType, Operator}
 import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto}
+import org.apache.comet.serde.Types.{DataType => ProtoDataType}
+import org.apache.comet.serde.Types.DataType._
 import org.apache.comet.serde.Types.ListLiteral
 import org.apache.comet.shims.CometExprShim
 
@@ -228,7 +229,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
   * doesn't mean it is supported by Comet native execution, i.e., `supportedDataType` may return
    * false for it.
    */
-  def serializeDataType(dt: DataType): Option[ExprOuterClass.DataType] = {
+  def serializeDataType(dt: org.apache.spark.sql.types.DataType): Option[Types.DataType] = {
     val typeId = dt match {
       case _: BooleanType => 0
       case _: ByteType => 1
@@ -762,7 +763,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
                 .contains(CometConf.COMET_NATIVE_SCAN_IMPL.get()) && dataType
                 .isInstanceOf[ArrayType]) && !isComplexType(
                 dataType.asInstanceOf[ArrayType].elementType)) =>
-        val exprBuilder = ExprOuterClass.Literal.newBuilder()
+        val exprBuilder = LiteralOuterClass.Literal.newBuilder()
 
         if (value == null) {
           exprBuilder.setIsNull(true)
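
The serializeDataType change is purely at the signature level; the body still
maps Spark types to the DataTypeId ordinals now defined in types.proto. A
small sketch of the first arms, matching the enum above (the ShortType and
IntegerType arms are inferred from the enum order, not shown in the hunk):

    import org.apache.spark.sql.types._

    def typeId(dt: DataType): Int = dt match {
      case _: BooleanType => 0 // BOOL
      case _: ByteType => 1    // INT8
      case _: ShortType => 2   // INT16 (inferred)
      case _: IntegerType => 3 // INT32 (inferred)
      case _ => -1             // not handled in this sketch
    }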
diff --git a/spark/src/main/scala/org/apache/comet/serde/hash.scala b/spark/src/main/scala/org/apache/comet/serde/hash.scala
index 53f99ea7c..5c45a2593 100644
--- a/spark/src/main/scala/org/apache/comet/serde/hash.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/hash.scala
@@ -34,7 +34,7 @@ object CometXxHash64 extends CometExpressionSerde[XxHash64] {
       return None
     }
     val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding))
-    val seedBuilder = ExprOuterClass.Literal
+    val seedBuilder = LiteralOuterClass.Literal
       .newBuilder()
       .setDatatype(serializeDataType(LongType).get)
       .setLongVal(expr.seed)
@@ -53,7 +53,7 @@ object CometMurmur3Hash extends CometExpressionSerde[Murmur3Hash] {
       return None
     }
     val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding))
-    val seedBuilder = ExprOuterClass.Literal
+    val seedBuilder = LiteralOuterClass.Literal
       .newBuilder()
       .setDatatype(serializeDataType(IntegerType).get)
       .setIntVal(expr.seed)
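
Both hash serdes follow the same seed-literal pattern, and the only change is
which generated class hosts Literal. A self-contained sketch of the updated
pattern (42L stands in for the expression's seed; the surrounding Expr wiring
is omitted):

    import org.apache.comet.serde.LiteralOuterClass
    import org.apache.comet.serde.QueryPlanSerde.serializeDataType
    import org.apache.spark.sql.types.LongType

    val seedBuilder = LiteralOuterClass.Literal
      .newBuilder()
      .setDatatype(serializeDataType(LongType).get)
      .setLongVal(42L)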

