This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new de8c55ec chore: Move protobuf files to separate crate (#661)
de8c55ec is described below

commit de8c55ec48a67f576b2e65b1427390e93f6bf8b7
Author: Andy Grove <[email protected]>
AuthorDate: Mon Jul 15 14:55:31 2024 -0600

    chore: Move protobuf files to separate crate (#661)
    
    * move protobuf files to separate crate
    
    * format
    
    * revert accidental delete
    
    * Update native/proto/README.md
    
    Co-authored-by: Liang-Chi Hsieh <[email protected]>
    
    ---------
    
    Co-authored-by: Liang-Chi Hsieh <[email protected]>
---
 .gitignore                                         |  2 +-
 native/Cargo.lock                                  | 10 ++++-
 native/Cargo.toml                                  |  3 +-
 native/core/Cargo.toml                             |  4 +-
 native/core/src/execution/datafusion/planner.rs    | 37 +++++++++---------
 native/core/src/execution/jni_api.rs               |  2 +-
 native/core/src/execution/mod.rs                   | 18 ---------
 native/core/src/execution/serde.rs                 | 17 +++++----
 native/proto/Cargo.toml                            | 44 ++++++++++++++++++++++
 native/proto/README.md                             | 23 +++++++++++
 native/{core => proto}/build.rs                    | 12 +++---
 .../src/execution/mod.rs => proto/src/lib.rs}      | 26 -------------
 .../src/execution => proto/src}/proto/expr.proto   |  0
 .../execution => proto/src}/proto/operator.proto   |  0
 .../src}/proto/partitioning.proto                  |  0
 spark/pom.xml                                      |  2 +-
 16 files changed, 116 insertions(+), 84 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8bdcd51d..14e28d3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,7 @@ derby.log
 metastore_db/
 spark-warehouse/
 dependency-reduced-pom.xml
-native/core/src/execution/generated
+native/proto/src/generated
 prebuild
 .flattened-pom.xml
 rat.txt
diff --git a/native/Cargo.lock b/native/Cargo.lock
index 649e137f..c3aae93a 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -864,6 +864,7 @@ dependencies = [
  "crc32fast",
  "criterion",
  "datafusion",
+ "datafusion-comet-proto",
  "datafusion-comet-spark-expr",
  "datafusion-common",
  "datafusion-expr",
@@ -886,7 +887,6 @@ dependencies = [
  "paste",
  "pprof",
  "prost 0.12.6",
- "prost-build",
  "rand",
  "regex",
  "serde",
@@ -900,6 +900,14 @@ dependencies = [
  "zstd",
 ]
 
+[[package]]
+name = "datafusion-comet-proto"
+version = "0.1.0"
+dependencies = [
+ "prost 0.12.6",
+ "prost-build",
+]
+
 [[package]]
 name = "datafusion-comet-spark-expr"
 version = "0.1.0"
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 4f306452..c52d906b 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -16,7 +16,7 @@
 # under the License.
 
 [workspace]
-members = ["core", "spark-expr"]
+members = ["core", "spark-expr", "proto"]
 resolver = "2"
 
 [workspace.package]
datafusion-physical-plan = { git = 
"https://github.com/apache/datafusion.git", r
datafusion-physical-expr-common = { git = 
"https://github.com/apache/datafusion.git", rev = "40.0.0", default-features = 
false }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", 
rev = "40.0.0", default-features = false }
 datafusion-comet-spark-expr = { path = "spark-expr", version = "0.1.0" }
+datafusion-comet-proto = { path = "proto", version = "0.1.0" }
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
 chrono-tz = { version = "0.8" }
 num = "0.4"
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 90ead502..158c2631 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -76,9 +76,7 @@ regex = { workspace = true }
 crc32fast = "1.3.2"
 simd-adler32 = "0.3.7"
 datafusion-comet-spark-expr = { workspace = true }
-
-[build-dependencies]
-prost-build = "0.9.0"
+datafusion-comet-proto = { workspace = true }
 
 [dev-dependencies]
 pprof = { version = "0.13.0", features = ["flamegraph"] }
diff --git a/native/core/src/execution/datafusion/planner.rs 
b/native/core/src/execution/datafusion/planner.rs
index 7e638305..a6fefba6 100644
--- a/native/core/src/execution/datafusion/planner.rs
+++ b/native/core/src/execution/datafusion/planner.rs
@@ -63,9 +63,6 @@ use itertools::Itertools;
 use jni::objects::GlobalRef;
 use num::{BigInt, ToPrimitive};
 
-use 
crate::execution::spark_operator::lower_window_frame_bound::LowerFrameBoundStruct;
-use 
crate::execution::spark_operator::upper_window_frame_bound::UpperFrameBoundStruct;
-use crate::execution::spark_operator::WindowFrameType;
 use crate::{
     errors::ExpressionError,
     execution::{
@@ -94,17 +91,22 @@ use crate::{
         },
         operators::{CopyExec, ExecutionError, ScanExec},
         serde::to_arrow_datatype,
-        spark_expression,
-        spark_expression::{
-            agg_expr::ExprStruct as AggExprStruct, expr::ExprStruct, 
literal::Value, AggExpr, Expr,
-            ScalarFunc,
-        },
-        spark_operator::{operator::OpStruct, BuildSide, JoinType, Operator},
-        spark_partitioning::{partitioning::PartitioningStruct, Partitioning as 
SparkPartitioning},
     },
 };
 
 use super::expressions::{create_named_struct::CreateNamedStruct, EvalMode};
+use datafusion_comet_proto::{
+    spark_expression::{
+        self, agg_expr::ExprStruct as AggExprStruct, expr::ExprStruct, 
literal::Value, AggExpr,
+        Expr, ScalarFunc,
+    },
+    spark_operator::{
+        self, lower_window_frame_bound::LowerFrameBoundStruct, 
operator::OpStruct,
+        upper_window_frame_bound::UpperFrameBoundStruct, BuildSide, JoinType, 
Operator,
+        WindowFrameType,
+    },
+    spark_partitioning::{partitioning::PartitioningStruct, Partitioning as 
SparkPartitioning},
+};
 use datafusion_comet_spark_expr::{
     Abs, Cast, DateTruncExec, HourExec, IfExpr, MinuteExec, SecondExec, 
TimestampTruncExec,
 };
@@ -1452,7 +1454,7 @@ impl PhysicalPlanner {
     /// Create a DataFusion windows physical expression from Spark physical 
expression
     fn create_window_expr<'a>(
         &'a self,
-        spark_expr: &'a crate::execution::spark_operator::WindowExpr,
+        spark_expr: &'a spark_operator::WindowExpr,
         input_schema: SchemaRef,
         partition_by: &[Arc<dyn PhysicalExpr>],
         sort_exprs: &[PhysicalSortExpr],
@@ -1833,17 +1835,16 @@ mod tests {
     use datafusion::{physical_plan::common::collect, prelude::SessionContext};
     use tokio::sync::mpsc;
 
-    use crate::execution::{
-        datafusion::planner::PhysicalPlanner,
-        operators::InputBatch,
+    use crate::execution::{datafusion::planner::PhysicalPlanner, 
operators::InputBatch};
+
+    use crate::execution::operators::ExecutionError;
+    use datafusion_comet_proto::{
+        spark_expression::expr::ExprStruct::*,
         spark_expression::{self, literal},
         spark_operator,
+        spark_operator::{operator::OpStruct, Operator},
     };
 
-    use crate::execution::operators::ExecutionError;
-    use spark_expression::expr::ExprStruct::*;
-    use spark_operator::{operator::OpStruct, Operator};
-
     #[test]
     fn test_unpack_dictionary_primitive() {
         let op_scan = Operator {
diff --git a/native/core/src/execution/jni_api.rs 
b/native/core/src/execution/jni_api.rs
index bc194238..d326b4f3 100644
--- a/native/core/src/execution/jni_api.rs
+++ b/native/core/src/execution/jni_api.rs
@@ -49,10 +49,10 @@ use crate::{
     execution::{
         datafusion::planner::PhysicalPlanner, 
metrics::utils::update_comet_metric,
         serde::to_arrow_datatype, shuffle::row::process_sorted_row_partition, 
sort::RdxSort,
-        spark_operator::Operator,
     },
     jvm_bridge::{jni_new_global_ref, JVMClasses},
 };
+use datafusion_comet_proto::spark_operator::Operator;
 use futures::stream::StreamExt;
 use jni::{
     objects::GlobalRef,
diff --git a/native/core/src/execution/mod.rs b/native/core/src/execution/mod.rs
index cdd42923..f1793570 100644
--- a/native/core/src/execution/mod.rs
+++ b/native/core/src/execution/mod.rs
@@ -32,24 +32,6 @@ pub(crate) mod utils;
 mod memory_pool;
 pub use memory_pool::*;
 
-// Include generated modules from .proto files.
-#[allow(missing_docs)]
-pub mod spark_expression {
-    include!(concat!("generated", "/spark.spark_expression.rs"));
-}
-
-// Include generated modules from .proto files.
-#[allow(missing_docs)]
-pub mod spark_partitioning {
-    include!(concat!("generated", "/spark.spark_partitioning.rs"));
-}
-
-// Include generated modules from .proto files.
-#[allow(missing_docs)]
-pub mod spark_operator {
-    include!(concat!("generated", "/spark.spark_operator.rs"));
-}
-
 #[cfg(test)]
 mod tests {
     #[test]
diff --git a/native/core/src/execution/serde.rs 
b/native/core/src/execution/serde.rs
index b88e3d65..659ce41f 100644
--- a/native/core/src/execution/serde.rs
+++ b/native/core/src/execution/serde.rs
@@ -17,19 +17,20 @@
 
 //! Ser/De for expression/operators.
 
-use super::{
-    operators::ExecutionError, spark_expression, spark_expression::DataType, 
spark_operator,
-};
-use crate::{
-    errors::ExpressionError,
-    execution::spark_expression::data_type::{
+use super::operators::ExecutionError;
+use crate::errors::ExpressionError;
+use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
+use arrow_schema::{Field, Fields};
+use datafusion_comet_proto::{
+    spark_expression,
+    spark_expression::data_type::{
         data_type_info::DatatypeStruct,
         DataTypeId,
         DataTypeId::{Bool, Bytes, Decimal, Double, Float, Int16, Int32, Int64, 
Int8, String},
     },
+    spark_expression::DataType,
+    spark_operator,
 };
-use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
-use arrow_schema::{Field, Fields};
 use prost::Message;
 use std::{io::Cursor, sync::Arc};
 
diff --git a/native/proto/Cargo.toml b/native/proto/Cargo.toml
new file mode 100644
index 00000000..29aba639
--- /dev/null
+++ b/native/proto/Cargo.toml
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-comet-proto"
+version = "0.1.0"
+homepage = "https://datafusion.apache.org/comet"
+repository = "https://github.com/apache/datafusion-comet"
+authors = ["Apache DataFusion <[email protected]>"]
+description = "Apache DataFusion Comet: High performance accelerator for 
Apache Spark"
+readme = "README.md"
+license = "Apache-2.0"
+edition = "2021"
+
+[dependencies]
+prost = "0.12.1"
+
+[build-dependencies]
+prost-build = "0.9.0"
+
+[features]
+default = []
+
+[lib]
+name = "datafusion_comet_proto"
+path = "src/lib.rs"
+
+
+
+
diff --git a/native/proto/README.md b/native/proto/README.md
new file mode 100644
index 00000000..34bd79c6
--- /dev/null
+++ b/native/proto/README.md
@@ -0,0 +1,23 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# Apache DataFusion Comet: Intermediate Representation of Query Plan
+
+This crate contains the protocol buffer definitions of Spark physical query 
plans 
+and is intended to be used as part of the Apache DataFusion Comet project.
\ No newline at end of file
diff --git a/native/core/build.rs b/native/proto/build.rs
similarity index 79%
rename from native/core/build.rs
rename to native/proto/build.rs
index 14ff0c11..e707f0c3 100644
--- a/native/core/build.rs
+++ b/native/proto/build.rs
@@ -20,20 +20,20 @@
 use std::{fs, io::Result, path::Path};
 
 fn main() -> Result<()> {
-    println!("cargo:rerun-if-changed=src/execution/proto/");
+    println!("cargo:rerun-if-changed=src/proto/");
 
-    let out_dir = "src/execution/generated";
+    let out_dir = "src/generated";
     if !Path::new(out_dir).is_dir() {
         fs::create_dir(out_dir)?;
     }
 
     prost_build::Config::new().out_dir(out_dir).compile_protos(
         &[
-            "src/execution/proto/expr.proto",
-            "src/execution/proto/partitioning.proto",
-            "src/execution/proto/operator.proto",
+            "src/proto/expr.proto",
+            "src/proto/partitioning.proto",
+            "src/proto/operator.proto",
         ],
-        &["src/execution/proto"],
+        &["src/proto"],
     )?;
     Ok(())
 }
diff --git a/native/core/src/execution/mod.rs b/native/proto/src/lib.rs
similarity index 74%
copy from native/core/src/execution/mod.rs
copy to native/proto/src/lib.rs
index cdd42923..eaf1253c 100644
--- a/native/core/src/execution/mod.rs
+++ b/native/proto/src/lib.rs
@@ -15,23 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! PoC of vectorization execution through JNI to Rust.
-pub mod datafusion;
-pub mod jni_api;
-
-pub mod kernels; // for benchmarking
-
-mod metrics;
-pub mod operators;
-pub mod serde;
-pub mod shuffle;
-pub(crate) mod sort;
-pub use datafusion_comet_spark_expr::timezone;
-pub(crate) mod utils;
-
-mod memory_pool;
-pub use memory_pool::*;
-
 // Include generated modules from .proto files.
 #[allow(missing_docs)]
 pub mod spark_expression {
@@ -49,12 +32,3 @@ pub mod spark_partitioning {
 pub mod spark_operator {
     include!(concat!("generated", "/spark.spark_operator.rs"));
 }
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn it_works() {
-        let result = 2 + 2;
-        assert_eq!(result, 4);
-    }
-}
diff --git a/native/core/src/execution/proto/expr.proto 
b/native/proto/src/proto/expr.proto
similarity index 100%
rename from native/core/src/execution/proto/expr.proto
rename to native/proto/src/proto/expr.proto
diff --git a/native/core/src/execution/proto/operator.proto 
b/native/proto/src/proto/operator.proto
similarity index 100%
rename from native/core/src/execution/proto/operator.proto
rename to native/proto/src/proto/operator.proto
diff --git a/native/core/src/execution/proto/partitioning.proto 
b/native/proto/src/proto/partitioning.proto
similarity index 100%
rename from native/core/src/execution/proto/partitioning.proto
rename to native/proto/src/proto/partitioning.proto
diff --git a/spark/pom.xml b/spark/pom.xml
index 49672e0b..70ea3218 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -152,7 +152,7 @@ under the License.
             <configuration>
               
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <inputDirectories>
-                <include>../native/core/src/execution/proto</include>
+                <include>../native/proto/src/proto</include>
               </inputDirectories>
             </configuration>
           </execution>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to