alamb commented on code in PR #4659:
URL: https://github.com/apache/arrow-datafusion/pull/4659#discussion_r1051172520
##########
datafusion/proto/src/common.rs:
##########
@@ -33,6 +33,46 @@ pub fn str_to_byte(s: &String) -> Result<u8,
DataFusionError> {
Ok(s.as_bytes()[0])
}
-pub(crate) fn proto_error<S: Into<String>>(message: S) -> DataFusionError {
+pub fn byte_to_string(b: u8) -> Result<String, DataFusionError> {
+ let b = &[b];
+ let b = std::str::from_utf8(b)
+ .map_err(|_| DataFusionError::Internal("Invalid CSV
delimiter".to_owned()))?;
+ Ok(b.to_owned())
+}
+
+#[macro_export]
+macro_rules! convert_required {
+ ($PB:expr) => {{
+ if let Some(field) = $PB.as_ref() {
+ Ok(field.try_into()?)
+ } else {
+ Err(proto_error("Missing required field in protobuf"))
Review Comment:
it might be helpful (as a follow on PR) to include the field name that were
missing
##########
datafusion/proto/src/physical_plan/from_proto.rs:
##########
@@ -304,6 +306,96 @@ pub fn parse_protobuf_hash_partitioning(
}
}
+pub fn parse_protobuf_file_scan_config(
Review Comment:
👍
##########
datafusion/proto/proto/datafusion.proto:
##########
@@ -1330,77 +1337,4 @@ message ColumnStats {
ScalarValue max_value = 2;
uint32 null_count = 3;
uint32 distinct_count = 4;
-}
-
-message PartitionLocation {
Review Comment:
👍 -- I believe these are ballista specific
##########
datafusion/proto/proto/datafusion.proto:
##########
@@ -1133,17 +1133,24 @@ message ScanLimit {
message FileScanExecConf {
repeated FileGroup file_groups = 1;
- datafusion.Schema schema = 2;
+ Schema schema = 2;
repeated uint32 projection = 4;
ScanLimit limit = 5;
Statistics statistics = 6;
repeated string table_partition_cols = 7;
string object_store_url = 8;
+ repeated PhysicalSortExprNode output_ordering = 9;
+ repeated ConfigOption options = 10;
Review Comment:
I general I am not sure it is a good idea to serialize the config options as
upon deserialization they will be their own copy.
However that being said, perhaps the issue is that `FileScanExecConfig` has
a copy of the config options in the first place. I think that should get better
over time as we consolidate the configuration more
##########
datafusion/proto/src/physical_plan/to_proto.rs:
##########
@@ -40,99 +40,89 @@ use datafusion::physical_plan::expressions::{Avg,
BinaryExpr, Column, Max, Min,
use datafusion::physical_plan::{AggregateExpr, PhysicalExpr};
use crate::protobuf;
+use crate::protobuf::{ConfigOption, PhysicalSortExprNode};
use datafusion::logical_expr::BuiltinScalarFunction;
use datafusion::physical_expr::expressions::DateTimeIntervalExpr;
use datafusion::physical_expr::ScalarFunctionExpr;
+use datafusion::physical_plan::joins::utils::JoinSide;
use datafusion_common::DataFusionError;
-impl TryInto<protobuf::PhysicalExprNode> for Arc<dyn AggregateExpr> {
+impl TryFrom<Arc<dyn AggregateExpr>> for protobuf::PhysicalExprNode {
Review Comment:
👍
##########
datafusion/proto/src/lib.rs:
##########
@@ -17,1385 +17,13 @@
//! Serde code for logical plans and expressions.
-use datafusion_common::DataFusionError;
-
pub mod bytes;
-mod common;
-pub mod from_proto;
+pub mod common;
pub mod generated;
pub mod logical_plan;
pub mod physical_plan;
-pub mod to_proto;
pub use generated::datafusion as protobuf;
#[cfg(doctest)]
doc_comment::doctest!("../README.md", readme_example_test);
-
-impl From<from_proto::Error> for DataFusionError {
- fn from(e: from_proto::Error) -> Self {
- DataFusionError::Plan(e.to_string())
- }
-}
-
-impl From<to_proto::Error> for DataFusionError {
- fn from(e: to_proto::Error) -> Self {
- DataFusionError::Plan(e.to_string())
- }
-}
-
-#[cfg(test)]
-mod roundtrip_tests {
Review Comment:
These tests are moved to `datafusion/proto/src/logical_plan/mod.rs`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]