adriangb commented on code in PR #22502:
URL: https://github.com/apache/datafusion/pull/22502#discussion_r3318695601
##########
datafusion/physical-plan/src/joins/hash_join/partitioned_hash_eval.rs:
##########
@@ -498,6 +549,172 @@ mod tests {
assert_eq!(compute_hash(&expr1), compute_hash(&expr2));
}
+ #[cfg(feature = "proto")]
+ mod proto_tests {
+ use super::*;
+ use arrow::datatypes::{DataType, Field};
+ use datafusion_common::internal_datafusion_err;
+ use datafusion_physical_expr_common::physical_expr::proto_decode::{
+ PhysicalExprDecode, PhysicalExprDecodeCtx,
+ };
+ use datafusion_physical_expr_common::physical_expr::proto_encode::{
+ PhysicalExprEncode, PhysicalExprEncodeCtx,
+ };
+ use datafusion_proto_models::protobuf;
+
+ struct TestEncoder;
+
+ impl PhysicalExprEncode for TestEncoder {
+ fn encode(
+ &self,
+ expr: &Arc<dyn PhysicalExpr>,
+ ) -> Result<protobuf::PhysicalExprNode> {
+ let ctx = PhysicalExprEncodeCtx::new(self);
+ expr.try_to_proto(&ctx)?.ok_or_else(|| {
+ internal_datafusion_err!("test encoder cannot encode {expr:?}")
+ })
+ }
+ }
+
+ struct TestDecoder;
+
+ impl PhysicalExprDecode for TestDecoder {
+ fn decode(
+ &self,
+ node: &protobuf::PhysicalExprNode,
+ schema: &Schema,
+ ) -> Result<Arc<dyn PhysicalExpr>> {
+ let ctx = PhysicalExprDecodeCtx::new(schema, self);
+ match &node.expr_type {
+ Some(protobuf::physical_expr_node::ExprType::Column(_)) => {
+ Column::try_from_proto(node, &ctx)
+ }
+ _ => internal_err!("test decoder cannot decode {node:?}"),
+ }
+ }
+ }
+
+ fn test_decode_ctx<'a>(
+ schema: &'a Schema,
+ decoder: &'a TestDecoder,
+ ) -> PhysicalExprDecodeCtx<'a> {
+ PhysicalExprDecodeCtx::new(schema, decoder)
+ }
Review Comment:
Should we move these test helpers into
https://github.com/apache/datafusion/blob/main/datafusion/physical-expr/src/proto_test_util.rs
?
##########
datafusion/physical-plan/src/joins/hash_join/partitioned_hash_eval.rs:
##########
@@ -199,6 +201,55 @@ impl PhysicalExpr for HashExpr {
fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.description)
}
+
+ #[cfg(feature = "proto")]
+ fn try_to_proto(
+ &self,
+ ctx: &datafusion_physical_expr_common::physical_expr::proto_encode::PhysicalExprEncodeCtx<'_>,
+ ) -> Result<Option<datafusion_proto_models::protobuf::PhysicalExprNode>> {
+ use datafusion_proto_models::protobuf;
+ let on_columns = ctx.encode_children_expressions(&self.on_columns)?;
+ Ok(Some(protobuf::PhysicalExprNode {
+ expr_id: None,
+ expr_type: Some(protobuf::physical_expr_node::ExprType::HashExpr(
+ protobuf::PhysicalHashExprNode {
+ on_columns,
+ seed0: self.seed(),
+ description: self.description.clone(),
+ },
+ )),
+ }))
+ }
+}
+
+#[cfg(feature = "proto")]
+impl HashExpr {
+ /// Reconstruct a [`HashExpr`] from its protobuf representation.
+ ///
+ /// Takes the whole [`PhysicalExprNode`], the exact inverse of what
+ /// [`PhysicalExpr::try_to_proto`] produces, so every expression's
+ /// `try_from_proto` shares one signature. Child sub-expressions are
+ /// decoded recursively via [`PhysicalExprDecodeCtx::decode`].
+ ///
+ /// [`PhysicalExprNode`]: datafusion_proto_models::protobuf::PhysicalExprNode
+ /// [`PhysicalExpr::try_to_proto`]: datafusion_physical_expr_common::physical_expr::PhysicalExpr::try_to_proto
+ /// [`PhysicalExprDecodeCtx::decode`]: datafusion_physical_expr_common::physical_expr::proto_decode::PhysicalExprDecodeCtx::decode
+ pub fn try_from_proto(
+ node: &datafusion_proto_models::protobuf::PhysicalExprNode,
+ ctx: &datafusion_physical_expr_common::physical_expr::proto_decode::PhysicalExprDecodeCtx<'_>,
+ ) -> Result<Arc<dyn PhysicalExpr>> {
+ use datafusion_proto_models::protobuf;
+ let hash_expr = match &node.expr_type {
+ Some(protobuf::physical_expr_node::ExprType::HashExpr(h)) => h,
+ _ => return internal_err!("PhysicalExprNode is not a HashExpr"),
+ };
Review Comment:
It might make sense to add a helper for this to `PhysicalExprDecodeCtx` so
that we also get unified error messages, etc.
##########
datafusion/physical-plan/Cargo.toml:
##########
@@ -39,14 +39,18 @@ workspace = true
[features]
force_hash_collisions = []
-test_utils = ["arrow/test_utils"]
-tokio_coop = []
-tokio_coop_fallback = []
+# Enables `PhysicalExpr::try_to_proto` / `try_from_proto` hooks on the
+# physical expressions defined in this crate (e.g. `HashExpr`). Off by
+# default so consumers that never serialize plans pay nothing.
proto = [
"dep:datafusion-proto-models",
"dep:datafusion-proto-common",
+ "datafusion-physical-expr/proto",
"datafusion-physical-expr-common/proto",
]
+test_utils = ["arrow/test_utils"]
+tokio_coop = []
+tokio_coop_fallback = []
Review Comment:
I guess a linter auto-sorted these, but can we move them back up to avoid
conflicts?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]