This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 60ea6dea2f refactor byte_to_string and string_to_byte (#7091)
60ea6dea2f is described below

commit 60ea6dea2f04b5e77c89b4dc533325b778d3dc75
Author: parkma99 <[email protected]>
AuthorDate: Thu Jul 27 15:31:52 2023 +0800

    refactor byte_to_string and string_to_byte (#7091)
    
    * refactor byte_to_string and string_to_byte
    
    * update
    
    * use pub(crate)
    
    ---------
    
    Co-authored-by: jakevin <[email protected]>
---
 datafusion/proto/src/common.rs            | 24 ++++++++++--------------
 datafusion/proto/src/logical_plan/mod.rs  | 12 ++++++------
 datafusion/proto/src/physical_plan/mod.rs | 14 +++++++-------
 3 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/datafusion/proto/src/common.rs b/datafusion/proto/src/common.rs
index ed826f5874..cbbb469f08 100644
--- a/datafusion/proto/src/common.rs
+++ b/datafusion/proto/src/common.rs
@@ -17,26 +17,22 @@
 
 use datafusion_common::{DataFusionError, Result};
 
-pub fn csv_delimiter_to_string(b: u8) -> Result<String> {
-    let b = &[b];
-    let b = std::str::from_utf8(b)
-        .map_err(|_| DataFusionError::Internal("Invalid CSV 
delimiter".to_owned()))?;
-    Ok(b.to_owned())
-}
-
-pub fn str_to_byte(s: &String) -> Result<u8> {
+pub(crate) fn str_to_byte(s: &String, description: &str) -> Result<u8> {
     if s.len() != 1 {
-        return Err(DataFusionError::Internal(
-            "Invalid CSV delimiter".to_owned(),
-        ));
+        return Err(DataFusionError::Internal(format!(
+            "Invalid CSV {description}: expected single character, got {s}"
+        )));
     }
     Ok(s.as_bytes()[0])
 }
 
-pub fn byte_to_string(b: u8) -> Result<String> {
+pub(crate) fn byte_to_string(b: u8, description: &str) -> Result<String> {
     let b = &[b];
-    let b = std::str::from_utf8(b)
-        .map_err(|_| DataFusionError::Internal("Invalid CSV 
delimiter".to_owned()))?;
+    let b = std::str::from_utf8(b).map_err(|_| {
+        DataFusionError::Internal(format!(
+            "Invalid CSV {description}: can not represent {b:0x?} as utf8"
+        ))
+    })?;
     Ok(b.to_owned())
 }
 
diff --git a/datafusion/proto/src/logical_plan/mod.rs 
b/datafusion/proto/src/logical_plan/mod.rs
index 9d7017eec6..405c58c20b 100644
--- a/datafusion/proto/src/logical_plan/mod.rs
+++ b/datafusion/proto/src/logical_plan/mod.rs
@@ -353,10 +353,10 @@ impl AsLogicalPlan for LogicalPlanNode {
                         }) => {
                             let mut csv = CsvFormat::default()
                             .with_has_header(*has_header)
-                            .with_delimiter(str_to_byte(delimiter)?)
-                            .with_quote(str_to_byte(quote)?);
+                            .with_delimiter(str_to_byte(delimiter, 
"delimiter")?)
+                            .with_quote(str_to_byte(quote, "quote")?);
                             if let 
Some(protobuf::csv_format::OptionalEscape::Escape(escape)) = optional_escape {
-                                csv = csv.with_quote(str_to_byte(escape)?);
+                                csv = csv.with_quote(str_to_byte(escape, 
"escape")?);
                             }
                             Arc::new(csv)},
                         FileFormatType::Avro(..) => Arc::new(AvroFormat),
@@ -850,12 +850,12 @@ impl AsLogicalPlan for LogicalPlanNode {
                         FileFormatType::Parquet(protobuf::ParquetFormat {})
                     } else if let Some(csv) = any.downcast_ref::<CsvFormat>() {
                         FileFormatType::Csv(protobuf::CsvFormat {
-                            delimiter: byte_to_string(csv.delimiter())?,
+                            delimiter: byte_to_string(csv.delimiter(), 
"delimiter")?,
                             has_header: csv.has_header(),
-                            quote: byte_to_string(csv.quote())?,
+                            quote: byte_to_string(csv.quote(), "quote")?,
                             optional_escape: if let Some(escape) = 
csv.escape() {
                                 
Some(protobuf::csv_format::OptionalEscape::Escape(
-                                    byte_to_string(escape)?,
+                                    byte_to_string(escape, "escape")?,
                                 ))
                             } else {
                                 None
diff --git a/datafusion/proto/src/physical_plan/mod.rs 
b/datafusion/proto/src/physical_plan/mod.rs
index cebe4eddcc..e97a773d34 100644
--- a/datafusion/proto/src/physical_plan/mod.rs
+++ b/datafusion/proto/src/physical_plan/mod.rs
@@ -51,8 +51,8 @@ use datafusion_common::{DataFusionError, Result};
 use prost::bytes::BufMut;
 use prost::Message;
 
+use crate::common::str_to_byte;
 use crate::common::{byte_to_string, proto_error};
-use crate::common::{csv_delimiter_to_string, str_to_byte};
 use crate::physical_plan::from_proto::{
     parse_physical_expr, parse_physical_sort_expr, 
parse_protobuf_file_scan_config,
 };
@@ -155,13 +155,13 @@ impl AsExecutionPlan for PhysicalPlanNode {
                     registry,
                 )?,
                 scan.has_header,
-                str_to_byte(&scan.delimiter)?,
-                str_to_byte(&scan.quote)?,
+                str_to_byte(&scan.delimiter, "delimiter")?,
+                str_to_byte(&scan.quote, "quote")?,
                 if let 
Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape(
                     escape,
                 )) = &scan.optional_escape
                 {
-                    Some(str_to_byte(escape)?)
+                    Some(str_to_byte(escape, "escape")?)
                 } else {
                     None
                 },
@@ -1079,11 +1079,11 @@ impl AsExecutionPlan for PhysicalPlanNode {
                     protobuf::CsvScanExecNode {
                         base_conf: Some(exec.base_config().try_into()?),
                         has_header: exec.has_header(),
-                        delimiter: csv_delimiter_to_string(exec.delimiter())?,
-                        quote: byte_to_string(exec.quote())?,
+                        delimiter: byte_to_string(exec.delimiter(), 
"delimiter")?,
+                        quote: byte_to_string(exec.quote(), "quote")?,
                         optional_escape: if let Some(escape) = exec.escape() {
                             
Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape(
-                                byte_to_string(escape)?,
+                                byte_to_string(escape, "escape")?,
                             ))
                         } else {
                             None

Reply via email to