This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 60ea6dea2f refactor byte_to_string and string_to_byte (#7091)
60ea6dea2f is described below
commit 60ea6dea2f04b5e77c89b4dc533325b778d3dc75
Author: parkma99 <[email protected]>
AuthorDate: Thu Jul 27 15:31:52 2023 +0800
refactor byte_to_string and string_to_byte (#7091)
* refactor byte_to_string and string_to_byte
* update
* use pub(crate)
---------
Co-authored-by: jakevin <[email protected]>
---
datafusion/proto/src/common.rs | 24 ++++++++++--------------
datafusion/proto/src/logical_plan/mod.rs | 12 ++++++------
datafusion/proto/src/physical_plan/mod.rs | 14 +++++++-------
3 files changed, 23 insertions(+), 27 deletions(-)
diff --git a/datafusion/proto/src/common.rs b/datafusion/proto/src/common.rs
index ed826f5874..cbbb469f08 100644
--- a/datafusion/proto/src/common.rs
+++ b/datafusion/proto/src/common.rs
@@ -17,26 +17,22 @@
use datafusion_common::{DataFusionError, Result};
-pub fn csv_delimiter_to_string(b: u8) -> Result<String> {
- let b = &[b];
- let b = std::str::from_utf8(b)
- .map_err(|_| DataFusionError::Internal("Invalid CSV
delimiter".to_owned()))?;
- Ok(b.to_owned())
-}
-
-pub fn str_to_byte(s: &String) -> Result<u8> {
+pub(crate) fn str_to_byte(s: &String, description: &str) -> Result<u8> {
if s.len() != 1 {
- return Err(DataFusionError::Internal(
- "Invalid CSV delimiter".to_owned(),
- ));
+ return Err(DataFusionError::Internal(format!(
+ "Invalid CSV {description}: expected single character, got {s}"
+ )));
}
Ok(s.as_bytes()[0])
}
-pub fn byte_to_string(b: u8) -> Result<String> {
+pub(crate) fn byte_to_string(b: u8, description: &str) -> Result<String> {
let b = &[b];
- let b = std::str::from_utf8(b)
- .map_err(|_| DataFusionError::Internal("Invalid CSV
delimiter".to_owned()))?;
+ let b = std::str::from_utf8(b).map_err(|_| {
+ DataFusionError::Internal(format!(
+ "Invalid CSV {description}: can not represent {b:0x?} as utf8"
+ ))
+ })?;
Ok(b.to_owned())
}
diff --git a/datafusion/proto/src/logical_plan/mod.rs
b/datafusion/proto/src/logical_plan/mod.rs
index 9d7017eec6..405c58c20b 100644
--- a/datafusion/proto/src/logical_plan/mod.rs
+++ b/datafusion/proto/src/logical_plan/mod.rs
@@ -353,10 +353,10 @@ impl AsLogicalPlan for LogicalPlanNode {
}) => {
let mut csv = CsvFormat::default()
.with_has_header(*has_header)
- .with_delimiter(str_to_byte(delimiter)?)
- .with_quote(str_to_byte(quote)?);
+ .with_delimiter(str_to_byte(delimiter,
"delimiter")?)
+ .with_quote(str_to_byte(quote, "quote")?);
if let
Some(protobuf::csv_format::OptionalEscape::Escape(escape)) = optional_escape {
- csv = csv.with_quote(str_to_byte(escape)?);
+ csv = csv.with_quote(str_to_byte(escape,
"escape")?);
}
Arc::new(csv)},
FileFormatType::Avro(..) => Arc::new(AvroFormat),
@@ -850,12 +850,12 @@ impl AsLogicalPlan for LogicalPlanNode {
FileFormatType::Parquet(protobuf::ParquetFormat {})
} else if let Some(csv) = any.downcast_ref::<CsvFormat>() {
FileFormatType::Csv(protobuf::CsvFormat {
- delimiter: byte_to_string(csv.delimiter())?,
+ delimiter: byte_to_string(csv.delimiter(),
"delimiter")?,
has_header: csv.has_header(),
- quote: byte_to_string(csv.quote())?,
+ quote: byte_to_string(csv.quote(), "quote")?,
optional_escape: if let Some(escape) =
csv.escape() {
Some(protobuf::csv_format::OptionalEscape::Escape(
- byte_to_string(escape)?,
+ byte_to_string(escape, "escape")?,
))
} else {
None
diff --git a/datafusion/proto/src/physical_plan/mod.rs
b/datafusion/proto/src/physical_plan/mod.rs
index cebe4eddcc..e97a773d34 100644
--- a/datafusion/proto/src/physical_plan/mod.rs
+++ b/datafusion/proto/src/physical_plan/mod.rs
@@ -51,8 +51,8 @@ use datafusion_common::{DataFusionError, Result};
use prost::bytes::BufMut;
use prost::Message;
+use crate::common::str_to_byte;
use crate::common::{byte_to_string, proto_error};
-use crate::common::{csv_delimiter_to_string, str_to_byte};
use crate::physical_plan::from_proto::{
parse_physical_expr, parse_physical_sort_expr,
parse_protobuf_file_scan_config,
};
@@ -155,13 +155,13 @@ impl AsExecutionPlan for PhysicalPlanNode {
registry,
)?,
scan.has_header,
- str_to_byte(&scan.delimiter)?,
- str_to_byte(&scan.quote)?,
+ str_to_byte(&scan.delimiter, "delimiter")?,
+ str_to_byte(&scan.quote, "quote")?,
if let
Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape(
escape,
)) = &scan.optional_escape
{
- Some(str_to_byte(escape)?)
+ Some(str_to_byte(escape, "escape")?)
} else {
None
},
@@ -1079,11 +1079,11 @@ impl AsExecutionPlan for PhysicalPlanNode {
protobuf::CsvScanExecNode {
base_conf: Some(exec.base_config().try_into()?),
has_header: exec.has_header(),
- delimiter: csv_delimiter_to_string(exec.delimiter())?,
- quote: byte_to_string(exec.quote())?,
+ delimiter: byte_to_string(exec.delimiter(),
"delimiter")?,
+ quote: byte_to_string(exec.quote(), "quote")?,
optional_escape: if let Some(escape) = exec.escape() {
Some(protobuf::csv_scan_exec_node::OptionalEscape::Escape(
- byte_to_string(escape)?,
+ byte_to_string(escape, "escape")?,
))
} else {
None