Dandandan commented on a change in pull request #9376:
URL: https://github.com/apache/arrow/pull/9376#discussion_r571419585
##########
File path: rust/datafusion/src/physical_plan/crypto_expressions.rs
##########
@@ -49,58 +58,142 @@ fn sha_process<D: SHA2Digest + Default>(input: &str) ->
SHA2DigestOutput<D> {
digest.finalize()
}
-macro_rules! crypto_unary_string_function {
- ($NAME:ident, $FUNC:expr) => {
- /// crypto function that accepts Utf8 or LargeUtf8 and returns Utf8 string
- pub fn $NAME<T: StringOffsetSizeTrait>(
- args: &[ArrayRef],
- ) -> Result<GenericStringArray<i32>> {
- if args.len() != 1 {
- return Err(DataFusionError::Internal(format!(
- "{:?} args were supplied but {} takes exactly one
argument",
- args.len(),
- String::from(stringify!($NAME)),
- )));
- }
+/// # Errors
+/// This function errors when:
+/// * the number of arguments is not 1
+/// * the first argument is not castable to a `GenericStringArray`
+fn unary_binary_function<T, R, F>(
+ args: &[&dyn Array],
+ op: F,
+ name: &str,
+) -> Result<BinaryArray>
+where
+ R: AsRef<[u8]>,
+ T: StringOffsetSizeTrait,
+ F: Fn(&str) -> R,
+{
+ if args.len() != 1 {
+ return Err(DataFusionError::Internal(format!(
+ "{:?} args were supplied but {} takes exactly one argument",
+ args.len(),
+ name,
+ )));
+ }
+
+ let array = args[0]
+ .as_any()
+ .downcast_ref::<GenericStringArray<T>>()
+ .ok_or_else(|| {
+ DataFusionError::Internal("failed to downcast to
string".to_string())
+ })?;
- let array = args[0]
- .as_any()
- .downcast_ref::<GenericStringArray<T>>()
- .unwrap();
+ // first map is the iterator, second is for the `Option<_>`
+ Ok(array.iter().map(|x| x.map(|x| op(x))).collect())
+}
+
+fn handle<F, R>(args: &[ColumnarValue], op: F, name: &str) ->
Result<ColumnarValue>
+where
+ R: AsRef<[u8]>,
+ F: Fn(&str) -> R,
+{
+ match &args[0] {
+ ColumnarValue::Array(a) => match a.data_type() {
+ DataType::Utf8 => {
+ Ok(ColumnarValue::Array(Arc::new(unary_binary_function::<
+ i32,
+ _,
+ _,
+ >(
+ &[a.as_ref()], op, name
+ )?)))
+ }
+ DataType::LargeUtf8 => {
+ Ok(ColumnarValue::Array(Arc::new(unary_binary_function::<
+ i64,
+ _,
+ _,
+ >(
+ &[a.as_ref()], op, name
+ )?)))
+ }
+ other => Err(DataFusionError::Internal(format!(
+ "Unsupported data type {:?} for function {}",
+ other, name,
+ ))),
+ },
+ ColumnarValue::Scalar(scalar) => match scalar {
+ ScalarValue::Utf8(a) => {
+ let result = a.as_ref().map(|x| (op)(x).as_ref().to_vec());
+ Ok(ColumnarValue::Scalar(ScalarValue::Binary(result)))
+ }
+ ScalarValue::LargeUtf8(a) => {
+ let result = a.as_ref().map(|x| (op)(x).as_ref().to_vec());
+ Ok(ColumnarValue::Scalar(ScalarValue::Binary(result)))
+ }
+ other => Err(DataFusionError::Internal(format!(
+ "Unsupported data type {:?} for function {}",
+ other, name,
+ ))),
+ },
+ }
+}
- // first map is the iterator, second is for the `Option<_>`
- Ok(array.iter().map(|x| x.map(|x| $FUNC(x))).collect())
- }
- };
+fn md5_array<T: StringOffsetSizeTrait>(
+ args: &[&dyn Array],
+) -> Result<GenericStringArray<i32>> {
+ unary_string_function::<T, i32, _, _>(args, md5_process, "md5")
}
-macro_rules! crypto_unary_binary_function {
- ($NAME:ident, $FUNC:expr) => {
- /// crypto function that accepts Utf8 or LargeUtf8 and returns Binary
- pub fn $NAME<T: StringOffsetSizeTrait>(
- args: &[ArrayRef],
- ) -> Result<GenericBinaryArray<i32>> {
- if args.len() != 1 {
- return Err(DataFusionError::Internal(format!(
- "{:?} args were supplied but {} takes exactly one
argument",
- args.len(),
- String::from(stringify!($NAME)),
- )));
+/// crypto function that accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]
+pub fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ match &args[0] {
+ ColumnarValue::Array(a) => match a.data_type() {
+ DataType::Utf8 =>
Ok(ColumnarValue::Array(Arc::new(md5_array::<i32>(&[
+ a.as_ref()
+ ])?))),
+ DataType::LargeUtf8 => {
+ Ok(ColumnarValue::Array(Arc::new(md5_array::<i64>(&[
+ a.as_ref()
+ ])?)))
}
+ other => Err(DataFusionError::Internal(format!(
+ "Unsupported data type {:?} for function md5",
+ other,
+ ))),
+ },
+ ColumnarValue::Scalar(scalar) => match scalar {
+ ScalarValue::Utf8(a) => {
+ let result = a.as_ref().map(|x| md5_process(x));
+ Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
+ }
+ ScalarValue::LargeUtf8(a) => {
+ let result = a.as_ref().map(|x| md5_process(x));
Review comment:
```suggestion
let result = a.as_ref().map(md5_process);
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org