advancedxy commented on code in PR #245:
URL:
https://github.com/apache/arrow-datafusion-comet/pull/245#discussion_r1554840875
##########
core/src/execution/datafusion/spark_hash.rs:
##########
@@ -364,107 +363,154 @@ mod tests {
use crate::execution::datafusion::spark_hash::{create_hashes, pmod};
use datafusion::arrow::array::{ArrayRef, Int32Array, Int64Array,
Int8Array, StringArray};
+ macro_rules! test_primitive {
+ ($ty:ty, $values:expr, $expected:expr) => {
+ let i = Arc::new(<$ty>::from($values)) as ArrayRef;
+ let mut hashes = vec![42; $values.len()];
+ create_hashes(&[i], &mut hashes).unwrap();
+ assert_eq!(hashes, $expected);
+ };
+ }
+
#[test]
fn test_i8() {
- let i = Arc::new(Int8Array::from(vec![
- Some(1),
- Some(0),
- Some(-1),
- Some(i8::MAX),
- Some(i8::MIN),
- ])) as ArrayRef;
- let mut hashes = vec![42; 5];
- create_hashes(&[i], &mut hashes).unwrap();
-
- // generated with Spark Murmur3_x86_32
- let expected = vec![0xdea578e3, 0x379fae8f, 0xa0590e3d, 0x43b4d8ed,
0x422a1365];
- assert_eq!(hashes, expected);
+ test_primitive!(
+ Int8Array,
+ vec![Some(1), Some(0), Some(-1), Some(i8::MAX), Some(i8::MIN)],
+ vec![0xdea578e3, 0x379fae8f, 0xa0590e3d, 0x43b4d8ed, 0x422a1365]
+ );
+ // with null input
+ test_primitive!(
+ Int8Array,
+ vec![Some(1), None, Some(-1), Some(i8::MAX), Some(i8::MIN)],
+ vec![0xdea578e3, 42, 0xa0590e3d, 0x43b4d8ed, 0x422a1365]
+ );
}
#[test]
fn test_i32() {
- let i = Arc::new(Int32Array::from(vec![
- Some(1),
- Some(0),
- Some(-1),
- Some(i32::MAX),
- Some(i32::MIN),
- ])) as ArrayRef;
- let mut hashes = vec![42; 5];
- create_hashes(&[i], &mut hashes).unwrap();
-
- // generated with Spark Murmur3_x86_32
- let expected = vec![0xdea578e3, 0x379fae8f, 0xa0590e3d, 0x07fb67e7,
0x2b1f0fc6];
- assert_eq!(hashes, expected);
+ test_primitive!(
+ Int32Array,
+ vec![Some(1), Some(0), Some(-1), Some(i32::MAX), Some(i32::MIN)],
+ vec![0xdea578e3, 0x379fae8f, 0xa0590e3d, 0x07fb67e7, 0x2b1f0fc6]
+ );
+ // with null input
+ test_primitive!(
+ Int32Array,
+ vec![
+ Some(1),
+ Some(0),
+ Some(-1),
+ None,
+ Some(i32::MAX),
+ Some(i32::MIN)
+ ],
+ vec![0xdea578e3, 0x379fae8f, 0xa0590e3d, 42, 0x07fb67e7,
0x2b1f0fc6]
+ );
}
#[test]
fn test_i64() {
- let i = Arc::new(Int64Array::from(vec![
- Some(1),
- Some(0),
- Some(-1),
- Some(i64::MAX),
- Some(i64::MIN),
- ])) as ArrayRef;
- let mut hashes = vec![42; 5];
- create_hashes(&[i], &mut hashes).unwrap();
-
- // generated with Spark Murmur3_x86_32
- let expected = vec![0x99f0149d, 0x9c67b85d, 0xc8008529, 0xa05b5d7b,
0xcd1e64fb];
- assert_eq!(hashes, expected);
+ test_primitive!(
+ Int64Array,
+ vec![Some(1), Some(0), Some(-1), Some(i64::MAX), Some(i64::MIN)],
+ vec![0x99f0149d, 0x9c67b85d, 0xc8008529, 0xa05b5d7b, 0xcd1e64fb]
+ );
+ // with null input
+ test_primitive!(
+ Int64Array,
+ vec![
+ Some(1),
+ Some(0),
+ Some(-1),
+ None,
+ Some(i64::MAX),
+ Some(i64::MIN)
+ ],
+ vec![0x99f0149d, 0x9c67b85d, 0xc8008529, 42, 0xa05b5d7b,
0xcd1e64fb]
+ );
}
#[test]
fn test_f32() {
- let i = Arc::new(Float32Array::from(vec![
- Some(1.0),
- Some(0.0),
- Some(-0.0),
- Some(-1.0),
- Some(99999999999.99999999999),
- Some(-99999999999.99999999999),
- ])) as ArrayRef;
- let mut hashes = vec![42; 6];
- create_hashes(&[i], &mut hashes).unwrap();
-
- // generated with Spark Murmur3_x86_32
- let expected = vec![
- 0xe434cc39, 0x379fae8f, 0x379fae8f, 0xdc0da8eb, 0xcbdc340f,
0xc0361c86,
- ];
- assert_eq!(hashes, expected);
+ test_primitive!(
+ Float32Array,
+ vec![
+ Some(1.0),
+ Some(0.0),
+ Some(-0.0),
+ Some(-1.0),
+ Some(99999999999.99999999999),
+ Some(-99999999999.99999999999),
+ ],
+ vec![0xe434cc39, 0x379fae8f, 0x379fae8f, 0xdc0da8eb, 0xcbdc340f,
0xc0361c86]
+ );
+ // with null input
+ test_primitive!(
+ Float32Array,
+ vec![
+ Some(1.0),
+ Some(0.0),
+ Some(-0.0),
+ Some(-1.0),
+ None,
+ Some(99999999999.99999999999),
+ Some(-99999999999.99999999999)
+ ],
+ vec![0xe434cc39, 0x379fae8f, 0x379fae8f, 0xdc0da8eb, 42,
0xcbdc340f, 0xc0361c86]
+ );
}
#[test]
fn test_f64() {
- let i = Arc::new(Float64Array::from(vec![
- Some(1.0),
- Some(0.0),
- Some(-0.0),
- Some(-1.0),
- Some(99999999999.99999999999),
- Some(-99999999999.99999999999),
- ])) as ArrayRef;
- let mut hashes = vec![42; 6];
- create_hashes(&[i], &mut hashes).unwrap();
-
- // generated with Spark Murmur3_x86_32
- let expected = vec![
- 0xe4876492, 0x9c67b85d, 0x9c67b85d, 0x13d81357, 0xb87e1595,
0xa0eef9f9,
- ];
- assert_eq!(hashes, expected);
+ test_primitive!(
+ Float64Array,
+ vec![
+ Some(1.0),
+ Some(0.0),
+ Some(-0.0),
+ Some(-1.0),
+ Some(99999999999.99999999999),
+ Some(-99999999999.99999999999),
+ ],
+ vec![0xe4876492, 0x9c67b85d, 0x9c67b85d, 0x13d81357, 0xb87e1595,
0xa0eef9f9]
+ );
+ // with null input
+ test_primitive!(
+ Float64Array,
+ vec![
+ Some(1.0),
+ Some(0.0),
+ Some(-0.0),
+ Some(-1.0),
+ None,
+ Some(99999999999.99999999999),
+ Some(-99999999999.99999999999)
+ ],
+ vec![0xe4876492, 0x9c67b85d, 0x9c67b85d, 0x13d81357, 42,
0xb87e1595, 0xa0eef9f9]
+ );
}
#[test]
fn test_str() {
- let i = Arc::new(StringArray::from(vec!["hello", "bar", "", "😁",
"天地"]));
- let mut hashes = vec![42; 5];
- create_hashes(&[i], &mut hashes).unwrap();
-
- // generated with Murmur3Hash(Seq(Literal("")), 42).eval() since Spark
is tested against
- // this as well
- let expected = vec![3286402344, 2486176763, 142593372, 885025535,
2395000894];
- assert_eq!(hashes, expected);
+ test_primitive!(
Review Comment:
Hmm, I can rename the macro to `test_hashes`, since we have not added tests for
complex types yet.
However, I think we may have different views on the definition of a
primitive type. As I understand it, all types other than complex types (Array,
Map, Struct, and nested combinations of them) are considered primitive types.
Spark's lexer also considers String a primitive type; see
https://github.com/apache/spark/blob/4d9dbb35aacb6bd8ca1e5a6dff5076034b5a042b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala#L61.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]