michalursa commented on a change in pull request #12067:
URL: https://github.com/apache/arrow/pull/12067#discussion_r826485814



##########
File path: cpp/src/arrow/compute/exec/key_hash.h
##########
@@ -32,76 +32,161 @@ namespace compute {
 // Implementations are based on xxh3 32-bit algorithm description from:
 // https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md
 //
-class Hashing {
+class Hashing32 {
  public:
-  static void hash_fixed(int64_t hardware_flags, uint32_t num_keys, uint32_t 
length_key,
-                         const uint8_t* keys, uint32_t* hashes);
+  static void hash_fixed(int64_t hardware_flags, bool combine_hashes, uint32_t 
num_keys,
+                         uint64_t length_key, const uint8_t* keys, uint32_t* 
hashes,
+                         uint32_t* temp_hashes_for_combine);
 
-  static void hash_varlen(int64_t hardware_flags, uint32_t num_rows,
+  static void hash_varlen(int64_t hardware_flags, bool combine_hashes, 
uint32_t num_rows,
                           const uint32_t* offsets, const uint8_t* 
concatenated_keys,
-                          uint32_t* temp_buffer,  // Needs to hold 4 x 32-bit 
per row
-                          uint32_t* hashes);
+                          uint32_t* hashes, uint32_t* temp_hashes_for_combine);
+
+  static void hash_varlen(int64_t hardware_flags, bool combine_hashes, 
uint32_t num_rows,
+                          const uint64_t* offsets, const uint8_t* 
concatenated_keys,
+                          uint32_t* hashes, uint32_t* temp_hashes_for_combine);
 
   static void HashMultiColumn(const std::vector<KeyEncoder::KeyColumnArray>& 
cols,
                               KeyEncoder::KeyEncoderContext* ctx, uint32_t* 
out_hash);
 
  private:
-  static const uint32_t PRIME32_1 = 0x9E3779B1;  // 
0b10011110001101110111100110110001
-  static const uint32_t PRIME32_2 = 0x85EBCA77;  // 
0b10000101111010111100101001110111
-  static const uint32_t PRIME32_3 = 0xC2B2AE3D;  // 
0b11000010101100101010111000111101
-  static const uint32_t PRIME32_4 = 0x27D4EB2F;  // 
0b00100111110101001110101100101111
-  static const uint32_t PRIME32_5 = 0x165667B1;  // 
0b00010110010101100110011110110001
-
-  static void HashCombine(KeyEncoder::KeyEncoderContext* ctx, uint32_t 
num_rows,
-                          uint32_t* accumulated_hash, const uint32_t* 
next_column_hash);
+  static const uint32_t PRIME32_1 = 0x9E3779B1;
+  static const uint32_t PRIME32_2 = 0x85EBCA77;
+  static const uint32_t PRIME32_3 = 0xC2B2AE3D;
+  static const uint32_t PRIME32_4 = 0x27D4EB2F;
+  static const uint32_t PRIME32_5 = 0x165667B1;
+  static const uint32_t kCombineConst = 0x9e3779b9UL;
+  static const int64_t kStripeSize = 4 * sizeof(uint32_t);
+
+  static inline uint32_t avalanche(uint32_t acc) {

Review comment:
       They should be camel case. Pre-existing code (original implementation) 
used snake case. I can change that.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to