zhilinli123 commented on code in PR #4772:
URL: https://github.com/apache/seatunnel/pull/4772#discussion_r1226857544


##########
seatunnel-connectors-v2/connector-clickhouse/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseFactoryTest.java:
##########
@@ -24,12 +24,43 @@
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
+import net.jpountz.xxhash.XXHash64;
+import net.jpountz.xxhash.XXHashFactory;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
 public class ClickhouseFactoryTest {
+    private static final XXHash64 HASH_INSTANCE = 
XXHashFactory.fastestInstance().hash64();
 
     @Test
     public void testOptionRule() {
         Assertions.assertNotNull((new ClickhouseSourceFactory()).optionRule());
         Assertions.assertNotNull((new ClickhouseSinkFactory()).optionRule());
         Assertions.assertNotNull((new 
ClickhouseFileSinkFactory()).optionRule());
     }
+
+    public int getShard(Object shardValue) {
+        int shardWeightCount = 6;
+        int offset =
+                (int)
+                        ((HASH_INSTANCE.hash(
+                                                ByteBuffer.wrap(
+                                                        shardValue
+                                                                .toString()
+                                                                
.getBytes(StandardCharsets.UTF_8)),
+                                                0)
+                                        & Long.MAX_VALUE)
+                                % shardWeightCount);
+        return offset;
+    }
+
+    @Test
+    public void testShared() {
+        String a = "a,b,c,d,e,f";
+        for (Object o : Arrays.stream(a.split(",")).toArray()) {
+            System.out.println(getShard(o));

Review Comment:
   > Please use `Assertions` to make sure the results are right. Also, please 
provide a test case to make sure that all shards can be selected. Tip: maybe 
we can use 10000 keys with random strings to invoke `getShard`, then count how 
often each shard index is returned by `getShard`. I believe that the larger 
the number of keys, the more balanced the shard indexes will be.
   ```
   public static void main(String[] args) {
           // Create an instance of the XXHash64 algorithm
           XXHashFactory factory = XXHashFactory.fastestInstance();
           XXHash64 hash64 = factory.hash64();
   
           // Define your input data
           byte[] input;
           ArrayList<String> strings = new ArrayList<>();
   
           Map<Long, Long> resultCount = new HashMap<>();
           for (int i = 1; i <= 100000; i++) {
               input = UUID.randomUUID().toString().getBytes();
               // Calculate the hash value
               long hashValue = hash64.hash(input, 0, input.length, 0);
   
               // Apply modulo operation to get a non-negative result
               int modulo = 6;
               long nonNegativeResult = (hashValue & Long.MAX_VALUE) % modulo;
               Long keyValue = resultCount.get(nonNegativeResult);  // 
获取当前键值对应的值
   
               if (keyValue != null) {
                   resultCount.put(nonNegativeResult, keyValue + 1L);
   
               } else {
                   resultCount.put(nonNegativeResult, 1L);
               }
   
               // Print the non-negative result
   //            System.out.println("Non-negative result: " + 
nonNegativeResult);
   
           }
           Long totalResult = 0L;
           for (Long key : resultCount.keySet()) {
               System.out.println("Key:"+key+" count:"+resultCount.get(key));
               totalResult+=resultCount.get(key);
           }
           System.out.println("Sum:"+totalResult);
   
           
   //        Console Result:
   //        Key:0 count:16651
   //        Key:1 count:16595
   //        Key:2 count:16648
   //        Key:3 count:16650
   //        Key:4 count:16946
   //        Key:5 count:16510
   //        Sum:100000
   
       }
   ```
   Looks good, PTAL
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to