This is an automated email from the ASF dual-hosted git repository.

ycai pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra-analytics.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 6556d251 CASSANDRA-19981: Fix invalid prefix char produced by BundleNameGenerator (#89)
6556d251 is described below

commit 6556d251bdddfbef3935da760bcda2b2387a4391
Author: Yifan Cai <[email protected]>
AuthorDate: Fri Oct 4 20:58:44 2024 -0700

    CASSANDRA-19981: Fix invalid prefix char produced by BundleNameGenerator (#89)
    
    Patch by Yifan Cai; Reviewed by Doug Rohrer for CASSANDRA-19981
---
 .../bulkwriter/blobupload/BundleNameGenerator.java | 23 ++++++----------------
 .../blobupload/BundleNameGeneratorTest.java        | 19 ++++++++++++++++++
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git 
a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGenerator.java
 
b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGenerator.java
index afeead10..3e146f19 100644
--- 
a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGenerator.java
+++ 
b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGenerator.java
@@ -33,31 +33,20 @@ public class BundleNameGenerator
         this.commonName = '_' + jobId + '_' + sessionId + '_';
     }
 
+    static final char[] PREFIX_CHARS = 
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".toCharArray();
     /**
-     * We want to introduce variability in starting character of zip file 
name, to guarantee entropy on the object name to
+     * Prefix char is to introduce variability in the file name, to increase 
entropy on the object name to
      * avoid 503s from S3 to workaround the throughput limit that is based on 
the object name.
      * <p>
-     * We use 62 for mod, because 62 = 26 (lower case alphabets) + 26 (upper 
case alphabets) + 10 (digits)
-     * For e.g. seed = 512 will map to lower case alphabet q
+     * The prefix char is picked from the chars pool, i.e. a-z|A-Z|0-9, by 
modding the seed
      * </p>
      * @param seed a random integer to derive the prefix character
      * @return starting character to be used while naming zipped SSTables file
      */
-    private char generatePrefixChar(int seed)
+    static char generatePrefixChar(int seed)
     {
-        int group = seed % 62;
-        if (group <= 25)
-        {
-            return (char) ('a' + group);
-        }
-        else if (group <= 51)
-        {
-            return (char) ('A' + group - 26);
-        }
-        else
-        {
-            return (char) ('0' + group - 52);
-        }
+        int idx = Math.abs(seed % PREFIX_CHARS.length);
+        return PREFIX_CHARS[idx];
     }
 
     public String generate(BigInteger startToken, BigInteger endToken)
diff --git 
a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGeneratorTest.java
 
b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGeneratorTest.java
index bb3ef0d5..920b203f 100644
--- 
a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGeneratorTest.java
+++ 
b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/blobupload/BundleNameGeneratorTest.java
@@ -20,10 +20,14 @@
 package org.apache.cassandra.spark.bulkwriter.blobupload;
 
 import java.math.BigInteger;
+import java.util.concurrent.TimeUnit;
 
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.quicktheories.QuickTheory.qt;
+import static org.quicktheories.generators.SourceDSL.integers;
 
 class BundleNameGeneratorTest
 {
@@ -55,4 +59,19 @@ class BundleNameGeneratorTest
             assertEquals(expectedResults[i], 
nameGenerator.generate(BigInteger.valueOf(i), BigInteger.valueOf(i + 
1)).charAt(0));
         }
     }
+
+    @Test
+    void testGenerateValidBundleNamePrefixChar()
+    {
+        qt().withTestingTime(5, TimeUnit.SECONDS)
+            .withUnlimitedExamples()
+            .forAll(integers().all())
+            .checkAssert(i -> {
+                char prefix = BundleNameGenerator.generatePrefixChar(i);
+                assertTrue((prefix >= 'a' && prefix <= 'z')
+                           || (prefix >= 'A' && prefix <= 'Z')
+                           || (prefix >= '0' && prefix <= '9'),
+                           "Seed " + i + " produces invalid prefix " + prefix);
+            });
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to