This is an automated email from the ASF dual-hosted git repository.

smiklosovic pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git

commit ce983cb89c7e9c2256cb55c3f288c49242ea11d0
Author: Stefan Miklosovic <[email protected]>
AuthorDate: Fri Feb 27 13:22:26 2026 +0100

    Harden the possible range of values for max dictionary size and max total sample size for dictionary training
    
    patch by Stefan Miklosovic; reviewed by Yifan Cai for CASSANDRA-21194
---
 CHANGES.txt                                        |  1 +
 .../compression/CompressionDictionaryManager.java  |  2 +-
 .../io/compress/IDictionaryCompressor.java         |  2 +-
 .../CompressionDictionaryCommandGroup.java         |  4 +-
 .../CompressionDictionaryIntegrationTest.java      | 47 ++++++++++++++++++++++
 5 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index f35e9a343d..36ad265565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.1
+ * Harden the possible range of values for max dictionary size and max total sample size for dictionary training (CASSANDRA-21194)
  * Implement a guardrail ensuring that minimum training frequency parameter is provided in ZstdDictionaryCompressor (CASSANDRA-21192)
  * Replace manual referencing with ColumnFamilyStore.selectAndReference when training a dictionary (CASSANDRA-21188)
  * Forbid nodes upgrading to a version which cannot read existing log entries (CASSANDRA-21174)
diff --git 
a/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java
 
b/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java
index 6fc7c3d8cd..ca30510c8a 100644
--- 
a/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java
+++ 
b/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java
@@ -471,7 +471,7 @@ public class CompressionDictionaryManager implements 
CompressionDictionaryManage
             else
                 resolvedValue = userSuppliedValue;
 
-            return new 
DataStorageSpec.IntKibibytesBound(resolvedValue).toBytes();
+            return new DataStorageSpec.IntBytesBound(resolvedValue).toBytes();
         }
         catch (Throwable t)
         {
diff --git 
a/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java 
b/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java
index 9296810575..46f46258b2 100644
--- a/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java
+++ b/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java
@@ -59,7 +59,7 @@ public interface IDictionaryCompressor<T extends 
CompressionDictionary>
     {
         try
         {
-            new DataStorageSpec.IntKibibytesBound(resolvedValue).toBytes();
+            new DataStorageSpec.IntBytesBound(resolvedValue).toBytes();
         }
         catch (Throwable t)
         {
diff --git 
a/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java
 
b/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java
index 22a3affcbf..e5f371a161 100644
--- 
a/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java
+++ 
b/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java
@@ -182,7 +182,7 @@ public class CompressionDictionaryCommandGroup
             {
                 try
                 {
-                    new 
DataStorageSpec.IntKibibytesBound(trainingMaxDictionarySize).toBytes();
+                    new 
DataStorageSpec.IntBytesBound(trainingMaxDictionarySize).toBytes();
                 }
                 catch (Throwable t)
                 {
@@ -195,7 +195,7 @@ public class CompressionDictionaryCommandGroup
             {
                 try
                 {
-                    new 
DataStorageSpec.IntKibibytesBound(trainingMaxTotalSampleSize).toBytes();
+                    new 
DataStorageSpec.IntBytesBound(trainingMaxTotalSampleSize).toBytes();
                 }
                 catch (Throwable t)
                 {
diff --git 
a/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java
 
b/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java
index 4bf4107712..2c5915f3c8 100644
--- 
a/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java
+++ 
b/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java
@@ -234,6 +234,33 @@ public class CompressionDictionaryIntegrationTest extends 
CQLTester
         .hasMessageContaining("No SSTables available for training");
     }
 
+    @Test
+    public void testMaxBoundForTrainingParameters()
+    {
+        String table = createTable(getTableCql());
+        ColumnFamilyStore cfs = 
Keyspace.open(keyspace()).getColumnFamilyStore(table);
+        CompressionDictionaryManager manager = 
cfs.compressionDictionaryManager();
+
+        assertThatThrownBy(() -> manager.train(false, 
Map.of(TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME, "5GiB",
+                                                             
TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME, TRAINING_MAX_TOTAL_SAMPLE_SIZE)))
+        .as("Should fail when " + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME 
+ " is bigger than " + Integer.MAX_VALUE)
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("Invalid value for training_max_dictionary_size: 
5GiB");
+
+        assertThatThrownBy(() -> manager.train(false, 
Map.of(TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME, 
TRAINING_MAX_DICTIONARY_SIZE,
+                                                             
TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME, "5GiB")))
+        .as("Should fail when " + 
TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + " is bigger than " + 
Integer.MAX_VALUE)
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("Invalid value for " + 
TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + ": 5GiB");
+    }
+
+    @Test
+    public void testInvalidTableCreation()
+    {
+        assertThatThrownBy(() -> 
createTable(getTableCqlWithInvalidTotalMaxSampleSize())).isInstanceOf(RuntimeException.class);
+        assertThatThrownBy(() -> 
createTable(getTableCqlWithInvalidMaxDictionarySize())).isInstanceOf(RuntimeException.class);
+    }
+
     private String getTableCqlWithChunkLength()
     {
         return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " +
@@ -254,4 +281,24 @@ public class CompressionDictionaryIntegrationTest extends 
CQLTester
                '\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '" + 
DEFAULT_TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_VALUE + '\'' +
                '}';
     }
+
+    private String getTableCqlWithInvalidTotalMaxSampleSize()
+    {
+        return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " +
+               "WITH compression = {" +
+               "'class': 'ZstdDictionaryCompressor'," +
+               '\'' + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + "': '" + 
DEFAULT_TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_VALUE + "'," +
+               '\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': 
'5GiB'" +
+               '}';
+    }
+
+    private String getTableCqlWithInvalidMaxDictionarySize()
+    {
+        return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " +
+               "WITH compression = {" +
+               "'class': 'ZstdDictionaryCompressor'," +
+               '\'' + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + "': 
'5GiB'," +
+               '\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '" + 
DEFAULT_TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_VALUE + '\'' +
+               '}';
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to