This is an automated email from the ASF dual-hosted git repository. smiklosovic pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
commit ce983cb89c7e9c2256cb55c3f288c49242ea11d0 Author: Stefan Miklosovic <[email protected]> AuthorDate: Fri Feb 27 13:22:26 2026 +0100 Harden the possible range of values for max dictionary size and max total sample size for dictionary training patch by Stefan Miklosovic; reviewed by Yifan Cai for CASSANDRA-21194 --- CHANGES.txt | 1 + .../compression/CompressionDictionaryManager.java | 2 +- .../io/compress/IDictionaryCompressor.java | 2 +- .../CompressionDictionaryCommandGroup.java | 4 +- .../CompressionDictionaryIntegrationTest.java | 47 ++++++++++++++++++++++ 5 files changed, 52 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f35e9a343d..36ad265565 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Harden the possible range of values for max dictionary size and max total sample size for dictionary training (CASSANDRA-21194) * Implement a guardrail ensuring that minimum training frequency parameter is provided in ZstdDictionaryCompressor (CASSANDRA-21192) * Replace manual referencing with ColumnFamilyStore.selectAndReference when training a dictionary (CASSANDRA-21188) * Forbid nodes upgrading to a version which cannot read existing log entries (CASSANDRA-21174) diff --git a/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java b/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java index 6fc7c3d8cd..ca30510c8a 100644 --- a/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java +++ b/src/java/org/apache/cassandra/db/compression/CompressionDictionaryManager.java @@ -471,7 +471,7 @@ public class CompressionDictionaryManager implements CompressionDictionaryManage else resolvedValue = userSuppliedValue; - return new DataStorageSpec.IntKibibytesBound(resolvedValue).toBytes(); + return new DataStorageSpec.IntBytesBound(resolvedValue).toBytes(); } catch (Throwable t) { diff --git a/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java b/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java index 9296810575..46f46258b2 100644 --- a/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java +++ b/src/java/org/apache/cassandra/io/compress/IDictionaryCompressor.java @@ -59,7 +59,7 @@ public interface IDictionaryCompressor<T extends CompressionDictionary> { try { - new DataStorageSpec.IntKibibytesBound(resolvedValue).toBytes(); + new DataStorageSpec.IntBytesBound(resolvedValue).toBytes(); } catch (Throwable t) { diff --git a/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java b/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java index 22a3affcbf..e5f371a161 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java +++ b/src/java/org/apache/cassandra/tools/nodetool/CompressionDictionaryCommandGroup.java @@ -182,7 +182,7 @@ public class CompressionDictionaryCommandGroup { try { - new DataStorageSpec.IntKibibytesBound(trainingMaxDictionarySize).toBytes(); + new DataStorageSpec.IntBytesBound(trainingMaxDictionarySize).toBytes(); } catch (Throwable t) { @@ -195,7 +195,7 @@ public class CompressionDictionaryCommandGroup { try { - new DataStorageSpec.IntKibibytesBound(trainingMaxTotalSampleSize).toBytes(); + new DataStorageSpec.IntBytesBound(trainingMaxTotalSampleSize).toBytes(); } catch (Throwable t) { diff --git a/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java b/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java index 4bf4107712..2c5915f3c8 100644 --- a/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java +++ b/test/unit/org/apache/cassandra/db/compression/CompressionDictionaryIntegrationTest.java @@ -234,6 +234,33 @@ public class CompressionDictionaryIntegrationTest extends CQLTester .hasMessageContaining("No SSTables available for training"); } + @Test + public void testMaxBoundForTrainingParameters() + { + String table = createTable(getTableCql()); + ColumnFamilyStore cfs = Keyspace.open(keyspace()).getColumnFamilyStore(table); + CompressionDictionaryManager manager = cfs.compressionDictionaryManager(); + + assertThatThrownBy(() -> manager.train(false, Map.of(TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME, "5GiB", + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME, TRAINING_MAX_TOTAL_SAMPLE_SIZE))) + .as("Should fail when " + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + " is bigger than " + Integer.MAX_VALUE) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Invalid value for training_max_dictionary_size: 5GiB"); + + assertThatThrownBy(() -> manager.train(false, Map.of(TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME, TRAINING_MAX_DICTIONARY_SIZE, + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME, "5GiB"))) + .as("Should fail when " + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + " is bigger than " + Integer.MAX_VALUE) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Invalid value for " + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + ": 5GiB"); + } + + @Test + public void testInvalidTableCreation() + { + assertThatThrownBy(() -> createTable(getTableCqlWithInvalidTotalMaxSampleSize())).isInstanceOf(RuntimeException.class); + assertThatThrownBy(() -> createTable(getTableCqlWithInvalidMaxDictionarySize())).isInstanceOf(RuntimeException.class); + } + private String getTableCqlWithChunkLength() { return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " + @@ -254,4 +281,24 @@ public class CompressionDictionaryIntegrationTest extends CQLTester '\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '" + DEFAULT_TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_VALUE + '\'' + '}'; } + + private String getTableCqlWithInvalidTotalMaxSampleSize() + { + return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " + + "WITH compression = {" + + "'class': 'ZstdDictionaryCompressor'," + + '\'' + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + "': '" + DEFAULT_TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_VALUE + "'," + + '\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '5GiB'" + + '}'; + } + + private String getTableCqlWithInvalidMaxDictionarySize() + { + return "CREATE TABLE %s (pk text PRIMARY KEY, data text) " + + "WITH compression = {" + + "'class': 'ZstdDictionaryCompressor'," + + '\'' + TRAINING_MAX_DICTIONARY_SIZE_PARAMETER_NAME + "': '5GiB'," + + '\'' + TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_NAME + "': '" + DEFAULT_TRAINING_MAX_TOTAL_SAMPLE_SIZE_PARAMETER_VALUE + '\'' + + '}'; + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
