blambov commented on code in PR #2786:
URL: https://github.com/apache/cassandra/pull/2786#discussion_r1355053495
##########
src/java/org/apache/cassandra/config/DatabaseDescriptor.java:
##########
@@ -1441,14 +1446,62 @@ private static void
validateSSTableFormatFactories(Iterable<SSTableFormat.Factor
private static ImmutableMap<String, Supplier<SSTableFormat<?, ?>>>
validateAndMatchSSTableFormatOptions(Iterable<SSTableFormat.Factory> factories,
Map<String, Map<String, String>> options)
{
+ ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilderLeft = ImmutableMap.builder();
+ ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilderRight = ImmutableMap.builder();
ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilder = ImmutableMap.builder();
if (options == null)
options = ImmutableMap.of();
+
+ // add default sstable format options for bti and big
for (SSTableFormat.Factory factory : factories)
{
- Map<String, String> formatOptions =
options.getOrDefault(factory.name(), ImmutableMap.of());
- providersBuilder.put(factory.name(), () ->
factory.getInstance(ImmutableMap.copyOf(formatOptions)));
+ providersBuilderLeft.put(factory.name(), () ->
factory.getInstance(SSTableFormatParams.DEFAULT_FORMAT_MAP.apply(factory.name())));
}
+
+ for (String key : options.keySet())
+ {
+ Map<String, String> formatOptions = Maps.newLinkedHashMap();
+ Map<String, String> map = options.get(key);
Review Comment:
Iterate on `options.entrySet()` so that you can get the map together with
the key without the work of finding the entry in the map.
##########
src/java/org/apache/cassandra/config/DatabaseDescriptor.java:
##########
@@ -1441,14 +1446,62 @@ private static void
validateSSTableFormatFactories(Iterable<SSTableFormat.Factor
private static ImmutableMap<String, Supplier<SSTableFormat<?, ?>>>
validateAndMatchSSTableFormatOptions(Iterable<SSTableFormat.Factory> factories,
Map<String, Map<String, String>> options)
{
+ ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilderLeft = ImmutableMap.builder();
+ ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilderRight = ImmutableMap.builder();
ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilder = ImmutableMap.builder();
if (options == null)
options = ImmutableMap.of();
+
+ // add default sstable format options for bti and big
for (SSTableFormat.Factory factory : factories)
{
- Map<String, String> formatOptions =
options.getOrDefault(factory.name(), ImmutableMap.of());
- providersBuilder.put(factory.name(), () ->
factory.getInstance(ImmutableMap.copyOf(formatOptions)));
+ providersBuilderLeft.put(factory.name(), () ->
factory.getInstance(SSTableFormatParams.DEFAULT_FORMAT_MAP.apply(factory.name())));
Review Comment:
Why not do this after the custom-type construction, checking if
`format.name()` has already been added?
##########
src/java/org/apache/cassandra/schema/SSTableFormatParams.java:
##########
@@ -0,0 +1,383 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.schema;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.TreeMap;
+import java.util.function.Function;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.MoreObjects;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.exceptions.ConfigurationException;
+import org.apache.cassandra.exceptions.SyntaxException;
+import org.apache.cassandra.io.sstable.format.big.BigFormat;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.apache.cassandra.utils.BloomCalculations;
+
+import static java.lang.String.format;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.BLOOM_FILTER_FP_CHANCE;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.CRC_CHECK_CHANCE;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.MAX_INDEX_INTERVAL;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.MIN_INDEX_INTERVAL;
+import static org.apache.cassandra.schema.SSTableFormatParams.Option.TYPE;
+import static org.apache.cassandra.schema.TableParams.fail;
+
+/**
+ * This class describes the params that is used to define the sstable's
+ * format params when modifing the table schema.
+ */
+public final class SSTableFormatParams
+{
+ private final ImmutableMap<Option, String> options;;
+
+ public static final String DEFAUL_BIG_TYPE = "big";
+ public static final double DEFAULT_CRC_CHECK_CHANCE = 1.0;
+ public static final double INVALID_BLOOM_FILTER_FP_CHANCE = -1;
+ public static final double DEFAULT_BLOOM_FILTER_FP_CHANCE = 0.01;
+ public static final int DEFAULT_MAX_INDEX_INTERVAL = 2048;
+ public static final int DEFAULT_MIN_INDEX_INTERVAL = 128;
+
+ private static final Map<String, SSTableFormatParams> CONFIGURATIONS = new
HashMap<>();
+ public static final SSTableFormatParams DEFAULT_BIG_SSTABLE =
SSTableFormatParams.create(ImmutableMap.of(TYPE, DEFAUL_BIG_TYPE,
Review Comment:
This should be called just `DEFAULT`.
##########
src/java/org/apache/cassandra/schema/SSTableFormatParams.java:
##########
@@ -0,0 +1,383 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.schema;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.TreeMap;
+import java.util.function.Function;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.MoreObjects;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.exceptions.ConfigurationException;
+import org.apache.cassandra.exceptions.SyntaxException;
+import org.apache.cassandra.io.sstable.format.big.BigFormat;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.apache.cassandra.utils.BloomCalculations;
+
+import static java.lang.String.format;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.BLOOM_FILTER_FP_CHANCE;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.CRC_CHECK_CHANCE;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.MAX_INDEX_INTERVAL;
+import static
org.apache.cassandra.schema.SSTableFormatParams.Option.MIN_INDEX_INTERVAL;
+import static org.apache.cassandra.schema.SSTableFormatParams.Option.TYPE;
+import static org.apache.cassandra.schema.TableParams.fail;
+
+/**
+ * This class describes the params that is used to define the sstable's
+ * format params when modifing the table schema.
+ */
+public final class SSTableFormatParams
+{
+ private final ImmutableMap<Option, String> options;;
+
+ public static final String DEFAUL_BIG_TYPE = "big";
+ public static final double DEFAULT_CRC_CHECK_CHANCE = 1.0;
+ public static final double INVALID_BLOOM_FILTER_FP_CHANCE = -1;
+ public static final double DEFAULT_BLOOM_FILTER_FP_CHANCE = 0.01;
+ public static final int DEFAULT_MAX_INDEX_INTERVAL = 2048;
+ public static final int DEFAULT_MIN_INDEX_INTERVAL = 128;
+
+ private static final Map<String, SSTableFormatParams> CONFIGURATIONS = new
HashMap<>();
+ public static final SSTableFormatParams DEFAULT_BIG_SSTABLE =
SSTableFormatParams.create(ImmutableMap.of(TYPE, DEFAUL_BIG_TYPE,
+
BLOOM_FILTER_FP_CHANCE,
String.valueOf(DEFAULT_BLOOM_FILTER_FP_CHANCE),
+
CRC_CHECK_CHANCE,
String.valueOf(DEFAULT_CRC_CHECK_CHANCE),
+
MIN_INDEX_INTERVAL,
String.valueOf(DEFAULT_MIN_INDEX_INTERVAL),
+
MAX_INDEX_INTERVAL,
String.valueOf(DEFAULT_MAX_INDEX_INTERVAL)));
+ private static final Map<String, SSTableFormatParams>
+ CONFIGURATION_DEFINITIONS =
expandDefinitions(DatabaseDescriptor.getSSTableFormatOptions());
+ public static final ImmutableSet<String> SSTABLE_FORMAT_TYPES =
ImmutableSet.of(BigFormat.NAME, BtiFormat.NAME);
+ // big/bti sstable format with non-lcs default bf chance
+ public static Function<String, Map<String, String>> DEFAULT_FORMAT_MAP =
new Function<String, Map<String, String>>() {
+ @Override
+ public Map<String, String> apply(String type)
+ {
+ return ImmutableMap.of(TYPE.getName(), type,
+ BLOOM_FILTER_FP_CHANCE.getName(),
String.valueOf(DEFAULT_BLOOM_FILTER_FP_CHANCE),
+ CRC_CHECK_CHANCE.getName(),
String.valueOf(DEFAULT_CRC_CHECK_CHANCE),
+ MIN_INDEX_INTERVAL.getName(),
String.valueOf(DEFAULT_MIN_INDEX_INTERVAL),
+ MAX_INDEX_INTERVAL.getName(),
String.valueOf(DEFAULT_MAX_INDEX_INTERVAL));
+ }
+ };
+
+ // wether these deprecated properties are defined by ddl or not
+ private Map<Option, Object> schemaProperties = new HashMap<>();
+
+ public enum Option
+ {
+ TYPE("type"),
+ BLOOM_FILTER_FP_CHANCE("bloom_filter_fp_chance"),
Review Comment:
Parsing the format parameters should be completely up to the format factory.
In other words, the only option this needs to understand is `type`, so that it
can expand and merge the schema options with the yaml ones.
`bloom_filter_fp_chance` and `crc_check_chance` should be parsed by the
`AbstractSSTableFormat`, or better a new shared superclass of `Big`- and
`BtiFormat` that parses and holds the processed values.
`min/max_index_interval` are properties relevant to `BigFormat` only and
should be parsed by it.
##########
test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java:
##########
@@ -954,4 +955,113 @@ public void testAlterKeyspaceWithIfExists() throws
Throwable
assertInvalidThrow(InvalidRequestException.class, "ALTER KEYSPACE ks1
WITH replication= { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }");
}
+
+ @Test
+ public void testAlterTableWitSSTableFormatParams() throws Throwable
+ {
+ //use test cassandra.yaml
+ createTable("CREATE TABLE %s (a text, b int, c int, primary key (a,
b)) WITH sstable_format = 'bti-fast'");
+ assertSchemaOption("sstable_format", map("bloom_filter_fp_chance",
"0.01", "crc_check_chance", "0.1", "max_index_interval", "2048",
"min_index_interval", "128", "type", "bti-fast"));
Review Comment:
We don't want to lose the per-node configurability of the format, i.e. the
specific property values should only be given if the user specified them.
Otherwise these will override the settings in `cassandra.yaml`.
We can't return just the type as the user gave it, but we `map("type",
"bti-fast")` should work just as well.
##########
src/java/org/apache/cassandra/config/Config.java:
##########
@@ -363,11 +363,12 @@ public MemtableOptions()
public static class SSTableConfig
{
public String selected_format = BigFormat.NAME;
- public Map<String, Map<String, String>> format = new HashMap<>();
+ public Map<String, Map<String, String>> sstable_format_options = new
HashMap<>();
Review Comment:
This is already under `sstable`, rename to `format_options`.
##########
src/java/org/apache/cassandra/config/DatabaseDescriptor.java:
##########
@@ -1441,14 +1446,62 @@ private static void
validateSSTableFormatFactories(Iterable<SSTableFormat.Factor
private static ImmutableMap<String, Supplier<SSTableFormat<?, ?>>>
validateAndMatchSSTableFormatOptions(Iterable<SSTableFormat.Factory> factories,
Map<String, Map<String, String>> options)
{
+ ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilderLeft = ImmutableMap.builder();
+ ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilderRight = ImmutableMap.builder();
ImmutableMap.Builder<String, Supplier<SSTableFormat<?, ?>>>
providersBuilder = ImmutableMap.builder();
if (options == null)
options = ImmutableMap.of();
+
+ // add default sstable format options for bti and big
for (SSTableFormat.Factory factory : factories)
{
- Map<String, String> formatOptions =
options.getOrDefault(factory.name(), ImmutableMap.of());
- providersBuilder.put(factory.name(), () ->
factory.getInstance(ImmutableMap.copyOf(formatOptions)));
+ providersBuilderLeft.put(factory.name(), () ->
factory.getInstance(SSTableFormatParams.DEFAULT_FORMAT_MAP.apply(factory.name())));
}
+
+ for (String key : options.keySet())
+ {
+ Map<String, String> formatOptions = Maps.newLinkedHashMap();
+ Map<String, String> map = options.get(key);
+ Iterator<String> iterator = map.keySet().iterator();
+ // as the map's value parsed by yaml may be numeric type, but
sstable format factory need string
+ // but what about change the type of sstable format factory to a
map that value is Object?
+ while (iterator.hasNext()){
+ String k = iterator.next();
+ String v = String.valueOf(map.get(k));
+ formatOptions.put(k, v);
+ }
+ // TODO add validation for yaml configuration number that must
meet all the sstable param options ?
Review Comment:
Doesn't `factory.getInstance(formatOptions)` below do that?
##########
src/java/org/apache/cassandra/io/sstable/format/SSTableWriter.java:
##########
@@ -275,7 +275,7 @@ public final void abort()
protected Map<MetadataType, MetadataComponent> finalizeMetadata()
{
return
metadataCollector.finalizeMetadata(getPartitioner().getClass().getCanonicalName(),
-
metadata().params.bloomFilterFpChance,
+
metadata().params.getBloomFilterFpChance(),
Review Comment:
This should be set by `SortedTableWriter` (the descendant that creates the
filter component) using a value given by the filter as the metadata may have
changed between the filter construction and this call.
This can be done in a separate ticket.
##########
src/java/org/apache/cassandra/schema/SchemaEvent.java:
##########
@@ -248,6 +249,14 @@ private HashMap<String, Serializable>
repr(CompressionParams compr)
return ret;
}
+ private HashMap<String, Serializable> repr(SSTableFormatParams
sstableFormat)
+ {
+ HashMap<String, Serializable> ret = new HashMap<>();
+ if (sstableFormat == null) return ret;
+ ret.putAll(sstableFormat.asMap());
Review Comment:
This will put the expanded definition in the schema, won't it?
This would defeat one of the main objectives of the ticket. A user must be
able to specify e.g. `bti-fast` format for a table, and have that treated as
specified by the individual node's `cassandra.yaml` rather than the
coordinator's.
##########
src/java/org/apache/cassandra/db/memtable/Flushing.java:
##########
@@ -104,6 +104,7 @@ static FlushRunnable flushRunnable(ColumnFamilyStore cfs,
Directories.DataDirectory flushLocation)
{
Memtable.FlushablePartitionSet<?> flushSet =
memtable.getFlushSet(from, to);
+ // TODO make this per table ,use table's sstable_format type
Review Comment:
This needs to be implemented.
##########
src/java/org/apache/cassandra/schema/TableParams.java:
##########
@@ -194,13 +200,54 @@ public void validate()
if (cdc && memtable.factory().writesShouldSkipCommitLog())
fail("CDC cannot work if writes skip the commit log. Check your
memtable configuration.");
+
+ // conflict validation
+ if
(ssTableFormatParams.isIllegalDeprecatedProperty(SSTableFormatParams.Option.BLOOM_FILTER_FP_CHANCE))
+ {
+ fail("Cannot define bloom_filter_fp_chance AND sstable_format'
bloom_filter_fp_chance at the same time with different value.");
+ }
+
+ if
(ssTableFormatParams.isIllegalDeprecatedProperty(SSTableFormatParams.Option.CRC_CHECK_CHANCE))
+ {
+ fail("Cannot define crc_check_chance AND sstable_format'
crc_check_chance at the same time with different value.");
+ }
+
+ if
(ssTableFormatParams.isIllegalDeprecatedProperty(SSTableFormatParams.Option.MIN_INDEX_INTERVAL))
+ {
+ fail("Cannot define min_index_interval AND sstable_format'
min_index_interval at the same time with different value.");
+ }
+
+ if
(ssTableFormatParams.isIllegalDeprecatedProperty(SSTableFormatParams.Option.MAX_INDEX_INTERVAL))
+ {
+ fail("Cannot define max_index_interval AND sstable_format'
max_index_interval at the same time with different value.");
+ }
}
- private static void fail(String format, Object... args)
+ public static void fail(String format, Object... args)
{
throw new ConfigurationException(format(format, args));
}
+ public double getBloomFilterFpChance()
+ {
+ return bloomFilterFpChance == compaction.defaultBloomFilterFbChance()
? ssTableFormatParams.getBloomFilterFpChance(compaction) : bloomFilterFpChance;
Review Comment:
This means that we practically have two values for this parameter in the
system, which is bound to cause confusion.
Can we not keep only the one in the format? E.g. call methods on the sstable
format if we see one of the deprecated flags and let that both handle conflict
and store the value? (Note: this way the schema will also be updated to use the
format-level properties the next time it is modified.)
##########
src/java/org/apache/cassandra/schema/SchemaKeyspace.java:
##########
@@ -602,6 +604,11 @@ private static void addTableParamsToRowBuilder(TableParams
params, Row.SimpleBui
// incremental_backups is enabled, to avoid RTE in pre-4.2 versioned
node during upgrades
if (!params.incrementalBackups)
builder.add("incremental_backups", false);
+
+ // As above, only add the sstable_format column if the table uses a
non-default sstable_fromat configuration to avoid RTE
Review Comment:
Now that we have a compatibility mode setting, it makes better sense to use
the compatibility level to determine if the setting should be given in the
schema.
This is better done in a separate ticket.
##########
src/java/org/apache/cassandra/schema/TableParams.java:
##########
@@ -71,20 +72,22 @@ public String toString()
public final String comment;
public final boolean allowAutoSnapshot;
- public final double bloomFilterFpChance;
- public final double crcCheckChance;
+ // should we also add some log to remind that the paramter is deprecated ?
like crc_check_chance in CompressionParams
+ @Deprecated public final double bloomFilterFpChance;
+ @Deprecated public final double crcCheckChance;
public final int gcGraceSeconds;
public final boolean incrementalBackups;
public final int defaultTimeToLive;
public final int memtableFlushPeriodInMs;
- public final int minIndexInterval;
- public final int maxIndexInterval;
+ @Deprecated public final int minIndexInterval;
+ @Deprecated public final int maxIndexInterval;
public final SpeculativeRetryPolicy speculativeRetry;
public final SpeculativeRetryPolicy additionalWritePolicy;
public final CachingParams caching;
public final CompactionParams compaction;
public final CompressionParams compression;
public final MemtableParams memtable;
+ public final SSTableFormatParams ssTableFormatParams;
Review Comment:
In line with the fields above, this should be just `ssTableFormat`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]