Avoid rebuilding SASI indexes containing no values Patch by Alex Petrov; reviewed by Corentin Chary for CASSANDRA-12962
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/317a3ed6 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/317a3ed6 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/317a3ed6 Branch: refs/heads/trunk Commit: 317a3ed6271f02b6e942937882ff75a503b93f8a Parents: 5f54d42 Author: Alex Petrov <oleksandr.pet...@gmail.com> Authored: Fri Apr 7 11:42:49 2017 +0200 Committer: Alex Petrov <oleksandr.pet...@gmail.com> Committed: Fri Apr 7 11:42:49 2017 +0200 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../cassandra/index/sasi/conf/DataTracker.java | 30 ++++++++++++-------- .../index/sasi/disk/OnDiskIndexBuilder.java | 11 +++++++ .../index/sasi/disk/PerSSTableIndexWriter.java | 13 +++------ .../cassandra/index/sasi/SASIIndexTest.java | 19 +++++++++++-- 5 files changed, 51 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/317a3ed6/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 7b03b6c..fd09bd2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.11.0 + * Avoid rebuilding SASI indexes containing no values (CASSANDRA-12962) * Add charset to Analyser input stream (CASSANDRA-13151) * Fix testLimitSSTables flake caused by concurrent flush (CASSANDRA-12820) * cdc column addition strikes again (CASSANDRA-13382) http://git-wip-us.apache.org/repos/asf/cassandra/blob/317a3ed6/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java b/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java index af8e07d..d934c65 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java @@ -29,8 +29,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.index.sasi.SSTableIndex; import org.apache.cassandra.index.sasi.conf.view.View; import org.apache.cassandra.io.sstable.format.SSTableReader; - -import com.google.common.collect.Sets; +import org.apache.cassandra.utils.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,8 +65,9 @@ public class DataTracker */ public Iterable<SSTableReader> update(Collection<SSTableReader> oldSSTables, Collection<SSTableReader> newSSTables) { - final Set<SSTableIndex> newIndexes = getIndexes(newSSTables); - final Set<SSTableReader> indexedSSTables = getSSTables(newIndexes); + final Pair<Set<SSTableIndex>, Set<SSTableReader>> built = getBuiltIndexes(newSSTables); + final Set<SSTableIndex> newIndexes = built.left; + final Set<SSTableReader> indexedSSTables = built.right; View currentView, newView; do @@ -131,9 +131,10 @@ public class DataTracker update(toRemove, Collections.<SSTableReader>emptyList()); } - private Set<SSTableIndex> getIndexes(Collection<SSTableReader> sstables) + private Pair<Set<SSTableIndex>, Set<SSTableReader>> getBuiltIndexes(Collection<SSTableReader> sstables) { Set<SSTableIndex> indexes = new HashSet<>(sstables.size()); + Set<SSTableReader> builtSSTables = new HashSet<>(sstables.size()); for (SSTableReader sstable : sstables) { if (sstable.isMarkedCompacted()) @@ -143,6 +144,14 @@ public class DataTracker if (!indexFile.exists()) continue; + // if the index file is empty, we have to ignore it to avoid re-building, but it doesn't take + // a part in query process + if (indexFile.length() == 0) + { + builtSSTables.add(sstable); + continue; + } + SSTableIndex index = null; try @@ -160,7 +169,9 @@ public class DataTracker // Try to add new index to the set, if set already has such index, we'll simply release and move on. // This covers situation when sstable collection has the same sstable multiple // times because we don't know what kind of collection it actually is. - if (!indexes.add(index)) + if (indexes.add(index)) + builtSSTables.add(sstable); + else index.release(); } catch (Throwable t) @@ -171,11 +182,6 @@ public class DataTracker } } - return indexes; - } - - private Set<SSTableReader> getSSTables(Set<SSTableIndex> indexes) - { - return Sets.newHashSet(indexes.stream().map(SSTableIndex::getSSTable).collect(Collectors.toList())); + return Pair.create(indexes, builtSSTables); } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/317a3ed6/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java index 4946f06..0298539 100644 --- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java +++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java @@ -246,7 +246,18 @@ public class OnDiskIndexBuilder { // no terms means there is nothing to build if (terms.isEmpty()) + { + try + { + file.createNewFile(); + } + catch (IOException e) + { + throw new FSWriteError(e, file); + } + return false; + } // split terms into suffixes only if it's text, otherwise (even if CONTAINS is set) use terms in original form SA sa = ((termComparator instanceof UTF8Type || termComparator instanceof AsciiType) && mode == Mode.CONTAINS) http://git-wip-us.apache.org/repos/asf/cassandra/blob/317a3ed6/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java b/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java index 9fa4e87..708dd9d 100644 --- a/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java +++ b/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java @@ -79,7 +79,6 @@ public class PerSSTableIndexWriter implements SSTableFlushObserver private final OperationType source; private final AbstractType<?> keyValidator; - private final Map<ColumnDefinition, ColumnIndex> supportedIndexes; @VisibleForTesting protected final Map<ColumnDefinition, Index> indexes; @@ -96,8 +95,9 @@ public class PerSSTableIndexWriter implements SSTableFlushObserver this.keyValidator = keyValidator; this.descriptor = descriptor; this.source = source; - this.supportedIndexes = supportedIndexes; this.indexes = new HashMap<>(); + for (Map.Entry<ColumnDefinition, ColumnIndex> entry : supportedIndexes.entrySet()) + indexes.put(entry.getKey(), newIndex(entry.getValue())); } public void begin() @@ -116,18 +116,13 @@ public class PerSSTableIndexWriter implements SSTableFlushObserver Row row = (Row) unfiltered; - supportedIndexes.keySet().forEach((column) -> { + indexes.forEach((column, index) -> { ByteBuffer value = ColumnIndex.getValueOf(column, row, nowInSec); if (value == null) return; - ColumnIndex columnIndex = supportedIndexes.get(column); - if (columnIndex == null) - return; - - Index index = indexes.get(column); if (index == null) - indexes.put(column, (index = newIndex(columnIndex))); + throw new IllegalArgumentException("No index exists for column " + column.name.toString()); index.add(value.duplicate(), currentKey, currentKeyPosition); }); http://git-wip-us.apache.org/repos/asf/cassandra/blob/317a3ed6/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java index 3bd27e6..37d1961 100644 --- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java +++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java @@ -17,7 +17,6 @@ */ package org.apache.cassandra.index.sasi; -import java.io.File; import java.io.FileWriter; import java.io.Writer; import java.nio.ByteBuffer; @@ -35,6 +34,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.config.CFMetaData; import org.apache.cassandra.config.ColumnDefinition; +import org.apache.cassandra.index.Index; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.*; import org.apache.cassandra.cql3.Term; @@ -61,7 +61,6 @@ import org.apache.cassandra.index.sasi.memory.IndexMemtable; import org.apache.cassandra.index.sasi.plan.QueryController; import org.apache.cassandra.index.sasi.plan.QueryPlan; import org.apache.cassandra.io.sstable.SSTable; -import org.apache.cassandra.io.sstable.format.big.BigFormat; import org.apache.cassandra.schema.IndexMetadata; import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.KeyspaceParams; @@ -1909,6 +1908,22 @@ public class SASIIndexTest } @Test + public void testIndexRebuild() throws Exception + { + ColumnFamilyStore store = Keyspace.open(KS_NAME).getColumnFamilyStore(CLUSTERING_CF_NAME_1); + + executeCQL(CLUSTERING_CF_NAME_1, "INSERT INTO %s.%s (name, nickname) VALUES (?, ?)", "Alex", "ifesdjeen"); + + store.forceBlockingFlush(); + + for (Index index : store.indexManager.listIndexes()) + { + SASIIndex idx = (SASIIndex) index; + Assert.assertFalse(idx.getIndex().init(store.getLiveSSTables()).iterator().hasNext()); + } + } + + @Test public void testInvalidIndexOptions() { ColumnFamilyStore store = Keyspace.open(KS_NAME).getColumnFamilyStore(CF_NAME);