This is an automated email from the ASF dual-hosted git repository.

maedhroz pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git

commit eb208d3561eaf645f74f60b54c71ebe5bfc24c33
Author: Mike Adamson <madam...@datastax.com>
AuthorDate: Tue May 9 12:29:01 2023 +0100

    Add basic text analysis to SAI, including "case_sensitive", "normalize", and "ascii" modes
    
    patch by Mike Adamson; reviewed by Caleb Rackliffe and Andres de la Peña for CASSANDRA-18479
---
 .build/cassandra-deps-template.xml                 |   8 +-
 .build/parent-pom-template.xml                     |  10 +-
 .../apache/cassandra/index/sai/IndexContext.java   |  31 ++--
 .../cassandra/index/sai/StorageAttachedIndex.java  |   6 +-
 .../index/sai/analyzer/AbstractAnalyzer.java       |  26 ++++
 .../index/sai/analyzer/NonTokenizingAnalyzer.java  | 147 +++++++++++++++++++
 .../index/sai/analyzer/NonTokenizingOptions.java   | 156 +++++++++++++++++++++
 .../index/sai/analyzer/filter/BasicFilters.java    |  82 +++++++++++
 .../index/sai/analyzer/filter/FilterPipeline.java  |  69 +++++++++
 .../analyzer/filter/FilterPipelineExecutor.java    |  40 ++++++
 .../index/sai/disk/v1/SSTableIndexWriter.java      |   2 +-
 .../index/sai/memory/TrieMemoryIndex.java          |   2 +-
 .../cassandra/index/sai/plan/Expression.java       |   2 +-
 .../apache/cassandra/index/sai/plan/Operation.java |   2 +-
 .../sai/plan/StorageAttachedIndexSearcher.java     |   2 +-
 .../index/sai/virtual/ColumnIndexesSystemView.java |   2 +-
 .../index/sasi/analyzer/filter/StemmerFactory.java |  55 +++++---
 .../test/sai/ReplicaFilteringProtectionTest.java   |  67 +++++++++
 .../sai/analyzer/NonTokenizingAnalyzerTest.java    |  75 ++++++++++
 .../sai/analyzer/filter/BasicFiltersTest.java      |  73 ++++++++++
 .../index/sai/cql/StorageAttachedIndexDDLTest.java | 106 ++++++++++++++
 .../index/sai/virtual/IndexesSystemViewTest.java   |   2 +-
 22 files changed, 904 insertions(+), 61 deletions(-)

diff --git a/.build/cassandra-deps-template.xml b/.build/cassandra-deps-template.xml
index 0185e8fbbf..5e3ebc0e58 100644
--- a/.build/cassandra-deps-template.xml
+++ b/.build/cassandra-deps-template.xml
@@ -324,10 +324,6 @@
       <groupId>org.hdrhistogram</groupId>
       <artifactId>HdrHistogram</artifactId>
     </dependency>
-    <dependency>
-      <groupId>com.github.rholder</groupId>
-      <artifactId>snowball-stemmer</artifactId>
-    </dependency>
     <dependency>
       <groupId>com.googlecode.concurrent-trees</groupId>
       <artifactId>concurrent-trees</artifactId>
@@ -376,5 +372,9 @@
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analysis-common</artifactId>
+    </dependency>
   </dependencies>
 </project>
diff --git a/.build/parent-pom-template.xml b/.build/parent-pom-template.xml
index b9ca4769f4..50695a348d 100644
--- a/.build/parent-pom-template.xml
+++ b/.build/parent-pom-template.xml
@@ -882,11 +882,6 @@
           </exclusion>
         </exclusions>
       </dependency>
-      <dependency>
-        <groupId>com.github.rholder</groupId>
-        <artifactId>snowball-stemmer</artifactId>
-        <version>1.3.0.581.1</version>
-      </dependency>
       <dependency>
         <groupId>com.googlecode.concurrent-trees</groupId>
         <artifactId>concurrent-trees</artifactId>
@@ -1055,6 +1050,11 @@
         <artifactId>lucene-core</artifactId>
         <version>9.7.0</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-analysis-common</artifactId>
+        <version>9.7.0</version>
+      </dependency>
       <dependency>
         <groupId>com.carrotsearch.randomizedtesting</groupId>
         <artifactId>randomizedtesting-runner</artifactId>
diff --git a/src/java/org/apache/cassandra/index/sai/IndexContext.java b/src/java/org/apache/cassandra/index/sai/IndexContext.java
index 14a6db6c4b..d0a5e08b16 100644
--- a/src/java/org/apache/cassandra/index/sai/IndexContext.java
+++ b/src/java/org/apache/cassandra/index/sai/IndexContext.java
@@ -90,8 +90,7 @@ public class IndexContext
     private final IndexViewManager viewManager;
     private final IndexMetrics indexMetrics;
     private final ColumnQueryMetrics columnQueryMetrics;
-    private final AbstractAnalyzer.AnalyzerFactory indexAnalyzerFactory;
-    private final AbstractAnalyzer.AnalyzerFactory queryAnalyzerFactory;
+    private final AbstractAnalyzer.AnalyzerFactory analyzerFactory;
     private final PrimaryKey.Factory primaryKeyFactory;
 
     public IndexContext(String keyspace,
@@ -119,9 +118,8 @@ public class IndexContext
         this.columnQueryMetrics = isLiteral() ? new ColumnQueryMetrics.TrieIndexMetrics(this)
                                               : new ColumnQueryMetrics.BalancedTreeIndexMetrics(this);
 
-        // We currently only support the NoOpAnalyzer
-        this.indexAnalyzerFactory = AbstractAnalyzer.fromOptions(getValidator(), Collections.emptyMap());
-        this.queryAnalyzerFactory = AbstractAnalyzer.fromOptions(getValidator(), Collections.emptyMap());
+        this.analyzerFactory = indexMetadata == null ? AbstractAnalyzer.fromOptions(getValidator(), Collections.emptyMap())
+                                                     : AbstractAnalyzer.fromOptions(getValidator(), indexMetadata.options);
     }
 
     public AbstractType<?> keyValidator()
@@ -199,23 +197,12 @@ public class IndexContext
     }
 
     /**
-     * Returns an {@code AnalyzerFactory} for use by the write path to transform incoming literal
-     * during indexing. The analyzers can be tokenising or non-tokenising. Tokenising analyzers
-     * will split the incoming terms into multiple terms in the index while non-tokenising analyzers
-     * will not split the incoming term but will transform the term (e.g. case-insensitive)
+     * Returns an {@link AbstractAnalyzer.AnalyzerFactory} for use by write and query paths to transform
+     * literal values.
      */
-    public AbstractAnalyzer.AnalyzerFactory getIndexAnalyzerFactory()
+    public AbstractAnalyzer.AnalyzerFactory getAnalyzerFactory()
     {
-        return indexAnalyzerFactory;
-    }
-
-    /**
-     * Return an {@code AnalyzerFactory} for use by the query path to transform query terms before
-     * searching for them in the index. This can be the same as the indexAnalyzerFactory.
-     */
-    public AbstractAnalyzer.AnalyzerFactory getQueryAnalyzerFactory()
-    {
-        return queryAnalyzerFactory;
+        return analyzerFactory;
     }
 
     public View getView()
@@ -256,9 +243,7 @@ public class IndexContext
     public void invalidate()
     {
         viewManager.invalidate();
-        indexAnalyzerFactory.close();
-        if (queryAnalyzerFactory != indexAnalyzerFactory)
-            queryAnalyzerFactory.close();
+        analyzerFactory.close();
         if (memtableIndexManager != null)
             memtableIndexManager.invalidate();
         if (indexMetrics != null)
diff --git a/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java b/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java
index c826e1da3a..264abdd71a 100644
--- a/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java
+++ b/src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java
@@ -74,6 +74,7 @@ import org.apache.cassandra.index.IndexRegistry;
 import org.apache.cassandra.index.SecondaryIndexBuilder;
 import org.apache.cassandra.index.TargetParser;
 import org.apache.cassandra.index.sai.analyzer.AbstractAnalyzer;
+import org.apache.cassandra.index.sai.analyzer.NonTokenizingOptions;
 import org.apache.cassandra.index.sai.disk.SSTableIndex;
 import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
 import org.apache.cassandra.index.sai.disk.format.Version;
@@ -139,7 +140,10 @@ public class StorageAttachedIndex implements Index
     private static final StorageAttachedIndexBuildingSupport INDEX_BUILDER_SUPPORT = new StorageAttachedIndexBuildingSupport();
 
     private static final Set<String> VALID_OPTIONS = ImmutableSet.of(IndexTarget.TARGET_OPTION_NAME,
-                                                                     IndexTarget.CUSTOM_INDEX_OPTION_NAME);
+                                                                     IndexTarget.CUSTOM_INDEX_OPTION_NAME,
+                                                                     NonTokenizingOptions.CASE_SENSITIVE,
+                                                                     NonTokenizingOptions.NORMALIZE,
+                                                                     NonTokenizingOptions.ASCII);
 
     public static final Set<CQL3Type> SUPPORTED_TYPES = ImmutableSet.of(CQL3Type.Native.ASCII, CQL3Type.Native.BIGINT, CQL3Type.Native.DATE,
                                                                         CQL3Type.Native.DOUBLE, CQL3Type.Native.FLOAT, CQL3Type.Native.INT,
diff --git a/src/java/org/apache/cassandra/index/sai/analyzer/AbstractAnalyzer.java b/src/java/org/apache/cassandra/index/sai/analyzer/AbstractAnalyzer.java
index d1e7bae2fe..1f5e339657 100644
--- a/src/java/org/apache/cassandra/index/sai/analyzer/AbstractAnalyzer.java
+++ b/src/java/org/apache/cassandra/index/sai/analyzer/AbstractAnalyzer.java
@@ -24,10 +24,13 @@ import java.util.Map;
 import java.util.NoSuchElementException;
 
 import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.StringType;
+import org.apache.cassandra.exceptions.InvalidRequestException;
 
 public abstract class AbstractAnalyzer implements Iterator<ByteBuffer>
 {
     protected ByteBuffer next = null;
+    protected String nextLiteral = null;
 
     /**
      * @return true if index value is transformed, e.g. normalized or lower-cased or tokenized.
@@ -65,6 +68,7 @@ public abstract class AbstractAnalyzer implements Iterator<ByteBuffer>
     public void reset(ByteBuffer input)
     {
         this.next = null;
+        this.nextLiteral = null;
 
         resetInternal(input);
     }
@@ -80,6 +84,28 @@ public abstract class AbstractAnalyzer implements Iterator<ByteBuffer>
 
     public static AnalyzerFactory fromOptions(AbstractType<?> type, Map<String, String> options)
     {
+        if (hasNonTokenizingOptions(options))
+        {
+            if (type instanceof StringType)
+            {
+                // validate options
+                NonTokenizingOptions.fromMap(options);
+                return () -> new NonTokenizingAnalyzer(type, options);
+            }
+            else
+            {
+                throw new InvalidRequestException("CQL type " + type.asCQL3Type() + " cannot be analyzed.");
+            }
+        }
+
         return NoOpAnalyzer::new;
     }
+
+    private static boolean hasNonTokenizingOptions(Map<String, String> options)
+    {
+        return options.get(NonTokenizingOptions.ASCII) != null ||
+               options.containsKey(NonTokenizingOptions.CASE_SENSITIVE) ||
+               options.containsKey(NonTokenizingOptions.NORMALIZE);
+    }
+
 }
diff --git a/src/java/org/apache/cassandra/index/sai/analyzer/NonTokenizingAnalyzer.java b/src/java/org/apache/cassandra/index/sai/analyzer/NonTokenizingAnalyzer.java
new file mode 100644
index 0000000000..f58158a823
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sai/analyzer/NonTokenizingAnalyzer.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.analyzer;
+
+import java.nio.ByteBuffer;
+import java.util.Map;
+
+import com.google.common.base.MoreObjects;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.StringType;
+import org.apache.cassandra.index.sai.analyzer.filter.BasicFilters;
+import org.apache.cassandra.index.sai.analyzer.filter.FilterPipeline;
+import org.apache.cassandra.index.sai.analyzer.filter.FilterPipelineExecutor;
+import org.apache.cassandra.serializers.MarshalException;
+import org.apache.cassandra.utils.ByteBufferUtil;
+
+/**
+ * Analyzer that does *not* tokenize the input. Optionally will
+ * apply filters for the input based on {@link NonTokenizingOptions}.
+ */
+public class NonTokenizingAnalyzer extends AbstractAnalyzer
+{
+    private static final Logger logger = LoggerFactory.getLogger(NonTokenizingAnalyzer.class);
+
+    private final AbstractType<?> type;
+    private final NonTokenizingOptions options;
+    private final FilterPipeline filterPipeline;
+
+    private ByteBuffer input;
+    private boolean hasNext = false;
+
+    NonTokenizingAnalyzer(AbstractType<?> type, Map<String, String> options)
+    {
+        this(type, NonTokenizingOptions.fromMap(options));
+    }
+
+    NonTokenizingAnalyzer(AbstractType<?> type, NonTokenizingOptions tokenizerOptions)
+    {
+        this.type = type;
+        this.options = tokenizerOptions;
+        this.filterPipeline = getFilterPipeline();
+    }
+
+    @Override
+    public boolean hasNext()
+    {
+        // check that we know how to handle the input, otherwise bail
+        if (!(type instanceof StringType)) return false;
+
+        if (hasNext)
+        {
+            try
+            {
+                String input = type.getString(this.input);
+
+                if (input == null)
+                {
+                    throw new MarshalException(String.format("'null' deserialized value for %s with %s",
+                                                             ByteBufferUtil.bytesToHex(this.input), type));
+                }
+
+                String result = FilterPipelineExecutor.execute(filterPipeline, input);
+                
+                if (result == null)
+                {
+                    nextLiteral = null;
+                    next = null;
+                    return false;
+                }
+
+                nextLiteral = result;
+                next = type.fromString(result);
+
+                return true;
+            }
+            catch (MarshalException e)
+            {
+                logger.error("Failed to deserialize value with " + type, e);
+                return false;
+            }
+            finally
+            {
+                hasNext = false;
+            }
+        }
+
+        return false;
+    }
+
+    @Override
+    public boolean transformValue()
+    {
+        return !options.isCaseSensitive() || options.isNormalized() || options.isAscii();
+    }
+
+    @Override
+    protected void resetInternal(ByteBuffer input)
+    {
+        this.input = input;
+        this.hasNext = true;
+    }
+
+    private FilterPipeline getFilterPipeline()
+    {
+        FilterPipeline builder = new FilterPipeline(new BasicFilters.NoOperation());
+        
+        if (!options.isCaseSensitive())
+            builder = builder.add("to_lower", new BasicFilters.LowerCase());
+        
+        if (options.isNormalized())
+            builder = builder.add("normalize", new BasicFilters.Normalize());
+
+        if (options.isAscii())
+            builder = builder.add("ascii", new BasicFilters.Ascii());
+        
+        return builder;
+    }
+
+    @Override
+    public String toString()
+    {
+        return MoreObjects.toStringHelper(this)
+                          .add("caseSensitive", options.isCaseSensitive())
+                          .add("normalized", options.isNormalized())
+                          .add("ascii", options.isAscii())
+                          .toString();
+    }
+}
diff --git a/src/java/org/apache/cassandra/index/sai/analyzer/NonTokenizingOptions.java b/src/java/org/apache/cassandra/index/sai/analyzer/NonTokenizingOptions.java
new file mode 100644
index 0000000000..ab6485acf5
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sai/analyzer/NonTokenizingOptions.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.analyzer;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.common.base.Strings;
+
+import org.apache.cassandra.exceptions.InvalidRequestException;
+
+public class NonTokenizingOptions
+{
+    public static final String NORMALIZE = "normalize";
+    public static final String CASE_SENSITIVE = "case_sensitive";
+    public static final String ASCII = "ascii";
+
+    private boolean caseSensitive;
+    private boolean normalized;
+    private boolean ascii;
+
+    boolean isCaseSensitive()
+    {
+        return caseSensitive;
+    }
+
+    void setCaseSensitive(boolean caseSensitive)
+    {
+        this.caseSensitive = caseSensitive;
+    }
+    
+    boolean isNormalized()
+    {
+        return this.normalized;
+    }
+
+    void setAscii(boolean ascii)
+    {
+        this.ascii = ascii;
+    }
+
+    boolean isAscii()
+    {
+        return this.ascii;
+    }
+    
+    void setNormalized(boolean normalized)
+    {
+        this.normalized = normalized;
+    }
+
+    public static class OptionsBuilder
+    {
+        private boolean caseSensitive = true;
+        private boolean normalized = false;
+        private boolean ascii = false;
+
+        OptionsBuilder() {}
+
+        OptionsBuilder caseSensitive(boolean caseSensitive)
+        {
+            this.caseSensitive = caseSensitive;
+            return this;
+        }
+
+        OptionsBuilder ascii(boolean ascii)
+        {
+            this.ascii = ascii;
+            return this;
+        }
+
+        OptionsBuilder normalized(boolean normalized)
+        {
+            this.normalized = normalized;
+            return this;
+        }
+
+        public NonTokenizingOptions build()
+        {
+            NonTokenizingOptions options = new NonTokenizingOptions();
+            options.setCaseSensitive(caseSensitive);
+            options.setNormalized(normalized);
+            options.setAscii(ascii);
+            return options;
+        }
+    }
+
+    public static NonTokenizingOptions getDefaultOptions()
+    {
+        return fromMap(new HashMap<>(1));
+    }
+
+    public static NonTokenizingOptions fromMap(Map<String, String> options)
+    {
+        OptionsBuilder builder = new OptionsBuilder();
+
+        for (Map.Entry<String, String> entry : options.entrySet())
+        {
+            switch (entry.getKey())
+            {
+                case CASE_SENSITIVE:
+                {
+                    boolean boolValue = validateBoolean(entry.getValue(), CASE_SENSITIVE);
+                    builder = builder.caseSensitive(boolValue);
+                    break;
+                }
+                
+                case NORMALIZE:
+                {
+                    boolean boolValue = validateBoolean(entry.getValue(), NORMALIZE);
+                    builder = builder.normalized(boolValue);
+                    break;
+                }
+
+                case ASCII:
+                {
+                    boolean boolValue = validateBoolean(entry.getValue(), ASCII);
+                    builder = builder.ascii(boolValue);
+                    break;
+                }
+            }
+        }
+        return builder.build();
+    }
+
+    private static boolean validateBoolean(String value, String option)
+    {
+        if (Strings.isNullOrEmpty(value))
+        {
+            throw new InvalidRequestException("Empty value for boolean option '" + option + '\'');
+        }
+
+        if (!value.equalsIgnoreCase(Boolean.TRUE.toString()) && !value.equalsIgnoreCase(Boolean.FALSE.toString()))
+        {
+            throw new InvalidRequestException("Illegal value for boolean option '" + option + "': " + value);
+        }
+
+        return Boolean.parseBoolean(value);
+    }
+}
diff --git a/src/java/org/apache/cassandra/index/sai/analyzer/filter/BasicFilters.java b/src/java/org/apache/cassandra/index/sai/analyzer/filter/BasicFilters.java
new file mode 100644
index 0000000000..b70fbae9da
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sai/analyzer/filter/BasicFilters.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.analyzer.filter;
+
+import java.text.Normalizer;
+import java.util.Locale;
+
+import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
+
+public class BasicFilters
+{
+    private static final Locale DEFAULT_LOCALE = Locale.getDefault();
+
+    public static class LowerCase extends FilterPipeline.Task
+    {
+        private final Locale locale;
+
+        public LowerCase()
+        {
+            this.locale = DEFAULT_LOCALE;
+        }
+
+        @Override
+        public String process(String input)
+        {
+            return input.toLowerCase(locale);
+        }
+    }
+
+    public static class Normalize extends FilterPipeline.Task
+    {
+        public Normalize() { }
+
+        @Override
+        public String process(String input)
+        {
+            if (input == null) return null;
+            return Normalizer.isNormalized(input, Normalizer.Form.NFC) ? input : Normalizer.normalize(input, Normalizer.Form.NFC);
+        }
+    }
+
+    public static class Ascii extends FilterPipeline.Task
+    {
+        public Ascii() { }
+
+        @Override
+        public String process(String input)
+        {
+            if (input == null) return null;
+            char[] inputChars = input.toCharArray();
+            // The output can (potentially) be 4 times the size of the input
+            char[] outputChars = new char[inputChars.length * 4];
+            int outputSize = ASCIIFoldingFilter.foldToASCII(inputChars, 0, outputChars, 0, inputChars.length);
+            return new String(outputChars, 0, outputSize);
+        }
+    }
+
+    public static class NoOperation extends FilterPipeline.Task
+    {
+        @Override
+        public String process(String input)
+        {
+            return input;
+        }
+    }
+}
diff --git a/src/java/org/apache/cassandra/index/sai/analyzer/filter/FilterPipeline.java b/src/java/org/apache/cassandra/index/sai/analyzer/filter/FilterPipeline.java
new file mode 100644
index 0000000000..017168321b
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sai/analyzer/filter/FilterPipeline.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.analyzer.filter;
+
+import javax.annotation.concurrent.NotThreadSafe;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A linked list of {@link Task} objects. Used to apply a sequence of filtering tasks
+ * to provided textual input in a guaranteed order.
+ */
+@NotThreadSafe
+public class FilterPipeline
+{
+    private final Task head;
+    private Task tail;
+
+    public FilterPipeline(Task first)
+    {
+        this(first, first);
+    }
+
+    private FilterPipeline(Task first, Task tail)
+    {
+        this.head = first;
+        this.tail = tail;
+    }
+
+    public FilterPipeline add(String name, Task task)
+    {
+        Preconditions.checkArgument(task != this.tail, "Provided last task [" + task.name + "] cannot be set to itself");
+        
+        this.tail.next = task;
+        this.tail.name = name;
+        
+        this.tail = task;
+        return this;
+    }
+
+    public Task head()
+    {
+        return this.head;
+    }
+
+    public abstract static class Task
+    {
+        public String name;
+        public Task next;
+
+        public abstract String process(String input);
+    }
+}
diff --git a/src/java/org/apache/cassandra/index/sai/analyzer/filter/FilterPipelineExecutor.java b/src/java/org/apache/cassandra/index/sai/analyzer/filter/FilterPipelineExecutor.java
new file mode 100644
index 0000000000..c863f1e3cd
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sai/analyzer/filter/FilterPipelineExecutor.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.analyzer.filter;
+
+/**
+ * Executes all linked {@link FilterPipeline.Task}s serially on the provided input and returns a result
+ */
+public class FilterPipelineExecutor
+{
+    public static String execute(FilterPipeline pipeline, String initialInput)
+    {
+        FilterPipeline.Task currentTask = pipeline.head();
+        String result = initialInput;
+        
+        while (true)
+        {
+            result = currentTask.process(result);
+            currentTask = currentTask.next;
+            
+            if (currentTask == null)
+                return result;
+        }
+    }
+}
diff --git a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
index 9727816d94..e2f6d85d25 100644
--- a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
+++ b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
@@ -79,7 +79,7 @@ public class SSTableIndexWriter implements PerColumnIndexWriter
     {
         this.indexDescriptor = indexDescriptor;
         this.indexContext = indexContext;
-        this.analyzer = indexContext.getIndexAnalyzerFactory().create();
+        this.analyzer = indexContext.getAnalyzerFactory().create();
         this.limiter = limiter;
         this.isIndexValid = isIndexValid;
         this.maxTermSize = indexContext.isFrozen() ? MAX_FROZEN_TERM_SIZE : MAX_STRING_TERM_SIZE;
diff --git a/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java b/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java
index e84a3f9466..0df665d930 100644
--- a/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java
+++ b/src/java/org/apache/cassandra/index/sai/memory/TrieMemoryIndex.java
@@ -77,7 +77,7 @@ public class TrieMemoryIndex
         this.data = new InMemoryTrie<>(TrieMemtable.BUFFER_TYPE);
         this.primaryKeysReducer = new PrimaryKeysReducer();
         // The use of the analyzer is within a synchronized block so can be considered thread-safe
-        this.analyzerFactory = indexContext.getIndexAnalyzerFactory();
+        this.analyzerFactory = indexContext.getAnalyzerFactory();
         this.validator = indexContext.getValidator();
         this.isLiteral = TypeUtil.isLiteral(validator);
     }
diff --git a/src/java/org/apache/cassandra/index/sai/plan/Expression.java b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
index a36bf315d5..d33470d515 100644
--- a/src/java/org/apache/cassandra/index/sai/plan/Expression.java
+++ b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
@@ -93,7 +93,7 @@ public class Expression
     public Expression(IndexContext indexContext)
     {
         this.context = indexContext;
-        this.analyzerFactory = indexContext.getQueryAnalyzerFactory();
+        this.analyzerFactory = indexContext.getAnalyzerFactory();
         this.validator = indexContext.getValidator();
     }
 
diff --git a/src/java/org/apache/cassandra/index/sai/plan/Operation.java b/src/java/org/apache/cassandra/index/sai/plan/Operation.java
index d911e09277..e9fba9760c 100644
--- a/src/java/org/apache/cassandra/index/sai/plan/Operation.java
+++ b/src/java/org/apache/cassandra/index/sai/plan/Operation.java
@@ -76,7 +76,7 @@ public class Operation
             IndexContext indexContext = controller.getContext(e);
             List<Expression> perColumn = analyzed.get(e.column());
 
-            AbstractAnalyzer analyzer = indexContext.getQueryAnalyzerFactory().create();
+            AbstractAnalyzer analyzer = indexContext.getAnalyzerFactory().create();
             try
             {
                 analyzer.reset(e.getIndexValue().duplicate());
diff --git a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java
index 09acca2649..adcf36d157 100644
--- a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java
+++ b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexSearcher.java
@@ -86,7 +86,7 @@ public class StorageAttachedIndexSearcher implements Index.Searcher
     {
         for (RowFilter.Expression expression : 
queryController.filterOperation())
         {
-            AbstractAnalyzer analyzer = 
queryController.getContext(expression).getIndexAnalyzerFactory().create();
+            AbstractAnalyzer analyzer = 
queryController.getContext(expression).getAnalyzerFactory().create();
             try
             {
                 if (analyzer.transformValue())
diff --git 
a/src/java/org/apache/cassandra/index/sai/virtual/ColumnIndexesSystemView.java 
b/src/java/org/apache/cassandra/index/sai/virtual/ColumnIndexesSystemView.java
index 2a9528db62..39eaa6d7ee 100644
--- 
a/src/java/org/apache/cassandra/index/sai/virtual/ColumnIndexesSystemView.java
+++ 
b/src/java/org/apache/cassandra/index/sai/virtual/ColumnIndexesSystemView.java
@@ -104,7 +104,7 @@ public class ColumnIndexesSystemView extends 
AbstractVirtualTable
                                .column(IS_QUERYABLE, 
manager.isIndexQueryable(index))
                                .column(IS_BUILDING, 
manager.isIndexBuilding(indexName))
                                .column(IS_STRING, context.isLiteral())
-                               .column(ANALYZER, 
context.getIndexAnalyzerFactory().toString());
+                               .column(ANALYZER, 
context.getAnalyzerFactory().toString());
                     }
                 }
             }
diff --git 
a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmerFactory.java 
b/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmerFactory.java
index 457876ab24..9786a86ae2 100644
--- 
a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmerFactory.java
+++ 
b/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmerFactory.java
@@ -22,16 +22,29 @@ import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
 
-import org.apache.cassandra.concurrent.ImmediateExecutor;
-import org.tartarus.snowball.SnowballStemmer;
-import org.tartarus.snowball.ext.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.github.benmanes.caffeine.cache.CacheLoader;
 import com.github.benmanes.caffeine.cache.Caffeine;
 import com.github.benmanes.caffeine.cache.LoadingCache;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.cassandra.concurrent.ImmediateExecutor;
+import org.tartarus.snowball.SnowballStemmer;
+import org.tartarus.snowball.ext.DanishStemmer;
+import org.tartarus.snowball.ext.DutchStemmer;
+import org.tartarus.snowball.ext.EnglishStemmer;
+import org.tartarus.snowball.ext.FinnishStemmer;
+import org.tartarus.snowball.ext.FrenchStemmer;
+import org.tartarus.snowball.ext.GermanStemmer;
+import org.tartarus.snowball.ext.HungarianStemmer;
+import org.tartarus.snowball.ext.ItalianStemmer;
+import org.tartarus.snowball.ext.NorwegianStemmer;
+import org.tartarus.snowball.ext.PortugueseStemmer;
+import org.tartarus.snowball.ext.RomanianStemmer;
+import org.tartarus.snowball.ext.RussianStemmer;
+import org.tartarus.snowball.ext.SpanishStemmer;
+import org.tartarus.snowball.ext.SwedishStemmer;
+import org.tartarus.snowball.ext.TurkishStemmer;
 
 /**
  * Returns a SnowballStemmer instance appropriate for
@@ -63,21 +76,21 @@ public class StemmerFactory
     static
     {
+        // Registers the Snowball stemmer class for each supported two-letter language code.
+        // NOTE(review): presumably looked up by the locale's language in getStemmer(Locale) - confirm.
         SUPPORTED_LANGUAGES = new HashMap<>();
-        SUPPORTED_LANGUAGES.put("de", germanStemmer.class);
-        SUPPORTED_LANGUAGES.put("da", danishStemmer.class);
-        SUPPORTED_LANGUAGES.put("es", spanishStemmer.class);
-        SUPPORTED_LANGUAGES.put("en", englishStemmer.class);
-        SUPPORTED_LANGUAGES.put("fl", finnishStemmer.class);
-        SUPPORTED_LANGUAGES.put("fr", frenchStemmer.class);
-        SUPPORTED_LANGUAGES.put("hu", hungarianStemmer.class);
-        SUPPORTED_LANGUAGES.put("it", italianStemmer.class);
-        SUPPORTED_LANGUAGES.put("nl", dutchStemmer.class);
-        SUPPORTED_LANGUAGES.put("no", norwegianStemmer.class);
-        SUPPORTED_LANGUAGES.put("pt", portugueseStemmer.class);
-        SUPPORTED_LANGUAGES.put("ro", romanianStemmer.class);
-        SUPPORTED_LANGUAGES.put("ru", russianStemmer.class);
-        SUPPORTED_LANGUAGES.put("sv", swedishStemmer.class);
-        SUPPORTED_LANGUAGES.put("tr", turkishStemmer.class);
+        SUPPORTED_LANGUAGES.put("de", GermanStemmer.class);
+        SUPPORTED_LANGUAGES.put("da", DanishStemmer.class);
+        SUPPORTED_LANGUAGES.put("es", SpanishStemmer.class);
+        SUPPORTED_LANGUAGES.put("en", EnglishStemmer.class);
+        // "fi" is the ISO 639-1 code for Finnish (the value Locale.getLanguage() returns for Finnish locales).
+        SUPPORTED_LANGUAGES.put("fi", FinnishStemmer.class);
+        // "fl" is not an ISO 639-1 code; it is kept only so existing configurations keep working.
+        SUPPORTED_LANGUAGES.put("fl", FinnishStemmer.class);
+        SUPPORTED_LANGUAGES.put("fr", FrenchStemmer.class);
+        SUPPORTED_LANGUAGES.put("hu", HungarianStemmer.class);
+        SUPPORTED_LANGUAGES.put("it", ItalianStemmer.class);
+        SUPPORTED_LANGUAGES.put("nl", DutchStemmer.class);
+        SUPPORTED_LANGUAGES.put("no", NorwegianStemmer.class);
+        SUPPORTED_LANGUAGES.put("pt", PortugueseStemmer.class);
+        SUPPORTED_LANGUAGES.put("ro", RomanianStemmer.class);
+        SUPPORTED_LANGUAGES.put("ru", RussianStemmer.class);
+        SUPPORTED_LANGUAGES.put("sv", SwedishStemmer.class);
+        SUPPORTED_LANGUAGES.put("tr", TurkishStemmer.class);
     }
 
     public static SnowballStemmer getStemmer(Locale locale)
diff --git 
a/test/distributed/org/apache/cassandra/distributed/test/sai/ReplicaFilteringProtectionTest.java
 
b/test/distributed/org/apache/cassandra/distributed/test/sai/ReplicaFilteringProtectionTest.java
new file mode 100644
index 0000000000..95ed4c7946
--- /dev/null
+++ 
b/test/distributed/org/apache/cassandra/distributed/test/sai/ReplicaFilteringProtectionTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.distributed.test.sai;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import org.apache.cassandra.distributed.Cluster;
+import org.apache.cassandra.distributed.api.SimpleQueryResult;
+import org.apache.cassandra.distributed.test.TestBaseImpl;
+
+import static org.apache.cassandra.distributed.api.ConsistencyLevel.ALL;
+import static org.apache.cassandra.distributed.shared.AssertUtils.assertRows;
+import static org.apache.cassandra.distributed.shared.AssertUtils.row;
+
+/**
+ * Distributed test checking that a query against a storage-attached index whose
+ * analyzer transforms values (here a case-insensitive index) does not return a
+ * stale row when only one replica has received a newer value.
+ */
+public class ReplicaFilteringProtectionTest extends TestBaseImpl
+{
+    private static final int REPLICAS = 2;
+
+    /**
+     * Starts a {@link #REPLICAS}-node cluster with hinted handoff disabled and batch
+     * commit log sync, creates a table with a case-insensitive SAI index on {@code v},
+     * writes 'OLD' at consistency level ALL, then updates the row on node 1 only.
+     * A query for 'old' at ALL must match the row before the update and return
+     * no rows after it.
+     */
+    @Test
+    public void testRFPWithIndexTransformations() throws IOException
+    {
+        try (Cluster cluster = init(Cluster.build()
+                                           .withNodes(REPLICAS)
+                                           .withConfig(config -> 
config.set("hinted_handoff_enabled", false)
+                                                                       
.set("commitlog_sync", "batch")).start()))
+        {
+            String tableName = "sai_rfp";
+            String fullTableName = KEYSPACE + '.' + tableName;
+
+            cluster.schemaChange("CREATE TABLE " + fullTableName + " (k int 
PRIMARY KEY, v text)");
+            cluster.schemaChange("CREATE CUSTOM INDEX ON " + fullTableName + 
"(v) USING 'StorageAttachedIndex' " +
+                                 "WITH OPTIONS = { 'case_sensitive' : false}");
+
+            // both nodes have the old value
+            cluster.coordinator(1).execute("INSERT INTO " + fullTableName + 
"(k, v) VALUES (0, 'OLD')", ALL);
+
+            String select = "SELECT * FROM " + fullTableName + " WHERE v = 
'old'";
+            Object[][] initialRows = cluster.coordinator(1).execute(select, 
ALL);
+            assertRows(initialRows, row(0, "OLD"));
+
+            // only one node gets the new value
+            cluster.get(1).executeInternal("UPDATE " + fullTableName + " SET v 
= 'new' WHERE k = 0");
+
+            // querying by the old value shouldn't return the old surviving row
+            SimpleQueryResult oldResult = 
cluster.coordinator(1).executeWithResult(select, ALL);
+            assertRows(oldResult.toObjectArrays());
+        }
+    }
+}
diff --git 
a/test/unit/org/apache/cassandra/index/sai/analyzer/NonTokenizingAnalyzerTest.java
 
b/test/unit/org/apache/cassandra/index/sai/analyzer/NonTokenizingAnalyzerTest.java
new file mode 100644
index 0000000000..fb73a98eae
--- /dev/null
+++ 
b/test/unit/org/apache/cassandra/index/sai/analyzer/NonTokenizingAnalyzerTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.analyzer;
+
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.utils.ByteBufferUtil;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Unit tests for {@link NonTokenizingAnalyzer} covering its case-sensitivity and
+ * ASCII-folding options.
+ */
+public class NonTokenizingAnalyzerTest
+{
+    /** With ASCII folding on and case sensitivity off, "Éppinger" is analyzed to "eppinger". */
+    @Test
+    public void asciiAnalyzer() throws Exception
+    {
+        NonTokenizingOptions options = NonTokenizingOptions.getDefaultOptions();
+        options.setCaseSensitive(false);
+        options.setAscii(true);
+
+        assertEquals("eppinger", getAnalyzedString("Éppinger", options));
+    }
+
+    /** With ASCII folding off and case sensitivity on, the input is returned unchanged. */
+    @Test
+    public void asciiAnalyzerFalse() throws Exception
+    {
+        NonTokenizingOptions options = NonTokenizingOptions.getDefaultOptions();
+        options.setCaseSensitive(true);
+        options.setAscii(false);
+
+        assertEquals("Éppinger", getAnalyzedString("Éppinger", options));
+    }
+
+    /** With case sensitivity off, the input is lower-cased. */
+    @Test
+    public void caseInsensitiveAnalyzer() throws Exception
+    {
+        NonTokenizingOptions options = NonTokenizingOptions.getDefaultOptions();
+        options.setCaseSensitive(false);
+
+        assertEquals("nip it in the bud", getAnalyzedString("Nip it in the bud", options));
+    }
+
+    /** With the default options, the input keeps its original case. */
+    @Test
+    public void caseSensitiveAnalyzer() throws Exception
+    {
+        NonTokenizingOptions options = NonTokenizingOptions.getDefaultOptions();
+
+        assertEquals("Nip it in the bud", getAnalyzedString("Nip it in the bud", options));
+    }
+
+    /**
+     * Runs {@code input} through a {@link NonTokenizingAnalyzer} built for {@link UTF8Type}
+     * with the given options.
+     * <p>
+     * The input is encoded explicitly as UTF-8 rather than with the platform default
+     * charset, so the non-ASCII test inputs produce the same bytes on every platform.
+     *
+     * @param input   the text to analyze
+     * @param options the analyzer options under test
+     * @return the first term produced by the analyzer, or {@code null} if it produces none
+     */
+    private String getAnalyzedString(String input, NonTokenizingOptions options) throws Exception
+    {
+        NonTokenizingAnalyzer analyzer = new NonTokenizingAnalyzer(UTF8Type.instance, options);
+        analyzer.reset(ByteBuffer.wrap(input.getBytes(java.nio.charset.StandardCharsets.UTF_8)));
+        return analyzer.hasNext() ? ByteBufferUtil.string(analyzer.next) : null;
+    }
+}
diff --git 
a/test/unit/org/apache/cassandra/index/sai/analyzer/filter/BasicFiltersTest.java
 
b/test/unit/org/apache/cassandra/index/sai/analyzer/filter/BasicFiltersTest.java
new file mode 100644
index 0000000000..01faf488c1
--- /dev/null
+++ 
b/test/unit/org/apache/cassandra/index/sai/analyzer/filter/BasicFiltersTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.analyzer.filter;
+
+import java.text.Normalizer;
+
+import org.junit.Test;
+
+import org.apache.cassandra.index.sai.SAITester;
+import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Randomized tests comparing each {@link BasicFilters} filter with the reference
+ * transformation it is expected to reproduce.
+ */
+public class BasicFiltersTest
+{
+    /** {@link BasicFilters.LowerCase} must agree with {@link String#toLowerCase()}. */
+    @Test
+    public void testLowerCase()
+    {
+        BasicFilters.LowerCase lowerCase = new BasicFilters.LowerCase();
+
+        // Draw the iteration count once; a random bound inside the loop condition
+        // would be re-drawn on every pass.
+        int iterations = SAITester.getRandom().nextIntBetween(100, 1000);
+        for (int count = 0; count < iterations; count++)
+        {
+            String input = SAITester.getRandom().nextTextString(10, 50);
+            assertEquals(input.toLowerCase(), lowerCase.process(input));
+        }
+    }
+
+    /** {@link BasicFilters.Normalize} must agree with NFC normalization from {@link Normalizer}. */
+    @Test
+    public void testNormalize()
+    {
+        BasicFilters.Normalize normalize = new BasicFilters.Normalize();
+
+        // Draw the iteration count once; see testLowerCase().
+        int iterations = SAITester.getRandom().nextIntBetween(100, 1000);
+        for (int count = 0; count < iterations; count++)
+        {
+            String input = SAITester.getRandom().nextTextString(10, 50);
+            assertEquals(Normalizer.normalize(input, Normalizer.Form.NFC), normalize.process(input));
+        }
+    }
+
+    /** {@link BasicFilters.Ascii} must agree with Lucene's {@link ASCIIFoldingFilter#foldToASCII}. */
+    @Test
+    public void testAscii()
+    {
+        BasicFilters.Ascii ascii = new BasicFilters.Ascii();
+
+        // Draw the iteration count once; see testLowerCase().
+        int iterations = SAITester.getRandom().nextIntBetween(100, 1000);
+        for (int count = 0; count < iterations; count++)
+        {
+            String input = SAITester.getRandom().nextTextString(100, 5000);
+
+            char[] inputChars = input.toCharArray();
+            // Folding can expand a character, so the output buffer is sized at 4x the input.
+            char[] expectedChars = new char[inputChars.length * 4];
+            int expectedSize = ASCIIFoldingFilter.foldToASCII(inputChars, 0, expectedChars, 0, inputChars.length);
+            String expected = new String(expectedChars, 0, expectedSize);
+
+            assertEquals(expected, ascii.process(input));
+        }
+    }
+}
diff --git 
a/test/unit/org/apache/cassandra/index/sai/cql/StorageAttachedIndexDDLTest.java 
b/test/unit/org/apache/cassandra/index/sai/cql/StorageAttachedIndexDDLTest.java
index fab116101f..2b621fcfe7 100644
--- 
a/test/unit/org/apache/cassandra/index/sai/cql/StorageAttachedIndexDDLTest.java
+++ 
b/test/unit/org/apache/cassandra/index/sai/cql/StorageAttachedIndexDDLTest.java
@@ -264,6 +264,112 @@ public class StorageAttachedIndexDDLTest extends SAITester
         assertEquals(1, saiCreationCounter.get());
     }
 
+    /**
+     * An index created without options matches only the exact case:
+     * the stored value 'Camel' is found by 'Camel' but not by 'camel'.
+     */
+    @Test
+    public void shouldBeCaseSensitiveByDefault()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex'");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Camel')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'Camel'").size());
+
+        assertEquals(0, execute("SELECT id FROM %s WHERE val = 
'camel'").size());
+    }
+
+    /**
+     * An index created with 'case_sensitive' : true matches only the exact case:
+     * the stored value 'Camel' is found by 'Camel' but not by 'camel'.
+     */
+    @Test
+    public void shouldEnableCaseSensitiveSearch()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex' WITH OPTIONS = { 'case_sensitive' : true }");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Camel')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'Camel'").size());
+
+        assertEquals(0, execute("SELECT id FROM %s WHERE val = 
'camel'").size());
+    }
+
+    /**
+     * An index created with 'case_sensitive' : false ignores case:
+     * the stored value 'Camel' is found by querying for 'camel'.
+     */
+    @Test
+    public void shouldEnableCaseInsensitiveSearch()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex' WITH OPTIONS = { 'case_sensitive' : false }");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Camel')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'camel'").size());
+    }
+
+    /**
+     * An index created without options does not normalize values: a value stored
+     * with the precomposed accented character is found by the same precomposed form
+     * but not by the decomposed form (base letter plus combining accent).
+     */
+    @Test
+    public void shouldBeNonNormalizedByDefault()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex'");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Cam\u00E1l')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'Cam\u00E1l'").size());
+
+        // Both \u00E1 and \u0061\u0301 are visible as the character á, but 
without NFC normalization, they won't match.
+        assertEquals(0, execute("SELECT id FROM %s WHERE val = 
'Cam\u0061\u0301l'").size());
+    }
+
+    /**
+     * An index created with 'normalize' : false does not normalize values: a value
+     * stored with the precomposed accented character is found by the same precomposed
+     * form but not by the decomposed form (base letter plus combining accent).
+     */
+    @Test
+    public void shouldEnableNonNormalizedSearch()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex' WITH OPTIONS = { 'normalize' : false }");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Cam\u00E1l')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'Cam\u00E1l'").size());
+
+        // Both \u00E1 and \u0061\u0301 are visible as the character á, but 
without NFC normalization, they won't match.
+        assertEquals(0, execute("SELECT id FROM %s WHERE val = 
'Cam\u0061\u0301l'").size());
+    }
+
+    /**
+     * An index created with 'normalize' : true matches across equivalent forms:
+     * a value stored with the precomposed accented character is found by querying
+     * with the decomposed form (base letter plus combining accent).
+     */
+    @Test
+    public void shouldEnableNormalizedSearch()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex' WITH OPTIONS = { 'normalize' : true }");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Cam\u00E1l')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'Cam\u0061\u0301l'").size());
+    }
+
+    /**
+     * An index created with 'normalize' : true and 'case_sensitive' : false applies
+     * both options: a capitalized value stored with the precomposed accented character
+     * is found by a lower-case query that uses the decomposed form.
+     */
+    @Test
+    public void shouldEnableNormalizedCaseInsensitiveSearch()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex' WITH OPTIONS = { 'normalize' : true, 'case_sensitive' : 
false}");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Cam\u00E1l')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'cam\u0061\u0301l'").size());
+    }
+
+    /**
+     * An index created with 'ascii' : true and 'case_sensitive' : false matches the
+     * stored value 'Éppinger' when queried with the unaccented, lower-case 'eppinger'.
+     */
+    @Test
+    public void shouldEnableAsciiSearch()
+    {
+        createTable("CREATE TABLE %s (id text PRIMARY KEY, val text)");
+
+        createIndex("CREATE CUSTOM INDEX ON %s(val) USING 
'StorageAttachedIndex' WITH OPTIONS = { 'ascii' : true, 'case_sensitive' : 
false}");
+
+        execute("INSERT INTO %s (id, val) VALUES ('1', 'Éppinger')");
+
+        assertEquals(1, execute("SELECT id FROM %s WHERE val = 
'eppinger'").size());
+    }
+
     @Test
     public void shouldCreateIndexOnReversedType() throws Throwable
     {
diff --git 
a/test/unit/org/apache/cassandra/index/sai/virtual/IndexesSystemViewTest.java 
b/test/unit/org/apache/cassandra/index/sai/virtual/IndexesSystemViewTest.java
index a8f5bf9a58..19d4cbfdf1 100644
--- 
a/test/unit/org/apache/cassandra/index/sai/virtual/IndexesSystemViewTest.java
+++ 
b/test/unit/org/apache/cassandra/index/sai/virtual/IndexesSystemViewTest.java
@@ -129,6 +129,6 @@ public class IndexesSystemViewTest extends SAITester
                        isQueryable,
                        isBuilding,
                        isString,
-                       context.getIndexAnalyzerFactory().toString());
+                       context.getAnalyzerFactory().toString());
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to