Repository: cassandra Updated Branches: refs/heads/trunk 6dc1004ce -> 479e8aff1
fix EQ semantics of analyzed SASI indexes patch by xedin; reviewed by beobal for CASSANDRA-11130 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/479e8aff Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/479e8aff Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/479e8aff Branch: refs/heads/trunk Commit: 479e8aff1346d41ebc05e1a113996a803228284d Parents: 6dc1004 Author: Pavel Yaskevich <[email protected]> Authored: Sun Feb 7 17:15:21 2016 -0800 Committer: Pavel Yaskevich <[email protected]> Committed: Thu Feb 11 11:24:40 2016 -0800 ---------------------------------------------------------------------- CHANGES.txt | 1 + src/java/org/apache/cassandra/cql3/Cql.g | 2 +- .../org/apache/cassandra/cql3/Operator.java | 9 + .../org/apache/cassandra/cql3/Relation.java | 4 +- .../apache/cassandra/db/filter/RowFilter.java | 1 + .../index/sasi/analyzer/AbstractAnalyzer.java | 8 + .../index/sasi/analyzer/StandardAnalyzer.java | 5 + .../cassandra/index/sasi/conf/ColumnIndex.java | 12 +- .../cassandra/index/sasi/conf/IndexMode.java | 2 +- .../index/sasi/disk/OnDiskIndexBuilder.java | 4 +- .../index/sasi/memory/TrieMemIndex.java | 2 + .../cassandra/index/sasi/plan/Expression.java | 7 +- .../cassandra/index/sasi/plan/Operation.java | 5 + .../cassandra/index/sasi/SASIIndexTest.java | 219 +++++++++++++++++-- .../index/sasi/plan/OperationTest.java | 6 +- 15 files changed, 263 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index c09a453..28651e2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.4 + * fix EQ semantics of analyzed SASI indexes (CASSANDRA-11130) * Support long name output for nodetool commands (CASSANDRA-7950) * Encrypted hints 
(CASSANDRA-11040) * SASI index options validation (CASSANDRA-11136) http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/cql3/Cql.g ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/Cql.g b/src/java/org/apache/cassandra/cql3/Cql.g index d560119..5cb479c 100644 --- a/src/java/org/apache/cassandra/cql3/Cql.g +++ b/src/java/org/apache/cassandra/cql3/Cql.g @@ -209,7 +209,7 @@ options { } else { - operator = Operator.EQ; + operator = Operator.LIKE_MATCHES; endIndex += 1; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/cql3/Operator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/Operator.java b/src/java/org/apache/cassandra/cql3/Operator.java index d518961..accb786 100644 --- a/src/java/org/apache/cassandra/cql3/Operator.java +++ b/src/java/org/apache/cassandra/cql3/Operator.java @@ -126,6 +126,14 @@ public enum Operator { return "LIKE '%<term>%'"; } + }, + LIKE_MATCHES(13) + { + @Override + public String toString() + { + return "LIKE '<term>'"; + } }; /** @@ -222,6 +230,7 @@ public enum Operator return ByteBufferUtil.startsWith(leftOperand, rightOperand); case LIKE_SUFFIX: return ByteBufferUtil.endsWith(leftOperand, rightOperand); + case LIKE_MATCHES: case LIKE_CONTAINS: return ByteBufferUtil.contains(leftOperand, rightOperand); default: http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/cql3/Relation.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/Relation.java b/src/java/org/apache/cassandra/cql3/Relation.java index 81f46a6..616fd30 100644 --- a/src/java/org/apache/cassandra/cql3/Relation.java +++ b/src/java/org/apache/cassandra/cql3/Relation.java @@ -112,7 +112,8 @@ public abstract class Relation { { return relationType == 
Operator.LIKE_PREFIX || relationType == Operator.LIKE_SUFFIX - || relationType == Operator.LIKE_CONTAINS; + || relationType == Operator.LIKE_CONTAINS + || relationType == Operator.LIKE_MATCHES; } /** @@ -153,6 +154,7 @@ public abstract class Relation { case LIKE_PREFIX: case LIKE_SUFFIX: case LIKE_CONTAINS: + case LIKE_MATCHES: return newLikeRestriction(cfm, boundNames, relationType); default: throw invalidRequest("Unsupported \"!=\" relation: %s", this); } http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/db/filter/RowFilter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/filter/RowFilter.java b/src/java/org/apache/cassandra/db/filter/RowFilter.java index 1141fd9..fcc3cd5 100644 --- a/src/java/org/apache/cassandra/db/filter/RowFilter.java +++ b/src/java/org/apache/cassandra/db/filter/RowFilter.java @@ -604,6 +604,7 @@ public abstract class RowFilter implements Iterable<RowFilter.Expression> case LIKE_PREFIX: case LIKE_SUFFIX: case LIKE_CONTAINS: + case LIKE_MATCHES: { assert !column.isComplex() : "Only CONTAINS and CONTAINS_KEY are supported for 'complex' types"; ByteBuffer foundValue = getValue(metadata, partitionKey, row); http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java index b3fdd8c..31c66cc 100644 --- a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java +++ b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java @@ -42,6 +42,14 @@ public abstract class AbstractAnalyzer implements Iterator<ByteBuffer> public abstract void reset(ByteBuffer input); + /** + * @return true if current analyzer provides 
text tokenization, false otherwise. + */ + public boolean isTokenizing() + { + return false; + } + public static String normalize(String original) { return Normalizer.isNormalized(original, Normalizer.Form.NFC) http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java index bcc63df..5e09b9f 100644 --- a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java +++ b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java @@ -191,4 +191,9 @@ public class StandardAnalyzer extends AbstractAnalyzer scanner.yyreset(reader); this.inputReader = reader; } + + public boolean isTokenizing() + { + return true; + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java b/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java index 29e7c28..1703bd4 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java @@ -37,6 +37,7 @@ import org.apache.cassandra.index.sasi.conf.view.View; import org.apache.cassandra.index.sasi.disk.Token; import org.apache.cassandra.index.sasi.memory.IndexMemtable; import org.apache.cassandra.index.sasi.plan.Expression; +import org.apache.cassandra.index.sasi.plan.Expression.Op; import org.apache.cassandra.index.sasi.utils.RangeIterator; import org.apache.cassandra.io.sstable.Component; import org.apache.cassandra.io.sstable.format.SSTableReader; @@ -58,6 +59,8 @@ public class ColumnIndex private final Component 
component; private final DataTracker tracker; + private final boolean isTokenized; + public ColumnIndex(AbstractType<?> keyValidator, ColumnDefinition column, IndexMetadata metadata) { this.keyValidator = keyValidator; @@ -67,6 +70,7 @@ public class ColumnIndex this.memtable = new AtomicReference<>(new IndexMemtable(this)); this.tracker = new DataTracker(keyValidator, this); this.component = new Component(Component.Type.SECONDARY_INDEX, String.format(FILE_NAME_FORMAT, getIndexName())); + this.isTokenized = getAnalyzer().isTokenizing(); } /** @@ -170,9 +174,13 @@ public class ColumnIndex return isIndexed() ? mode.isLiteral : (validator instanceof UTF8Type || validator instanceof AsciiType); } - public boolean supports(Operator operator) + public boolean supports(Operator op) { - return mode.supports(Expression.Op.valueOf(operator)); + Op operator = Op.valueOf(op); + return !(isTokenized && operator == Op.EQ) // EQ is only applicable to non-tokenized indexes + && !(isLiteral() && operator == Op.RANGE) // RANGE is only applicable to non-literal indexes + && mode.supports(operator); // for all other cases let's refer to index itself + } public static ByteBuffer getValueOf(ColumnDefinition column, Row row, int nowInSecs) http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java index b9c5653..1c85ed5 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java @@ -179,6 +179,6 @@ public class IndexMode public boolean supports(Op operator) { - return !(isLiteral && operator == Op.RANGE) && mode.supports(operator); + return mode.supports(operator); } } 
http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java index 20a8739..04b7b1c 100644 --- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java +++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java @@ -50,8 +50,8 @@ public class OnDiskIndexBuilder public enum Mode { - PREFIX(EnumSet.of(Op.EQ, Op.PREFIX, Op.NOT_EQ, Op.RANGE)), - CONTAINS(EnumSet.of(Op.EQ, Op.CONTAINS, Op.SUFFIX, Op.NOT_EQ)), + PREFIX(EnumSet.of(Op.EQ, Op.MATCH, Op.PREFIX, Op.NOT_EQ, Op.RANGE)), + CONTAINS(EnumSet.of(Op.MATCH, Op.CONTAINS, Op.SUFFIX, Op.NOT_EQ)), SPARSE(EnumSet.of(Op.EQ, Op.NOT_EQ, Op.RANGE)); Set<Op> supportedOps; http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java b/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java index 99a417a..0da65c7 100644 --- a/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java +++ b/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java @@ -182,6 +182,7 @@ public class TrieMemIndex extends MemIndex switch (operator) { case EQ: + case MATCH: ConcurrentSkipListSet<DecoratedKey> keys = trie.getValueForExactKey(value); return keys == null ? Collections.emptyList() : Collections.singletonList(keys); @@ -219,6 +220,7 @@ public class TrieMemIndex extends MemIndex switch (operator) { case EQ: + case MATCH: ConcurrentSkipListSet<DecoratedKey> keys = trie.getValueForExactKey(value); return keys == null ? 
Collections.emptyList() : Collections.singletonList(keys); http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/plan/Expression.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java index 43f8251..679d866 100644 --- a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java +++ b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java @@ -46,7 +46,7 @@ public class Expression public enum Op { - EQ, PREFIX, SUFFIX, CONTAINS, NOT_EQ, RANGE; + EQ, MATCH, PREFIX, SUFFIX, CONTAINS, NOT_EQ, RANGE; public static Op valueOf(Operator operator) { @@ -73,6 +73,9 @@ public class Expression case LIKE_CONTAINS: return CONTAINS; + case LIKE_MATCHES: + return MATCH; + default: throw new IllegalArgumentException("unknown operator: " + operator); } @@ -140,6 +143,7 @@ public class Expression case LIKE_PREFIX: case LIKE_SUFFIX: case LIKE_CONTAINS: + case LIKE_MATCHES: case EQ: lower = new Bound(value, true); upper = lower; @@ -262,6 +266,7 @@ public class Expression switch (operation) { case EQ: + case MATCH: // Operation.isSatisfiedBy handles conclusion on !=, // here we just need to make sure that term matched it case NOT_EQ: http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/src/java/org/apache/cassandra/index/sasi/plan/Operation.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/plan/Operation.java b/src/java/org/apache/cassandra/index/sasi/plan/Operation.java index 28bcc51..f8b02a3 100644 --- a/src/java/org/apache/cassandra/index/sasi/plan/Operation.java +++ b/src/java/org/apache/cassandra/index/sasi/plan/Operation.java @@ -292,9 +292,13 @@ public class Operation extends RangeIterator<Long, Token> switch (e.operator()) { case EQ: + isMultiExpression = false; + break; + 
case LIKE_PREFIX: case LIKE_SUFFIX: case LIKE_CONTAINS: + case LIKE_MATCHES: isMultiExpression = true; break; @@ -341,6 +345,7 @@ public class Operation extends RangeIterator<Long, Token> case LIKE_PREFIX: case LIKE_SUFFIX: case LIKE_CONTAINS: + case LIKE_MATCHES: return 4; case GTE: http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java index 2ae1e70..a88e594 100644 --- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java +++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java @@ -167,7 +167,7 @@ public class SASIIndexTest ColumnFamilyStore store = loadData(data, forceFlush); - Set<String> rows= getIndexed(store, 10, buildExpression(UTF8Type.instance.decompose("first_name"), Operator.EQ, UTF8Type.instance.decompose("doesntmatter"))); + Set<String> rows= getIndexed(store, 10, buildExpression(UTF8Type.instance.decompose("first_name"), Operator.LIKE_MATCHES, UTF8Type.instance.decompose("doesntmatter"))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[]{}, rows.toArray(new String[rows.size()]))); } @@ -502,18 +502,18 @@ public class SASIIndexTest store = loadData(part4, forceFlush); rows = getIndexed(store, 10, - buildExpression(firstName, Operator.EQ, UTF8Type.instance.decompose("Susana")), + buildExpression(firstName, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("Susana")), buildExpression(age, Operator.LTE, Int32Type.instance.decompose(13)), buildExpression(age, Operator.GT, Int32Type.instance.decompose(10))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key12" }, rows.toArray(new String[rows.size()]))); rows = getIndexed(store, 10, - buildExpression(firstName, Operator.EQ, UTF8Type.instance.decompose("Demario")), + 
buildExpression(firstName, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("Demario")), buildExpression(age, Operator.LTE, Int32Type.instance.decompose(30))); Assert.assertTrue(rows.toString(), rows.size() == 0); rows = getIndexed(store, 10, - buildExpression(firstName, Operator.EQ, UTF8Type.instance.decompose("Josephine"))); + buildExpression(firstName, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("Josephine"))); Assert.assertTrue(rows.toString(), rows.size() == 0); rows = getIndexed(store, 10, @@ -1142,7 +1142,7 @@ public class SASIIndexTest rows = getIndexed(store, 10, buildExpression(comment, Operator.LIKE_SUFFIX, UTF8Type.instance.decompose("ン"))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key4", "key5" }, rows.toArray(new String[rows.size()]))); - rows = getIndexed(store, 10, buildExpression(comment, Operator.EQ, UTF8Type.instance.decompose("レストラン"))); + rows = getIndexed(store, 10, buildExpression(comment, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("レストラン"))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key4" }, rows.toArray(new String[rows.size()]))); } @@ -1211,7 +1211,7 @@ public class SASIIndexTest rows = getIndexed(store, 10, buildExpression(comment, Operator.LIKE_SUFFIX, UTF8Type.instance.decompose("ン"))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key3" }, rows.toArray(new String[rows.size()]))); - rows = getIndexed(store, 10, buildExpression(comment, Operator.EQ, UTF8Type.instance.decompose("ベンジャミン ウエスト"))); + rows = getIndexed(store, 10, buildExpression(comment, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("ベンジャミン ウエスト"))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key4" }, rows.toArray(new String[rows.size()]))); } @@ -1235,12 +1235,12 @@ public class SASIIndexTest Set<String> rows; - rows = getIndexed(store, 10, buildExpression(comment, Operator.EQ, bigValue.duplicate())); + rows = getIndexed(store, 10, 
buildExpression(comment, Operator.LIKE_MATCHES, bigValue.duplicate())); Assert.assertEquals(0, rows.size()); store.forceBlockingFlush(); - rows = getIndexed(store, 10, buildExpression(comment, Operator.EQ, bigValue.duplicate())); + rows = getIndexed(store, 10, buildExpression(comment, Operator.LIKE_MATCHES, bigValue.duplicate())); Assert.assertEquals(0, rows.size()); } } @@ -1471,6 +1471,10 @@ public class SASIIndexTest update(rm, name, UTF8Type.instance.decompose("Vijay"), System.currentTimeMillis()); rm.apply(); + rm = new Mutation(KS_NAME, decoratedKey("key8")); // this name is going to be tokenized + update(rm, name, UTF8Type.instance.decompose("Jean-Claude"), System.currentTimeMillis()); + rm.apply(); + // this flush is going to produce range - 'jason' -> 'vijay' store.forceBlockingFlush(); @@ -1478,11 +1482,12 @@ public class SASIIndexTest // since simple interval tree lookup is not going to cover it, prefix lookup actually required. Set<String> rows; + rows = getIndexed(store, 10, buildExpression(name, Operator.LIKE_PREFIX, UTF8Type.instance.decompose("J"))); - Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key2", "key5", "key6" }, rows.toArray(new String[rows.size()]))); + Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key2", "key5", "key6", "key8"}, rows.toArray(new String[rows.size()]))); rows = getIndexed(store, 10, buildExpression(name, Operator.LIKE_PREFIX, UTF8Type.instance.decompose("j"))); - Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key2", "key5", "key6" }, rows.toArray(new String[rows.size()]))); + Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key2", "key5", "key6", "key8" }, rows.toArray(new String[rows.size()]))); rows = getIndexed(store, 10, buildExpression(name, Operator.LIKE_PREFIX, UTF8Type.instance.decompose("m"))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key3", "key4" }, rows.toArray(new String[rows.size()]))); @@ -1495,13 +1500,28 @@ 
public class SASIIndexTest rows = getIndexed(store, 10, buildExpression(name, Operator.LIKE_PREFIX, UTF8Type.instance.decompose("j")), buildExpression(name, Operator.NEQ, UTF8Type.instance.decompose("joh"))); - Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key2", "key6" }, rows.toArray(new String[rows.size()]))); + Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key2", "key6", "key8" }, rows.toArray(new String[rows.size()]))); - rows = getIndexed(store, 10, buildExpression(name, Operator.EQ, UTF8Type.instance.decompose("pavel"))); + rows = getIndexed(store, 10, buildExpression(name, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("pavel"))); Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key1" }, rows.toArray(new String[rows.size()]))); rows = getIndexed(store, 10, buildExpression(name, Operator.EQ, UTF8Type.instance.decompose("Pave"))); Assert.assertTrue(rows.isEmpty()); + + rows = getIndexed(store, 10, buildExpression(name, Operator.EQ, UTF8Type.instance.decompose("Pavel"))); + Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key1" }, rows.toArray(new String[rows.size()]))); + + rows = getIndexed(store, 10, buildExpression(name, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("JeAn"))); + Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key8" }, rows.toArray(new String[rows.size()]))); + + rows = getIndexed(store, 10, buildExpression(name, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("claUde"))); + Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key8" }, rows.toArray(new String[rows.size()]))); + + rows = getIndexed(store, 10, buildExpression(name, Operator.EQ, UTF8Type.instance.decompose("Jean"))); + Assert.assertTrue(rows.isEmpty()); + + rows = getIndexed(store, 10, buildExpression(name, Operator.EQ, UTF8Type.instance.decompose("Jean-Claude"))); + Assert.assertTrue(rows.toString(), Arrays.equals(new String[] { "key8" }, rows.toArray(new 
String[rows.size()]))); } @Test @@ -1746,6 +1766,178 @@ public class SASIIndexTest } } + @Test + public void testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes() + { + String containsTable = "sasi_like_contains_test"; + String prefixTable = "sasi_like_prefix_test"; + String analyzedPrefixTable = "sasi_like_analyzed_prefix_test"; + + QueryProcessor.executeOnceInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (k int primary key, v text);", KS_NAME, containsTable)); + QueryProcessor.executeOnceInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (k int primary key, v text);", KS_NAME, prefixTable)); + QueryProcessor.executeOnceInternal(String.format("CREATE TABLE IF NOT EXISTS %s.%s (k int primary key, v text);", KS_NAME, analyzedPrefixTable)); + + QueryProcessor.executeOnceInternal(String.format("CREATE CUSTOM INDEX IF NOT EXISTS ON %s.%s(v) " + + "USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS = { 'mode' : 'CONTAINS' };", KS_NAME, containsTable)); + QueryProcessor.executeOnceInternal(String.format("CREATE CUSTOM INDEX IF NOT EXISTS ON %s.%s(v) " + + "USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS = { 'mode' : 'PREFIX' };", KS_NAME, prefixTable)); + QueryProcessor.executeOnceInternal(String.format("CREATE CUSTOM INDEX IF NOT EXISTS ON %s.%s(v) " + + "USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS = { 'mode' : 'PREFIX', 'analyzed': 'true' };", KS_NAME, analyzedPrefixTable)); + + testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(containsTable, prefixTable, analyzedPrefixTable, false); + testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(containsTable, prefixTable, analyzedPrefixTable, true); + } + + private void testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(String containsTable, + String prefixTable, + String analyzedPrefixTable, + boolean forceFlush) + { + QueryProcessor.executeOnceInternal(String.format("INSERT INTO %s.%s (k, v) VALUES (?, ?);", KS_NAME, containsTable), 0, "Pavel"); + 
QueryProcessor.executeOnceInternal(String.format("INSERT INTO %s.%s (k, v) VALUES (?, ?);", KS_NAME, prefixTable), 0, "Jean-Claude"); + QueryProcessor.executeOnceInternal(String.format("INSERT INTO %s.%s (k, v) VALUES (?, ?);", KS_NAME, analyzedPrefixTable), 0, "Jean-Claude"); + + if (forceFlush) + { + Keyspace keyspace = Keyspace.open(KS_NAME); + for (String table : Arrays.asList(containsTable, prefixTable, analyzedPrefixTable)) + keyspace.getColumnFamilyStore(table).forceBlockingFlush(); + } + + UntypedResultSet results; + + // CONTAINS + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Pav';", KS_NAME, containsTable)); + Assert.assertNotNull(results); + Assert.assertEquals(0, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Pavel';", KS_NAME, containsTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + try + { + QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v = 'Pav';", KS_NAME, containsTable)); + Assert.fail(); + } + catch (InvalidRequestException e) + { + // expected since CONTAINS indexes only support LIKE + } + + try + { + QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Pav%%';", KS_NAME, containsTable)); + Assert.fail(); + } + catch (InvalidRequestException e) + { + // expected since CONTAINS indexes only support LIKE '%<term>' and LIKE '%<term>%' + } + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE '%%Pav';", KS_NAME, containsTable)); + Assert.assertNotNull(results); + Assert.assertEquals(0, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE '%%Pav%%';", KS_NAME, containsTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + // PREFIX + + results = QueryProcessor.executeOnceInternal(String.format("SELECT 
* FROM %s.%s WHERE v = 'Jean';", KS_NAME, prefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(0, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v = 'Jean-Claude';", KS_NAME, prefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Jea';", KS_NAME, prefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(0, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Jea%%';", KS_NAME, prefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + try + { + QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE '%%Jea';", KS_NAME, prefixTable)); + Assert.fail(); + } + catch (InvalidRequestException e) + { + // expected since PREFIX indexes only support LIKE '<term>%' + } + + try + { + QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE '%%Jea%%';", KS_NAME, prefixTable)); + Assert.fail(); + } + catch (InvalidRequestException e) + { + // expected since PREFIX indexes only support LIKE '<term>%' + } + + // PREFIX + analyzer + + try + { + QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v = 'Jean';", KS_NAME, analyzedPrefixTable)); + Assert.fail(); + } + catch (InvalidRequestException e) + { + // expected since PREFIX indexes only support EQ without tokenization + } + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Jean';", KS_NAME, analyzedPrefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Claude';", KS_NAME, analyzedPrefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, 
results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Jean-Claude';", KS_NAME, analyzedPrefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Jean%%';", KS_NAME, analyzedPrefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + results = QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE 'Claude%%';", KS_NAME, analyzedPrefixTable)); + Assert.assertNotNull(results); + Assert.assertEquals(1, results.size()); + + try + { + QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE '%%Jean';", KS_NAME, analyzedPrefixTable)); + Assert.fail(); + } + catch (InvalidRequestException e) + { + // expected since PREFIX indexes only support LIKE '<term>%' and LIKE '<term>' + } + + try + { + QueryProcessor.executeOnceInternal(String.format("SELECT * FROM %s.%s WHERE v LIKE '%%Claude%%';", KS_NAME, analyzedPrefixTable)); + Assert.fail(); + } + catch (InvalidRequestException e) + { + // expected since PREFIX indexes only support LIKE '<term>%' and LIKE '<term>' + } + + for (String table : Arrays.asList(containsTable, prefixTable, analyzedPrefixTable)) + QueryProcessor.executeOnceInternal(String.format("TRUNCATE TABLE %s.%s", KS_NAME, table)); + } + private static ColumnFamilyStore loadData(Map<String, Pair<String, Integer>> data, boolean forceFlush) { return loadData(data, System.currentTimeMillis(), forceFlush); @@ -1860,7 +2052,8 @@ public class SASIIndexTest { try (UnfilteredRowIterator row = rows.next()) { - add(AsciiType.instance.compose(row.partitionKey().getKey())); + if (!row.isEmpty()) + add(AsciiType.instance.compose(row.partitionKey().getKey())); } } }}; http://git-wip-us.apache.org/repos/asf/cassandra/blob/479e8aff/test/unit/org/apache/cassandra/index/sasi/plan/OperationTest.java 
---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/plan/OperationTest.java b/test/unit/org/apache/cassandra/index/sasi/plan/OperationTest.java index 4f38b92..cf2b8c0 100644 --- a/test/unit/org/apache/cassandra/index/sasi/plan/OperationTest.java +++ b/test/unit/org/apache/cassandra/index/sasi/plan/OperationTest.java @@ -181,21 +181,21 @@ public class OperationTest extends SchemaLoader // comment = 'soft eng' and comment != 'likes do' ListMultimap<ColumnDefinition, Expression> e = Operation.analyzeGroup(controller, OperationType.OR, - Arrays.asList(new SimpleExpression(comment, Operator.EQ, UTF8Type.instance.decompose("soft eng")), + Arrays.asList(new SimpleExpression(comment, Operator.LIKE_MATCHES, UTF8Type.instance.decompose("soft eng")), new SimpleExpression(comment, Operator.NEQ, UTF8Type.instance.decompose("likes do")))); List<Expression> expectedExpressions = new ArrayList<Expression>(2) {{ add(new Expression("comment", UTF8Type.instance) {{ - operation = Op.EQ; + operation = Op.MATCH; lower = new Bound(UTF8Type.instance.decompose("soft"), true); upper = lower; }}); add(new Expression("comment", UTF8Type.instance) {{ - operation = Op.EQ; + operation = Op.MATCH; lower = new Bound(UTF8Type.instance.decompose("eng"), true); upper = lower; }});
