PHOENIX-1684 Functional Index using REGEXP_SUBSTR doesn't work correctly

Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/a8b27e3f
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/a8b27e3f
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/a8b27e3f

Branch: refs/heads/4.x-HBase-1.x
Commit: a8b27e3f010d15d3f3b519c38fbb052ebb4a6cdb
Parents: 250474d
Author: Thomas <tdsi...@salesforce.com>
Authored: Thu Mar 19 13:57:27 2015 -0700
Committer: Thomas <tdsi...@salesforce.com>
Committed: Tue Mar 24 15:56:25 2015 -0700

----------------------------------------------------------------------
 .../end2end/index/IndexExpressionIT.java        | 161 ++++++++++++++-----
 .../phoenix/compile/PostIndexDDLCompiler.java   |   4 +-
 .../parse/IndexExpressionParseNodeRewriter.java |  30 +---
 .../apache/phoenix/schema/MetaDataClient.java   |   4 +-
 .../org/apache/phoenix/util/StringUtil.java     |   5 +
 .../phoenix/compile/QueryCompilerTest.java      |  22 ++-
 6 files changed, 153 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/a8b27e3f/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/IndexExpressionIT.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/IndexExpressionIT.java
 
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/IndexExpressionIT.java
index 1e3733b..0203e35 100644
--- 
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/IndexExpressionIT.java
+++ 
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/IndexExpressionIT.java
@@ -1202,54 +1202,60 @@ public class IndexExpressionIT extends 
BaseHBaseManagedTimeIT {
     
     @Test
     public void testViewUsesTableIndex() throws Exception {
-        ResultSet rs;
         Connection conn = DriverManager.getConnection(getUrl());
-        String ddl = "CREATE TABLE t (k1 INTEGER NOT NULL, k2 INTEGER NOT 
NULL, s1 VARCHAR, s2 VARCHAR, s3 VARCHAR, s4 VARCHAR CONSTRAINT pk PRIMARY KEY 
(k1, k2))";
-        conn.createStatement().execute(ddl);
-        conn.createStatement().execute("CREATE INDEX i1 ON t(k2, s2, s3, s1)");
-        conn.createStatement().execute("CREATE INDEX i2 ON t(k2, s2||'_'||s3, 
s1, s4)");
-        
-        ddl = "CREATE VIEW v AS SELECT * FROM t WHERE s1 = 'foo'";
-        conn.createStatement().execute(ddl);
-        conn.createStatement().execute("UPSERT INTO t 
VALUES(1,1,'foo','abc','cab')");
-        conn.createStatement().execute("UPSERT INTO t 
VALUES(2,2,'bar','xyz','zyx')");
-        conn.commit();
-        
-        rs = conn.createStatement().executeQuery("SELECT count(*) FROM v");
-        assertTrue(rs.next());
-        assertEquals(1, rs.getLong(1));
-        assertFalse(rs.next());
-        
-        //i2 should be used since it contains s3||'_'||s4 i
-        String query = "SELECT s2||'_'||s3 FROM v WHERE k2=1 AND 
(s2||'_'||s3)='abc_cab'";
-        rs = conn.createStatement(  ).executeQuery("EXPLAIN " + query);
-        String queryPlan = QueryUtil.getExplainPlan(rs);
-        assertEquals(
-                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER I2 
[1,'abc_cab','foo']\n" + 
-                "    SERVER FILTER BY FIRST KEY ONLY", queryPlan);
-        rs = conn.createStatement().executeQuery(query);
-        assertTrue(rs.next());
-        assertEquals("abc_cab", rs.getString(1));
-        assertFalse(rs.next());
-        
-        conn.createStatement().execute("ALTER VIEW v DROP COLUMN s4");
-        //i2 cannot be used since s4 has been dropped from the view, so i1 
will be used 
-        rs = conn.createStatement().executeQuery("EXPLAIN " + query);
-        queryPlan = QueryUtil.getExplainPlan(rs);
-        assertEquals(
-                "CLIENT PARALLEL 1-WAY RANGE SCAN OVER I1 [1]\n" + 
-                "    SERVER FILTER BY FIRST KEY ONLY AND ((\"S2\" || '_' || 
\"S3\") = 'abc_cab' AND \"S1\" = 'foo')", queryPlan);
-        rs = conn.createStatement().executeQuery(query);
-        assertTrue(rs.next());
-        assertEquals("abc_cab", rs.getString(1));
-        assertFalse(rs.next());    
+        try 
+        {
+               ResultSet rs;
+               String ddl = "CREATE TABLE t (k1 INTEGER NOT NULL, k2 INTEGER 
NOT NULL, s1 VARCHAR, s2 VARCHAR, s3 VARCHAR, s4 VARCHAR CONSTRAINT pk PRIMARY 
KEY (k1, k2))";
+               conn.createStatement().execute(ddl);
+               conn.createStatement().execute("CREATE INDEX i1 ON t(k2, s2, 
s3, s1)");
+               conn.createStatement().execute("CREATE INDEX i2 ON t(k2, 
s2||'_'||s3, s1, s4)");
+               
+               ddl = "CREATE VIEW v AS SELECT * FROM t WHERE s1 = 'foo'";
+               conn.createStatement().execute(ddl);
+               conn.createStatement().execute("UPSERT INTO t 
VALUES(1,1,'foo','abc','cab')");
+               conn.createStatement().execute("UPSERT INTO t 
VALUES(2,2,'bar','xyz','zyx')");
+               conn.commit();
+               
+               rs = conn.createStatement().executeQuery("SELECT count(*) FROM 
v");
+               assertTrue(rs.next());
+               assertEquals(1, rs.getLong(1));
+               assertFalse(rs.next());
+               
+               //i2 should be used since it contains s3||'_'||s4 i
+               String query = "SELECT s2||'_'||s3 FROM v WHERE k2=1 AND 
(s2||'_'||s3)='abc_cab'";
+               rs = conn.createStatement(  ).executeQuery("EXPLAIN " + query);
+               String queryPlan = QueryUtil.getExplainPlan(rs);
+               assertEquals(
+                       "CLIENT PARALLEL 1-WAY RANGE SCAN OVER I2 
[1,'abc_cab','foo']\n" + 
+                       "    SERVER FILTER BY FIRST KEY ONLY", queryPlan);
+               rs = conn.createStatement().executeQuery(query);
+               assertTrue(rs.next());
+               assertEquals("abc_cab", rs.getString(1));
+               assertFalse(rs.next());
+               
+               conn.createStatement().execute("ALTER VIEW v DROP COLUMN s4");
+               //i2 cannot be used since s4 has been dropped from the view, so 
i1 will be used 
+               rs = conn.createStatement().executeQuery("EXPLAIN " + query);
+               queryPlan = QueryUtil.getExplainPlan(rs);
+               assertEquals(
+                       "CLIENT PARALLEL 1-WAY RANGE SCAN OVER I1 [1]\n" + 
+                       "    SERVER FILTER BY FIRST KEY ONLY AND ((\"S2\" || 
'_' || \"S3\") = 'abc_cab' AND \"S1\" = 'foo')", queryPlan);
+               rs = conn.createStatement().executeQuery(query);
+               assertTrue(rs.next());
+               assertEquals("abc_cab", rs.getString(1));
+               assertFalse(rs.next());    
+        }
+        finally {
+               conn.close();
+        }
     }
     
        @Test
        public void testExpressionThrowsException() throws Exception {
                Connection conn = DriverManager.getConnection(getUrl());
-               String ddl = "CREATE TABLE t (k1 INTEGER PRIMARY KEY, k2 
INTEGER)";
                try {
+                       String ddl = "CREATE TABLE t (k1 INTEGER PRIMARY KEY, 
k2 INTEGER)";
                        conn.createStatement().execute(ddl);
                        ddl = "CREATE INDEX i on t(k1/k2)";
                        conn.createStatement().execute(ddl);
@@ -1261,6 +1267,79 @@ public class IndexExpressionIT extends 
BaseHBaseManagedTimeIT {
                        conn.commit();
                        fail();
                } catch (CommitException e) {
+               } finally {
+                       conn.close();
+               }
+       }
+       
+       @Test
+       public void testImmutableCaseSensitiveFunctionIndex() throws Exception {
+               helpTestCaseSensitiveFunctionIndex(false, false);
+       }
+
+       @Test
+       public void testImmutableLocalCaseSensitiveFunctionIndex() throws 
Exception {
+               helpTestCaseSensitiveFunctionIndex(false, true);
+       }
+
+       @Test
+       public void testMutableCaseSensitiveFunctionIndex() throws Exception {
+               helpTestCaseSensitiveFunctionIndex(true, false);
+       }
+
+       @Test
+       public void testMutableLocalCaseSensitiveFunctionIndex() throws 
Exception {
+               helpTestCaseSensitiveFunctionIndex(true, true);
+       }
+
+       protected void helpTestCaseSensitiveFunctionIndex(boolean mutable,
+                       boolean localIndex) throws Exception {
+               Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
+               Connection conn = DriverManager.getConnection(getUrl(), props);
+               try {
+                       conn.createStatement().execute(
+                                       "CREATE TABLE t (k VARCHAR NOT NULL 
PRIMARY KEY, v VARCHAR) "
+                                                       + (mutable ? 
"IMMUTABLE_ROWS=true" : ""));
+                       String query = "SELECT * FROM t";
+                       ResultSet rs = 
conn.createStatement().executeQuery(query);
+                       assertFalse(rs.next());
+                       String ddl = "CREATE " + (localIndex ? "LOCAL" : "")
+                                       + " INDEX idx ON t 
(REGEXP_SUBSTR(v,'id:\\\\w+'))";
+                       PreparedStatement stmt = conn.prepareStatement(ddl);
+                       stmt.execute();
+                       query = "SELECT * FROM idx";
+                       rs = conn.createStatement().executeQuery(query);
+                       assertFalse(rs.next());
+
+                       stmt = conn.prepareStatement("UPSERT INTO t 
VALUES(?,?)");
+                       stmt.setString(1, "k1");
+                       stmt.setString(2, "{id:id1}");
+                       stmt.execute();
+                       stmt.setString(1, "k2");
+                       stmt.setString(2, "{id:id2}");
+                       stmt.execute();
+                       conn.commit();
+                       
+                       query = "SELECT k FROM t WHERE 
REGEXP_SUBSTR(v,'id:\\\\w+') = 'id:id1'";
+                       rs = conn.createStatement().executeQuery("EXPLAIN " + 
query);
+                       if (localIndex) {
+                               assertEquals(
+                                               "CLIENT PARALLEL 1-WAY RANGE 
SCAN OVER _LOCAL_IDX_T [-32768,'id:id1']\n"
+                                                               + "    SERVER 
FILTER BY FIRST KEY ONLY\nCLIENT MERGE SORT",
+                                               QueryUtil.getExplainPlan(rs));
+                       } else {
+                               assertEquals(
+                                               "CLIENT PARALLEL 1-WAY RANGE 
SCAN OVER IDX ['id:id1']\n"
+                                                               + "    SERVER 
FILTER BY FIRST KEY ONLY",
+                                               QueryUtil.getExplainPlan(rs));
+                       }
+
+                       rs = conn.createStatement().executeQuery(query);
+                       assertTrue(rs.next());
+                       assertEquals("k1", rs.getString(1));
+                       assertFalse(rs.next());
+               } finally {
+                       conn.close();
                }
        }
 

http://git-wip-us.apache.org/repos/asf/phoenix/blob/a8b27e3f/phoenix-core/src/main/java/org/apache/phoenix/compile/PostIndexDDLCompiler.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/compile/PostIndexDDLCompiler.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/compile/PostIndexDDLCompiler.java
index 5836b99..9f99f1c 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/compile/PostIndexDDLCompiler.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/compile/PostIndexDDLCompiler.java
@@ -27,6 +27,7 @@ import org.apache.phoenix.schema.PColumnFamily;
 import org.apache.phoenix.schema.PTable;
 import org.apache.phoenix.schema.TableRef;
 import org.apache.phoenix.util.IndexUtil;
+import org.apache.phoenix.util.StringUtil;
 
 import com.google.common.collect.Lists;
 
@@ -73,7 +74,8 @@ public class PostIndexDDLCompiler {
         for (int i = posOffset; i < nIndexPKColumns; i++) {
             PColumn col = indexPKColumns.get(i);
             String indexColName = col.getName().getString();
-            String dataColName = col.getExpressionStr();
+            // need to escape backslash as this used in the SELECT statement
+            String dataColName = 
StringUtil.escapeBackslash(col.getExpressionStr());
             dataColumns.append(dataColName).append(",");
             indexColumns.append('"').append(indexColName).append("\",");
             indexColumnNames.add(indexColName);

http://git-wip-us.apache.org/repos/asf/phoenix/blob/a8b27e3f/phoenix-core/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
index 43cb9f3..0273041 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/parse/IndexExpressionParseNodeRewriter.java
@@ -37,26 +37,6 @@ public class IndexExpressionParseNodeRewriter extends 
ParseNodeRewriter {
 
     private final Map<ParseNode, ParseNode> 
indexedParseNodeToColumnParseNodeMap;
     
-    private static class ColumnParseNodeVisitor extends 
StatelessTraverseAllParseNodeVisitor {
-        
-        private boolean isParseNodeCaseSensitive;
-        
-        public void reset() {
-            this.isParseNodeCaseSensitive = false;
-        }
-        
-        @Override
-        public Void visit(ColumnParseNode node) throws SQLException {
-            isParseNodeCaseSensitive = isParseNodeCaseSensitive  || 
node.isCaseSensitive() || node.isTableNameCaseSensitive();
-            return null;
-        }
-        
-        public boolean isParseNodeCaseSensitive() {
-            return isParseNodeCaseSensitive;
-        }
-        
-    }
-
     public IndexExpressionParseNodeRewriter(PTable index, PhoenixConnection 
connection) throws SQLException {
         indexedParseNodeToColumnParseNodeMap = 
Maps.newHashMapWithExpectedSize(index.getColumns().size());
         NamedTableNode tableNode = NamedTableNode.create(null,
@@ -66,21 +46,13 @@ public class IndexExpressionParseNodeRewriter extends 
ParseNodeRewriter {
         StatementContext context = new StatementContext(new 
PhoenixStatement(connection), dataResolver);
         IndexStatementRewriter rewriter = new 
IndexStatementRewriter(dataResolver, null);
         ExpressionCompiler expressionCompiler = new 
ExpressionCompiler(context);
-        ColumnParseNodeVisitor columnParseNodeVisitor = new 
ColumnParseNodeVisitor();
         int indexPosOffset = (index.getBucketNum() == null ? 0 : 1) + 
(index.isMultiTenant() ? 1 : 0) + (index.getViewIndexId() == null ? 0 : 1);
         List<PColumn> pkColumns = index.getPKColumns();
                for (int i=indexPosOffset; i<pkColumns.size(); ++i) {
                PColumn column = pkColumns.get(i);
                String expressionStr = 
IndexUtil.getIndexColumnExpressionStr(column);
             ParseNode expressionParseNode  = 
SQLParser.parseCondition(expressionStr);
-            columnParseNodeVisitor.reset();
-            expressionParseNode.accept(columnParseNodeVisitor);
-            String colName = column.getName().getString();
-            if (columnParseNodeVisitor.isParseNodeCaseSensitive()) {
-                // force column name to be case sensitive name by surround 
with double quotes
-                colName = "\"" + colName + "\"";
-            }
-            
+            String colName = "\"" + column.getName().getString() + "\"";
             Expression dataExpression = 
expressionParseNode.accept(expressionCompiler);
             PDataType expressionDataType = dataExpression.getDataType();
             ParseNode indexedParseNode = expressionParseNode.accept(rewriter);

http://git-wip-us.apache.org/repos/asf/phoenix/blob/a8b27e3f/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java 
b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
index 2ba0cde..e414039 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
@@ -164,6 +164,7 @@ import org.apache.phoenix.util.MetaDataUtil;
 import org.apache.phoenix.util.PhoenixRuntime;
 import org.apache.phoenix.util.ReadOnlyProps;
 import org.apache.phoenix.util.SchemaUtil;
+import org.apache.phoenix.util.StringUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -1036,7 +1037,8 @@ public class MetaDataClient {
                     // can lose information during compilation
                     StringBuilder buf = new StringBuilder();
                     parseNode.toSQL(resolver, buf);
-                    String expressionStr = buf.toString();
+                    // need to escape backslash as this expression will be 
re-parsed later
+                    String expressionStr = 
StringUtil.escapeBackslash(buf.toString());
                     
                     ColumnName colName = null;
                     ColumnRef colRef = expressionIndexCompiler.getColumnRef();

http://git-wip-us.apache.org/repos/asf/phoenix/blob/a8b27e3f/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java 
b/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
index a83098a..4a7ae38 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
@@ -331,4 +331,9 @@ public class StringUtil {
     public static String escapeStringConstant(String pattern) {
         return StringEscapeUtils.escapeSql(pattern); // Need to escape double 
quotes
     }   
+    
+    public static String escapeBackslash(String input) {
+       // see 
http://stackoverflow.com/questions/4653831/regex-how-to-escape-backslashes-and-special-characters
+       return input.replaceAll("\\\\","\\\\\\\\");
+    }
 }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/a8b27e3f/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java 
b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
index e17c528..83c984b 100644
--- 
a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
+++ 
b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java
@@ -1564,5 +1564,25 @@ public class QueryCompilerTest extends 
BaseConnectionlessQueryTest {
             stmt.close();
         }
     }
-
+    
+    @Test
+    public void testRegex() throws Exception {
+        Connection conn = DriverManager.getConnection(getUrl());
+        Statement stmt = conn.createStatement();
+        stmt.execute("CREATE TABLE t (k1 INTEGER PRIMARY KEY, v VARCHAR)");
+        
+        //character classes
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, '[abc]') = 
'val'");
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, '[^abc]') = 
'val'");
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, '[a-zA-Z]') 
= 'val'");
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, 
'[a-d[m-p]]') = 'val'");
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, 
'[a-z&&[def]]') = 'val'");
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, 
'[a-z&&[^bc]]') = 'val'");
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, 
'[a-z&&[^m-p]]') = 'val'");
+        
+        // predefined character classes
+        stmt.executeQuery("select * from T where REGEXP_SUBSTR(v, 
'.\\\\d\\\\D\\\\s\\\\S\\\\w\\\\W') = 'val'");
+    }
+    
+   
 }

Reply via email to