[jira] [Commented] (PHOENIX-7032) Partial Global Secondary Indexes

ASF GitHub Bot (Jira) Thu, 26 Oct 2023 14:40:05 -0700


    [ 
https://issues.apache.org/jira/browse/PHOENIX-7032?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17780101#comment-17780101
 ]


ASF GitHub Bot commented on PHOENIX-7032:
-----------------------------------------

jpisaac commented on code in PR #1701:
URL: https://github.com/apache/phoenix/pull/1701#discussion_r1373843926


##########
phoenix-core/src/it/java/org/apache/phoenix/end2end/index/PartialIndexIT.java:
##########
@@ -0,0 +1,564 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.end2end.index;
+
+import static org.apache.phoenix.compile.WhereCompiler.transformDNF;
+import static 
org.apache.phoenix.end2end.index.GlobalIndexCheckerIT.assertExplainPlan;
+import static 
org.apache.phoenix.end2end.index.GlobalIndexCheckerIT.assertExplainPlanWithLimit;
+import static org.apache.phoenix.mapreduce.index.PhoenixIndexToolJobCounters.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.SimpleRegionObserver;
+import org.apache.hadoop.mapreduce.CounterGroup;
+import org.apache.phoenix.compile.FromCompiler;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.end2end.IndexToolIT;
+import org.apache.phoenix.exception.PhoenixParserException;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.hbase.index.IndexRegionObserver;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.jdbc.PhoenixPreparedStatement;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
+import org.apache.phoenix.mapreduce.index.IndexTool;
+import org.apache.phoenix.parse.ParseNode;
+import org.apache.phoenix.query.KeyRange;
+import org.apache.phoenix.schema.ColumnFamilyNotFoundException;
+import org.apache.phoenix.schema.ColumnNotFoundException;
+import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.thirdparty.com.google.common.collect.Maps;
+import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
+import org.apache.phoenix.query.BaseTest;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.util.*;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class PartialIndexIT extends BaseTest {
+    private final boolean local;
+    private final boolean uncovered;
+    private final boolean salted;
+
+    public PartialIndexIT (boolean local, boolean uncovered, boolean salted) {
+        this.local = local;
+        this.uncovered = uncovered;
+        this.salted = salted;
+    }
+    @BeforeClass
+    public static synchronized void doSetup() throws Exception {
+        Map<String, String> props = Maps.newHashMapWithExpectedSize(1);
+        
props.put(QueryServices.GLOBAL_INDEX_ROW_AGE_THRESHOLD_TO_DELETE_MS_ATTRIB, 
Long.toString(0));
+        setUpTestDriver(new ReadOnlyProps(props.entrySet().iterator()));
+    }
+
+    @After
+    public void unsetFailForTesting() throws Exception {
+        boolean refCountLeaked = isAnyStoreRefCountLeaked();
+        assertFalse("refCount leaked", refCountLeaked);
+    }
+    @Parameterized.Parameters(
+            name = "local={0}, uncovered={1}, salted={2}")
+    public static synchronized Collection<Boolean[]> data() {
+        return Arrays.asList(new Boolean[][] {
+                // Partial local indexes are not supported currently.
+                {false, false, true},
+                {false, false, false},
+                {false, true, false},
+                {false, true, true}
+        });
+    }
+
+    public static void assertPlan(PhoenixResultSet rs, String schemaName, 
String tableName) {
+        PTable table = rs.getContext().getCurrentTable().getTable();
+        assertTrue(table.getSchemaName().getString().equals(schemaName) &&
+                table.getTableName().getString().equals(tableName));
+    }
+
+    private static void verifyIndex(String dataTableName, String 
indexTableName) throws Exception {
+        IndexTool indexTool = IndexToolIT.runIndexTool(false, "", 
dataTableName,
+                indexTableName, null, 0, IndexTool.IndexVerifyType.ONLY);
+
+        assertEquals(0, indexTool.getJob().getCounters().
+                findCounter(REBUILT_INDEX_ROW_COUNT).getValue());
+        assertEquals(0, indexTool.getJob().getCounters().
+                
findCounter(BEFORE_REBUILD_INVALID_INDEX_ROW_COUNT).getValue());
+        assertEquals(0, indexTool.getJob().getCounters().
+                
findCounter(BEFORE_REBUILD_MISSING_INDEX_ROW_COUNT).getValue());
+        assertEquals(0, indexTool.getJob().getCounters().
+                
findCounter(BEFORE_REBUILD_BEYOND_MAXLOOKBACK_MISSING_INDEX_ROW_COUNT).getValue());
+        assertEquals(0, indexTool.getJob().getCounters().
+                
findCounter(BEFORE_REBUILD_BEYOND_MAXLOOKBACK_INVALID_INDEX_ROW_COUNT).getValue());
+        assertEquals(0, indexTool.getJob().getCounters().
+                findCounter(BEFORE_REBUILD_OLD_INDEX_ROW_COUNT).getValue());
+        assertEquals(0, indexTool.getJob().getCounters().
+                
findCounter(BEFORE_REBUILD_UNKNOWN_INDEX_ROW_COUNT).getValue());
+
+        IndexToolIT.runIndexTool(false, "", dataTableName,
+                indexTableName, null, 0, IndexTool.IndexVerifyType.ONLY, 
"-fi");
+        CounterGroup mrJobCounters = IndexToolIT.getMRJobCounters(indexTool);
+        assertEquals(0,
+                
mrJobCounters.findCounter(BEFORE_REBUILD_INVALID_INDEX_ROW_COUNT.name()).getValue());
+        assertEquals(0,
+                
mrJobCounters.findCounter(BEFORE_REPAIR_EXTRA_VERIFIED_INDEX_ROW_COUNT.name()).getValue());
+        assertEquals(0,
+                
mrJobCounters.findCounter(BEFORE_REPAIR_EXTRA_UNVERIFIED_INDEX_ROW_COUNT.name()).getValue());
+    }
+

Review Comment:
   Can you add tests for other data types for e.g. DATE. The following test 
fails.
   
       @Test
       public void testSampleDDLs() throws Exception {
           try (Connection conn = DriverManager.getConnection(getUrl())) {
               String dataTableName = generateUniqueName();
               String fullTableName = String.format("%s.%s", "S", 
dataTableName);
               conn.createStatement().execute(
                       "create table " + fullTableName + " (id varchar not 
null, kp varchar not null, "
                               + "A integer, B integer, C double, D date 
CONSTRAINT pk PRIMARY KEY (id,kp)) MULTI_TENANT=true,COLUMN_ENCODED_BYTES=0" );
               String indexTableName = generateUniqueName();
               try {
                   conn.createStatement().execute(
                           "CREATE INDEX IF NOT EXISTS " + indexTableName + " 
on " + fullTableName + " (kp,A)"
                                   + " WHERE kp  > '5'");
               } catch (PhoenixParserException e) {
                   e.printStackTrace();
                   Assert.fail();
               }
           }
       }
   





> Partial Global Secondary Indexes
> --------------------------------
>
>                 Key: PHOENIX-7032
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-7032
>             Project: Phoenix
>          Issue Type: New Feature
>            Reporter: Kadir Ozdemir
>            Assignee: Kadir Ozdemir
>            Priority: Major
>
> The secondary indexes supported in Phoenix have been full indexes such that 
> for every data table row there is an index row. Generating an index row for 
> every data table row is not always required. For example, some use cases do 
> not require index rows for the data table rows in which indexed column values 
> are null. Such indexes are called sparse indexes. Partial indexes generalize 
> the concept of sparse indexing and allow users to specify the subset of the 
> data table rows for which index rows will be maintained. This subset is 
> specified using a WHERE clause added to the CREATE INDEX DDL statement.
> Partial secondary indexes were first proposed by Michael Stonebraker 
> [here|https://dsf.berkeley.edu/papers/ERL-M89-17.pdf]. Since then several SQL 
> databases (e.g., 
> [Postgres|https://www.postgresql.org/docs/current/indexes-partial.html] and 
> [SQLite|https://www.sqlite.org/partialindex.html])  and NoSQL databases 
> (e.g., [MongoDB|https://www.mongodb.com/docs/manual/core/index-partial/]) 
> have supported some form of partial indexes. It is challenging to allow 
> arbitrary WHERE clauses in DDL statements. For example, Postgres does not 
> allow subqueries in these where clauses and SQLite supports much more 
> restrictive where clauses. 
> Supporting arbitrary where clauses creates challenges for query optimizers in 
> deciding the usability of a partial index for a given query. If the set of 
> data table rows that satisfy the query is a subset of the data table rows 
> that the partial index points back, then the query can use the index. Thus, 
> the query optimizer has to decide if the WHERE clause of the query implies 
> the WHERE clause of the index. 
> Michael Stonebraker [here|https://dsf.berkeley.edu/papers/ERL-M89-17.pdf] 
> suggests that an index WHERE clause is a conjunct of simple terms, i.e: 
> i-clause-1 and i-clause-2 and ... and i-clause-m where each clause is of the 
> form <column> <operator> <constant>. Hence, the qualification can be 
> evaluated for each tuple in the indicated relation without consulting 
> additional tuples. 
> Phoenix partial indexes will initially support a more general set of index 
> WHERE clauses that can be evaluated on a single row with the following 
> exceptions
>  * Subqueries are not allowed.
>  * Like expressions are allowed with very limited support such that an index 
> WHERE clause with like expressions can imply/contain a query if the query has 
> the same like expressions that the index WHERE clause has.
>  * Comparison between columns are allowed without supporting transitivity, 
> for example, a > b and b > c does not imply a > c.
> Partial indexes will be supported initially for global secondary indexes, 
> i.e.,  covered global indexes and uncovered global indexes. The local 
> secondary indexes will be supported in future.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

[jira] [Commented] (PHOENIX-7032) Partial Global Secondary Indexes

Reply via email to