[
https://issues.apache.org/jira/browse/PHOENIX-7032?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17780101#comment-17780101
]
ASF GitHub Bot commented on PHOENIX-7032:
-----------------------------------------
jpisaac commented on code in PR #1701:
URL: https://github.com/apache/phoenix/pull/1701#discussion_r1373843926
##########
phoenix-core/src/it/java/org/apache/phoenix/end2end/index/PartialIndexIT.java:
##########
@@ -0,0 +1,564 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.end2end.index;
+
+import static org.apache.phoenix.compile.WhereCompiler.transformDNF;
+import static
org.apache.phoenix.end2end.index.GlobalIndexCheckerIT.assertExplainPlan;
+import static
org.apache.phoenix.end2end.index.GlobalIndexCheckerIT.assertExplainPlanWithLimit;
+import static org.apache.phoenix.mapreduce.index.PhoenixIndexToolJobCounters.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.SimpleRegionObserver;
+import org.apache.hadoop.mapreduce.CounterGroup;
+import org.apache.phoenix.compile.FromCompiler;
+import org.apache.phoenix.compile.QueryPlan;
+import org.apache.phoenix.end2end.IndexToolIT;
+import org.apache.phoenix.exception.PhoenixParserException;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.hbase.index.IndexRegionObserver;
+import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.jdbc.PhoenixPreparedStatement;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
+import org.apache.phoenix.mapreduce.index.IndexTool;
+import org.apache.phoenix.parse.ParseNode;
+import org.apache.phoenix.query.KeyRange;
+import org.apache.phoenix.schema.ColumnFamilyNotFoundException;
+import org.apache.phoenix.schema.ColumnNotFoundException;
+import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.thirdparty.com.google.common.collect.Maps;
+import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
+import org.apache.phoenix.query.BaseTest;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.util.*;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@Category(NeedsOwnMiniClusterTest.class)
+@RunWith(Parameterized.class)
+public class PartialIndexIT extends BaseTest {
+ private final boolean local;
+ private final boolean uncovered;
+ private final boolean salted;
+
+ public PartialIndexIT (boolean local, boolean uncovered, boolean salted) {
+ this.local = local;
+ this.uncovered = uncovered;
+ this.salted = salted;
+ }
+ @BeforeClass
+ public static synchronized void doSetup() throws Exception {
+ Map<String, String> props = Maps.newHashMapWithExpectedSize(1);
+
props.put(QueryServices.GLOBAL_INDEX_ROW_AGE_THRESHOLD_TO_DELETE_MS_ATTRIB,
Long.toString(0));
+ setUpTestDriver(new ReadOnlyProps(props.entrySet().iterator()));
+ }
+
+ @After
+ public void unsetFailForTesting() throws Exception {
+ boolean refCountLeaked = isAnyStoreRefCountLeaked();
+ assertFalse("refCount leaked", refCountLeaked);
+ }
+ @Parameterized.Parameters(
+ name = "local={0}, uncovered={1}, salted={2}")
+ public static synchronized Collection<Boolean[]> data() {
+ return Arrays.asList(new Boolean[][] {
+ // Partial local indexes are not supported currently.
+ {false, false, true},
+ {false, false, false},
+ {false, true, false},
+ {false, true, true}
+ });
+ }
+
+ public static void assertPlan(PhoenixResultSet rs, String schemaName,
String tableName) {
+ PTable table = rs.getContext().getCurrentTable().getTable();
+ assertTrue(table.getSchemaName().getString().equals(schemaName) &&
+ table.getTableName().getString().equals(tableName));
+ }
+
+ private static void verifyIndex(String dataTableName, String
indexTableName) throws Exception {
+ IndexTool indexTool = IndexToolIT.runIndexTool(false, "",
dataTableName,
+ indexTableName, null, 0, IndexTool.IndexVerifyType.ONLY);
+
+ assertEquals(0, indexTool.getJob().getCounters().
+ findCounter(REBUILT_INDEX_ROW_COUNT).getValue());
+ assertEquals(0, indexTool.getJob().getCounters().
+
findCounter(BEFORE_REBUILD_INVALID_INDEX_ROW_COUNT).getValue());
+ assertEquals(0, indexTool.getJob().getCounters().
+
findCounter(BEFORE_REBUILD_MISSING_INDEX_ROW_COUNT).getValue());
+ assertEquals(0, indexTool.getJob().getCounters().
+
findCounter(BEFORE_REBUILD_BEYOND_MAXLOOKBACK_MISSING_INDEX_ROW_COUNT).getValue());
+ assertEquals(0, indexTool.getJob().getCounters().
+
findCounter(BEFORE_REBUILD_BEYOND_MAXLOOKBACK_INVALID_INDEX_ROW_COUNT).getValue());
+ assertEquals(0, indexTool.getJob().getCounters().
+ findCounter(BEFORE_REBUILD_OLD_INDEX_ROW_COUNT).getValue());
+ assertEquals(0, indexTool.getJob().getCounters().
+
findCounter(BEFORE_REBUILD_UNKNOWN_INDEX_ROW_COUNT).getValue());
+
+ IndexToolIT.runIndexTool(false, "", dataTableName,
+ indexTableName, null, 0, IndexTool.IndexVerifyType.ONLY,
"-fi");
+ CounterGroup mrJobCounters = IndexToolIT.getMRJobCounters(indexTool);
+ assertEquals(0,
+
mrJobCounters.findCounter(BEFORE_REBUILD_INVALID_INDEX_ROW_COUNT.name()).getValue());
+ assertEquals(0,
+
mrJobCounters.findCounter(BEFORE_REPAIR_EXTRA_VERIFIED_INDEX_ROW_COUNT.name()).getValue());
+ assertEquals(0,
+
mrJobCounters.findCounter(BEFORE_REPAIR_EXTRA_UNVERIFIED_INDEX_ROW_COUNT.name()).getValue());
+ }
+
Review Comment:
Can you add tests for other data types, e.g. DATE? The following test
fails:
@Test
public void testSampleDDLs() throws Exception {
try (Connection conn = DriverManager.getConnection(getUrl())) {
String dataTableName = generateUniqueName();
String fullTableName = String.format("%s.%s", "S",
dataTableName);
conn.createStatement().execute(
"create table " + fullTableName + " (id varchar not
null, kp varchar not null, "
+ "A integer, B integer, C double, D date
CONSTRAINT pk PRIMARY KEY (id,kp)) MULTI_TENANT=true,COLUMN_ENCODED_BYTES=0" );
String indexTableName = generateUniqueName();
try {
conn.createStatement().execute(
"CREATE INDEX IF NOT EXISTS " + indexTableName + "
on " + fullTableName + " (kp,A)"
+ " WHERE kp > '5'");
} catch (PhoenixParserException e) {
e.printStackTrace();
Assert.fail();
}
}
}
> Partial Global Secondary Indexes
> --------------------------------
>
> Key: PHOENIX-7032
> URL: https://issues.apache.org/jira/browse/PHOENIX-7032
> Project: Phoenix
> Issue Type: New Feature
> Reporter: Kadir Ozdemir
> Assignee: Kadir Ozdemir
> Priority: Major
>
> The secondary indexes supported in Phoenix have been full indexes such that
> for every data table row there is an index row. Generating an index row for
> every data table row is not always required. For example, some use cases do
> not require index rows for the data table rows in which indexed column values
> are null. Such indexes are called sparse indexes. Partial indexes generalize
> the concept of sparse indexing and allow users to specify the subset of the
> data table rows for which index rows will be maintained. This subset is
> specified using a WHERE clause added to the CREATE INDEX DDL statement.
> Partial secondary indexes were first proposed by Michael Stonebraker
> [here|https://dsf.berkeley.edu/papers/ERL-M89-17.pdf]. Since then several SQL
> databases (e.g.,
> [Postgres|https://www.postgresql.org/docs/current/indexes-partial.html] and
> [SQLite|https://www.sqlite.org/partialindex.html]) and NoSQL databases
> (e.g., [MongoDB|https://www.mongodb.com/docs/manual/core/index-partial/])
> have supported some form of partial indexes. It is challenging to allow
> arbitrary WHERE clauses in DDL statements. For example, Postgres does not
> allow subqueries in these where clauses and SQLite supports much more
> restrictive where clauses.
> Supporting arbitrary where clauses creates challenges for query optimizers in
> deciding the usability of a partial index for a given query. If the set of
> data table rows that satisfy the query is a subset of the data table rows
> that the partial index points back to, then the query can use the index. Thus,
> the query optimizer has to decide if the WHERE clause of the query implies
> the WHERE clause of the index.
> Michael Stonebraker [here|https://dsf.berkeley.edu/papers/ERL-M89-17.pdf]
> suggests that an index WHERE clause is a conjunction of simple terms, i.e.,
> i-clause-1 and i-clause-2 and ... and i-clause-m where each clause is of the
> form <column> <operator> <constant>. Hence, the qualification can be
> evaluated for each tuple in the indicated relation without consulting
> additional tuples.
> Phoenix partial indexes will initially support a more general set of index
> WHERE clauses that can be evaluated on a single row with the following
> exceptions:
> * Subqueries are not allowed.
> * Like expressions are allowed with very limited support such that an index
> WHERE clause with like expressions can imply/contain a query if the query has
> the same like expressions that the index WHERE clause has.
> * Comparisons between columns are allowed without supporting transitivity,
> for example, a > b and b > c does not imply a > c.
> Partial indexes will be supported initially for global secondary indexes,
> i.e., covered global indexes and uncovered global indexes. Local
> secondary indexes will be supported in the future.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)