[jira] [Commented] (PHOENIX-6501) Use batching when joining data table rows with uncovered global index rows

ASF GitHub Bot (Jira) Tue, 15 Mar 2022 13:44:07 -0700


    [ 
https://issues.apache.org/jira/browse/PHOENIX-6501?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17507225#comment-17507225
 ]


ASF GitHub Bot commented on PHOENIX-6501:
-----------------------------------------

kadirozde commented on a change in pull request #1399:
URL: https://github.com/apache/phoenix/pull/1399#discussion_r827394267



##########
File path: 
phoenix-core/src/it/java/org/apache/phoenix/end2end/index/UncoveredGlobalIndexRegionScannerIT.java
##########
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.end2end.index;
+
+import static 
org.apache.phoenix.end2end.index.GlobalIndexCheckerIT.assertExplainPlan;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.SimpleRegionObserver;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.query.KeyRange;
+import org.apache.phoenix.thirdparty.com.google.common.collect.Maps;
+import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
+import org.apache.phoenix.query.BaseTest;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.util.*;
+import org.junit.After;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(NeedsOwnMiniClusterTest.class)
+public class UncoveredGlobalIndexRegionScannerIT extends BaseTest {
+    @BeforeClass
+    public static synchronized void doSetup() throws Exception {
+        Map<String, String> props = Maps.newHashMapWithExpectedSize(1);
+        
props.put(QueryServices.GLOBAL_INDEX_ROW_AGE_THRESHOLD_TO_DELETE_MS_ATTRIB, 
Long.toString(0));
+        setUpTestDriver(new ReadOnlyProps(props.entrySet().iterator()));
+    }
+
+    @After
+    public void unsetFailForTesting() throws Exception {
+        boolean refCountLeaked = isAnyStoreRefCountLeaked();
+        assertFalse("refCount leaked", refCountLeaked);
+    }
+
+    private void populateTable(String tableName) throws Exception {
+        Connection conn = DriverManager.getConnection(getUrl());
+        conn.createStatement().execute("create table " + tableName +
+                " (id varchar(10) not null primary key, val1 varchar(10), val2 
varchar(10), val3 varchar(10))");
+        conn.createStatement().execute("upsert into " + tableName + " values 
('a', 'ab', 'abc', 'abcd')");
+        conn.commit();
+        conn.createStatement().execute("upsert into " + tableName + " values 
('b', 'bc', 'bcd', 'bcde')");
+        conn.commit();
+        conn.close();
+    }
+
+    @Test
+    public void testUncoveredIndexWithPhoenixRowTimestamp() throws Exception {
+        try (Connection conn = DriverManager.getConnection(getUrl())) {
+            String dataTableName = generateUniqueName();
+            String indexTableName = generateUniqueName();
+            Timestamp initial = new 
Timestamp(EnvironmentEdgeManager.currentTimeMillis() - 1);
+            conn.createStatement().execute("create table " + dataTableName +
+                    " (id varchar(10) not null primary key, val1 varchar(10), 
val2 varchar(10), val3 varchar(10))");
+            conn.createStatement().execute("upsert into " + dataTableName + " 
values ('a', 'ab', 'abc', 'abcd')");
+            conn.commit();
+            Timestamp before = new 
Timestamp(EnvironmentEdgeManager.currentTimeMillis());
+            // Sleep 1ms to get a different row timestamps
+            Thread.sleep(1);
+            conn.createStatement().execute("upsert into " + dataTableName + " 
values ('b', 'bc', 'bcd', 'bcde')");
+            conn.commit();
+            Timestamp after = new 
Timestamp(EnvironmentEdgeManager.currentTimeMillis() + 1);
+            conn.createStatement().execute("CREATE INDEX " + indexTableName + 
" on " +
+                    dataTableName + " (val1, PHOENIX_ROW_TIMESTAMP()) ");
+
+            String timeZoneID = Calendar.getInstance().getTimeZone().getID();
+            // Write a query to get the val2 = 'bc' with a time range query
+            String query = "SELECT /*+ INDEX(" + dataTableName + " " + 
indexTableName + ")*/ "
+                    + "val1, val2, PHOENIX_ROW_TIMESTAMP() from " + 
dataTableName
+                    + " WHERE val1 = 'bc' AND " + "PHOENIX_ROW_TIMESTAMP() > 
TO_DATE('"
+                    + before.toString() + "','yyyy-MM-dd HH:mm:ss.SSS', '"
+                    + timeZoneID + "') AND " + "PHOENIX_ROW_TIMESTAMP() < 
TO_DATE('" + after
+                    + "','yyyy-MM-dd HH:mm:ss.SSS', '" + timeZoneID + "')";
+            // Verify that we will read from the index table
+            assertExplainPlan(conn, query, dataTableName, indexTableName);
+            ResultSet rs = conn.createStatement().executeQuery(query);
+            assertTrue(rs.next());
+            assertEquals("bc", rs.getString(1));
+            assertEquals("bcd", rs.getString(2));
+            assertTrue(rs.getTimestamp(3).after(before));
+            assertTrue(rs.getTimestamp(3).before(after));
+            assertFalse(rs.next());
+            // Count the number of index rows
+            rs = conn.createStatement().executeQuery("SELECT COUNT(*) from " + 
indexTableName);
+            assertTrue(rs.next());
+            assertEquals(2, rs.getInt(1));
+            // Add one more row with val2 ='bc' and check this does not change 
the result of the previous
+            // query
+            // Sleep 1ms to get a different row timestamps
+            Thread.sleep(1);
+            conn.createStatement().execute("upsert into " + dataTableName + " 
values ('c', 'bc', 'ccc', 'cccc')");
+            conn.commit();
+            assertExplainPlan(conn, query, dataTableName, indexTableName);
+            rs = conn.createStatement().executeQuery(query);
+            assertTrue(rs.next());
+            assertEquals("bc", rs.getString(1));
+            assertEquals("bcd", rs.getString(2));
+            assertTrue(rs.getTimestamp(3).after(before));
+            assertTrue(rs.getTimestamp(3).before(after));
+            assertFalse(rs.next());
+            // Write a time range query to get the last row with val2 ='bc'
+            query = "SELECT /*+ INDEX(" + dataTableName + " " + indexTableName 
+ ")*/ "
+                    +"val1, val2, PHOENIX_ROW_TIMESTAMP() from " + 
dataTableName +
+                    " WHERE val1 = 'bc' AND " + "PHOENIX_ROW_TIMESTAMP() > 
TO_DATE('" + after
+                    + "','yyyy-MM-dd HH:mm:ss.SSS', '" + timeZoneID + "')";
+            // Verify that we will read from the index table
+            assertExplainPlan(conn, query, dataTableName, indexTableName);
+            rs = conn.createStatement().executeQuery(query);
+            assertTrue(rs.next());
+            assertEquals("bc", rs.getString(1));
+            assertEquals("ccc", rs.getString(2));
+            assertTrue(rs.getTimestamp(3).after(after));
+            assertFalse(rs.next());
+            // Verify that we can execute the same query without using the 
index
+            String noIndexQuery = "SELECT /*+ NO_INDEX */ val1, val2, 
PHOENIX_ROW_TIMESTAMP() from " + dataTableName + " WHERE val1 = 'bc' AND " +
+                    "PHOENIX_ROW_TIMESTAMP() > TO_DATE('" + after + 
"','yyyy-MM-dd HH:mm:ss.SSS', '" + timeZoneID + "')";
+            // Verify that we will read from the data table
+            rs = conn.createStatement().executeQuery("EXPLAIN " + 
noIndexQuery);
+            String explainPlan = QueryUtil.getExplainPlan(rs);
+            assertTrue(explainPlan.contains("FULL SCAN OVER " + 
dataTableName));
+            rs = conn.createStatement().executeQuery(noIndexQuery);
+            assertTrue(rs.next());
+            assertEquals("bc", rs.getString(1));
+            assertEquals("ccc", rs.getString(2));
+            assertTrue(rs.getTimestamp(3).after(after));
+            after = rs.getTimestamp(3);
+            assertFalse(rs.next());
+            // Sleep 1ms to get a different row timestamps
+            Thread.sleep(1);
+            conn.createStatement().execute("upsert into " + dataTableName + " 
values ('d', 'de', 'def', 'defg')");
+            conn.commit();
+
+            query = "SELECT /*+ INDEX(" + dataTableName + " " + indexTableName 
+ ")*/ "
+                    + " val1, val2, PHOENIX_ROW_TIMESTAMP()  from " + 
dataTableName
+                    + " WHERE val1 = 'de'";
+            // Verify that we will read from the index table
+            assertExplainPlan(conn, query, dataTableName, indexTableName);
+            rs = conn.createStatement().executeQuery(query);
+            assertTrue(rs.next());
+            assertEquals("de", rs.getString(1));
+            assertEquals("def", rs.getString(2));
+            assertTrue(rs.getTimestamp(3).after(after));
+            assertFalse(rs.next());
+            // Add a new index where the index row key starts with 
PHOENIX_ROW_TIMESTAMP()
+            indexTableName = generateUniqueName();
+            conn.createStatement().execute("CREATE INDEX " + indexTableName + 
" on " +
+                    dataTableName + " (PHOENIX_ROW_TIMESTAMP()) ");
+            // Add one more row
+            // Sleep 1ms to get a different row timestamps
+            Thread.sleep(1);
+            conn.createStatement().execute("upsert into " + dataTableName + " 
values ('e', 'ae', 'efg', 'efgh')");
+            conn.commit();
+            // Write a query to get all the rows in the order of their 
timestamps
+            query = "SELECT /*+ INDEX(" + dataTableName + " " + indexTableName 
+ ")*/ "
+                    + " val1, val2, PHOENIX_ROW_TIMESTAMP() from " + 
dataTableName + " WHERE "
+                    + "PHOENIX_ROW_TIMESTAMP() > TO_DATE('" + initial
+                    + "','yyyy-MM-dd HH:mm:ss.SSS', '" + timeZoneID + "')";
+            // Verify that we will read from the index table
+            assertExplainPlan(conn, query, dataTableName, indexTableName);
+            rs = conn.createStatement().executeQuery(query);
+            assertTrue(rs.next());
+            assertEquals("ab", rs.getString(1));
+            assertEquals("abc", rs.getString(2));
+            assertTrue(rs.next());
+            assertEquals("bc", rs.getString(1));
+            assertEquals("bcd", rs.getString(2));
+            assertTrue(rs.next());
+            assertEquals("bc", rs.getString(1));
+            assertEquals("ccc", rs.getString(2));
+            assertTrue(rs.next());
+            assertEquals("de", rs.getString(1));
+            assertEquals("def", rs.getString(2));
+            assertTrue(rs.next());
+            assertEquals("ae", rs.getString(1));
+            assertEquals("efg", rs.getString(2));
+            assertFalse(rs.next());
+        }
+    }
+
+    private void assertIndexTableNotSelected(Connection conn, String 
dataTableName, String indexTableName, String sql)
+            throws Exception {
+        try {
+            assertExplainPlan(conn, sql, dataTableName, indexTableName);
+            throw new AssertionError("The index table should not be selected 
without an index hint");

Review comment:
       Good catch. I should throw a different exception here.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


> Use batching when joining data table rows with uncovered global index rows
> --------------------------------------------------------------------------
>
>                 Key: PHOENIX-6501
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-6501
>             Project: Phoenix
>          Issue Type: Improvement
>    Affects Versions: 5.1.2
>            Reporter: Kadir Ozdemir
>            Assignee: Lars Hofhansl
>            Priority: Major
>         Attachments: PHOENIX-6501.master.001.patch
>
>
> PHOENIX-6458 extends the existing uncovered local index support to global 
> indexes. The current solution uses HBase get operations to join data table 
> rows with uncovered index rows on the server side. Doing a separate RPC call 
> for every data table row can be expensive. Instead, we can buffer many 
> data row keys in memory, use a skip scan filter, and even use multiple threads 
> to issue a separate scan for each data table region in parallel. This will 
> reduce the cost of the join and also improve performance.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

[jira] [Commented] (PHOENIX-6501) Use batching when joining data table rows with uncovered global index rows

Reply via email to