This is an automated email from the ASF dual-hosted git repository.

kadir pushed a commit to branch 4.x-HBase-1.5
in repository https://gitbox.apache.org/repos/asf/phoenix.git


The following commit(s) were added to refs/heads/4.x-HBase-1.5 by this push:
     new 598b61c  PHOENIX-5708 GlobalIndexChecker returns unverified index row 
cells
598b61c is described below

commit 598b61c864354307cd5bd7b3a31c35f971e298fe
Author: Kadir <kozde...@salesforce.com>
AuthorDate: Sat Feb 1 22:02:01 2020 -0800

    PHOENIX-5708 GlobalIndexChecker returns unverified index row cells
---
 .../org/apache/phoenix/end2end/IndexToolIT.java    | 14 ++++++-
 .../end2end/index/GlobalIndexCheckerIT.java        | 38 +++++++++++++++++++
 .../coprocessor/IndexRebuildRegionScanner.java     |  8 +++-
 .../apache/phoenix/index/GlobalIndexChecker.java   | 43 +++++++++++++++++++++-
 4 files changed, 97 insertions(+), 6 deletions(-)

diff --git 
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java 
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java
index f8ae8b5..d3ca67d 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java
@@ -198,8 +198,8 @@ public class IndexToolIT extends 
BaseUniqueNamesOwnClusterIT {
 
     @Test
     public void testWithSetNull() throws Exception {
-        // This test is for building non-transactional global indexes with 
direct api
-        if (localIndex || transactional || !directApi || useSnapshot || 
useTenantId) {
+        // This test is for building non-transactional mutable global indexes 
with direct api
+        if (localIndex || transactional || !directApi || useSnapshot || 
useTenantId || !mutable) {
             return;
         }
         // This tests the cases where a column having a null value is 
overwritten with a not null value and vice versa;
@@ -244,6 +244,16 @@ public class IndexToolIT extends 
BaseUniqueNamesOwnClusterIT {
             TestUtil.doMajorCompaction(conn, dataTableFullName);
             actualRowCount = IndexScrutiny.scrutinizeIndex(conn, 
dataTableFullName, indexTableFullName);
             assertEquals(NROWS, actualRowCount);
+            indexTool = runIndexTool(directApi, useSnapshot, schemaName, 
dataTableName, indexTableName, null,
+                    0, IndexTool.IndexVerifyType.ONLY, new String[0]);
+            assertEquals(NROWS, 
indexTool.getJob().getCounters().findCounter(INPUT_RECORDS).getValue());
+            assertEquals(NROWS, 
indexTool.getJob().getCounters().findCounter(SCANNED_DATA_ROW_COUNT).getValue());
+            assertEquals(0, 
indexTool.getJob().getCounters().findCounter(REBUILT_INDEX_ROW_COUNT).getValue());
+            assertEquals(NROWS, 
indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_VALID_INDEX_ROW_COUNT).getValue());
+            assertEquals(0, 
indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_EXPIRED_INDEX_ROW_COUNT).getValue());
+            assertEquals(0, 
indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_INVALID_INDEX_ROW_COUNT).getValue());
+            assertEquals(0, 
indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_MISSING_INDEX_ROW_COUNT).getValue());
+            dropIndexToolTables(conn);
         }
     }
 
diff --git 
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java
 
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java
index a825826..55c7d31 100644
--- 
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java
+++ 
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java
@@ -269,6 +269,44 @@ public class GlobalIndexCheckerIT extends 
BaseUniqueNamesOwnClusterIT {
     }
 
     @Test
+    public void testUnverifiedValuesAreNotVisible() throws Exception {
+        if (async) {
+            // No need to run the same test twice one for async = true and the 
other for async = false
+            return;
+        }
+        try (Connection conn = DriverManager.getConnection(getUrl())) {
+            String dataTableName = generateUniqueName();
+            conn.createStatement().execute("create table " + dataTableName +
+                    " (id varchar(10) not null primary key, val1 varchar(10), 
val2 varchar(10), val3 varchar(10))" + tableDDLOptions);
+            String indexTableName = generateUniqueName();
+            conn.createStatement().execute("CREATE INDEX " + indexTableName + 
" on " +
+                    dataTableName + " (val1) include (val2, val3)");
+
+            // Configure IndexRegionObserver to fail the data write phase
+            IndexRegionObserver.setFailDataTableUpdatesForTesting(true);
+            conn.createStatement().execute("upsert into " + dataTableName + " 
values ('a', 'ab','abc', 'abcd')");
+            commitWithException(conn);
+            // The above upsert will create an unverified index row
+            // Configure IndexRegionObserver to allow the data write phase
+            IndexRegionObserver.setFailDataTableUpdatesForTesting(false);
+            // Insert the same row with missing value for val3
+            conn.createStatement().execute("upsert into " + dataTableName + " 
(id, val1, val2) values ('a', 'ab','abc')");
+            conn.commit();
+            // At this moment val3 in the data table row has null value
+            String selectSql = "SELECT val3 from " + dataTableName + " WHERE 
val1  = 'ab'";
+            // Verify that we will read from the index table
+            assertExplainPlan(conn, selectSql, dataTableName, indexTableName);
+            ResultSet rs = conn.createStatement().executeQuery(selectSql);
+            // Verify that we do not read from the unverified row
+            assertTrue(rs.next());
+            assertEquals(null, rs.getString(1));
+            assertFalse(rs.next());
+            // Add rows and check everything is still okay
+            verifyTableHealth(conn, dataTableName, indexTableName);
+        }
+    }
+
+    @Test
     public void testOnePhaseOverwiteFollowingTwoPhaseWrite() throws Exception {
         try (Connection conn = DriverManager.getConnection(getUrl())) {
             String dataTableName = generateUniqueName();
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java
index e726aa7..e0c61d8 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java
@@ -718,6 +718,10 @@ public class IndexRebuildRegionScanner extends 
BaseRegionScanner {
                     logToIndexToolOutputTable(dataRow.getRow(), 
indexRow.getRow(), ts, getMaxTimestamp(indexRow), errorMsg);
                     return false;
                 }
+                if (actualCell.getTimestamp() < ts) {
+                    // Skip older cells since a Phoenix index row is composed 
of cells with the same timestamp
+                    continue;
+                }
                 // Check all columns
                 if (!CellUtil.matchingValue(actualCell, expectedCell)) {
                     String errorMsg = "Not matching value for " + 
Bytes.toString(family) + ":" +
@@ -725,9 +729,9 @@ public class IndexRebuildRegionScanner extends 
BaseRegionScanner {
                     logToIndexToolOutputTable(dataRow.getRow(), 
indexRow.getRow(), ts, getMaxTimestamp(indexRow),
                             errorMsg, CellUtil.cloneValue(expectedCell), 
CellUtil.cloneValue(actualCell));
                     return false;
-                } else if (!CellUtil.matchingTimestamp(actualCell, 
expectedCell)) {
+                } else if (actualCell.getTimestamp() != ts) {
                     String errorMsg = "Not matching timestamp for " + 
Bytes.toString(family) + ":" +
-                            Bytes.toString(qualifier) + " E: " + 
expectedCell.getTimestamp() + " A: " +
+                            Bytes.toString(qualifier) + " E: " + ts + " A: " +
                             actualCell.getTimestamp();
                     logToIndexToolOutputTable(dataRow.getRow(), 
indexRow.getRow(), ts, getMaxTimestamp(indexRow),
                             errorMsg, null, null);
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java 
b/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java
index 0e502ad..b9db476 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java
@@ -438,7 +438,47 @@ public class GlobalIndexChecker extends BaseRegionObserver 
{
             return true;
         }
 
+        /**
+         *  An index row is composed of cells with the same timestamp. 
However, if there are multiple versions of an
+         *  index row, HBase can return an index row with cells from multiple 
versions, and thus it can return cells
+         *  with different timestamps. This happens if the version of the row 
we are reading does not have a value
+         *  (i.e., effectively has null value) for a column whereas an older 
version has a value for the column.
+         *  In this case, we need to remove the older cells for correctness.
+         */
+        private void removeOlderCells(List<Cell> cellList) {
+            Iterator<Cell> cellIterator = cellList.iterator();
+            if (!cellIterator.hasNext()) {
+                return;
+            }
+            Cell cell = cellIterator.next();
+            long maxTs = cell.getTimestamp();
+            long ts;
+            boolean allTheSame = true;
+            while (cellIterator.hasNext()) {
+                cell = cellIterator.next();
+                ts = cell.getTimestamp();
+                if (ts != maxTs) {
+                    if (ts > maxTs) {
+                        maxTs = ts;
+                    }
+                    allTheSame = false;
+                }
+            }
+            if (allTheSame) {
+                return;
+            }
+            cellIterator = cellList.iterator();
+            while (cellIterator.hasNext()) {
+                cell = cellIterator.next();
+                if (cell.getTimestamp() != maxTs) {
+                    cellIterator.remove();
+                }
+            }
+        }
+
+
         private boolean verifyRowAndRemoveEmptyColumn(List<Cell> cellList) 
throws IOException {
+            removeOlderCells(cellList);
             long cellListSize = cellList.size();
             Cell cell = null;
             if (cellListSize == 0) {
@@ -485,7 +525,6 @@ public class GlobalIndexChecker extends BaseRegionObserver {
          */
         private boolean verifyRowAndRepairIfNecessary(List<Cell> cellList) 
throws IOException {
             metricsSource.incrementIndexInspections();
-
             Cell cell = cellList.get(0);
             if (verifyRowAndRemoveEmptyColumn(cellList)) {
                 return true;
@@ -493,7 +532,7 @@ public class GlobalIndexChecker extends BaseRegionObserver {
                 long repairStart = EnvironmentEdgeManager.currentTimeMillis();
 
                 byte[] rowKey = CellUtil.cloneRow(cell);
-                long ts = getMaxTimestamp(cellList);
+                long ts = cellList.get(0).getTimestamp();
                 cellList.clear();
 
                 try {

Reply via email to