This is an automated email from the ASF dual-hosted git repository. kadir pushed a commit to branch 4.14-HBase-1.3 in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/4.14-HBase-1.3 by this push: new e104e90 PHOENIX-5708 GlobalIndexChecker returns unverified index row cells e104e90 is described below commit e104e905d42db2d6046996fc16d0f2e6382fbf63 Author: Kadir <kozde...@salesforce.com> AuthorDate: Sat Feb 1 22:02:01 2020 -0800 PHOENIX-5708 GlobalIndexChecker returns unverified index row cells --- .../org/apache/phoenix/end2end/IndexToolIT.java | 14 ++++++- .../end2end/index/GlobalIndexCheckerIT.java | 38 +++++++++++++++++++ .../coprocessor/IndexRebuildRegionScanner.java | 8 +++- .../apache/phoenix/index/GlobalIndexChecker.java | 43 +++++++++++++++++++++- 4 files changed, 97 insertions(+), 6 deletions(-) diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java index c129100..9dde323 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexToolIT.java @@ -190,8 +190,8 @@ public class IndexToolIT extends ParallelStatsEnabledIT { @Test public void testWithSetNull() throws Exception { - // This test is for building non-transactional global indexes with direct api - if (localIndex || transactional || !directApi || useSnapshot || useTenantId) { + // This test is for building non-transactional mutable global indexes with direct api + if (localIndex || transactional || !directApi || useSnapshot || useTenantId || !mutable) { return; } // This tests the cases where a column having a null value is overwritten with a not null value and vice versa; @@ -236,6 +236,16 @@ public class IndexToolIT extends ParallelStatsEnabledIT { TestUtil.doMajorCompaction(conn, dataTableFullName); actualRowCount = IndexScrutiny.scrutinizeIndex(conn, dataTableFullName, indexTableFullName); assertEquals(NROWS, actualRowCount); + indexTool = runIndexTool(directApi, useSnapshot, schemaName, dataTableName, indexTableName, null, + 0, IndexTool.IndexVerifyType.ONLY, new String[0]); + assertEquals(NROWS, indexTool.getJob().getCounters().findCounter(INPUT_RECORDS).getValue()); + assertEquals(NROWS, indexTool.getJob().getCounters().findCounter(SCANNED_DATA_ROW_COUNT).getValue()); + assertEquals(0, indexTool.getJob().getCounters().findCounter(REBUILT_INDEX_ROW_COUNT).getValue()); + assertEquals(NROWS, indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_VALID_INDEX_ROW_COUNT).getValue()); + assertEquals(0, indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_EXPIRED_INDEX_ROW_COUNT).getValue()); + assertEquals(0, indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_INVALID_INDEX_ROW_COUNT).getValue()); + assertEquals(0, indexTool.getJob().getCounters().findCounter(BEFORE_REBUILD_MISSING_INDEX_ROW_COUNT).getValue()); + dropIndexToolTables(conn); } } diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java index d0cdb3d..30f9b34 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java @@ -269,6 +269,44 @@ public class GlobalIndexCheckerIT extends BaseUniqueNamesOwnClusterIT { } @Test + public void testUnverifiedValuesAreNotVisible() throws Exception { + if (async) { + // No need to run the same test twice one for async = true and the other for async = false + return; + } + try (Connection conn = DriverManager.getConnection(getUrl())) { + String dataTableName = generateUniqueName(); + conn.createStatement().execute("create table " + dataTableName + + " (id varchar(10) not null primary key, val1 varchar(10), val2 varchar(10), val3 varchar(10))" + tableDDLOptions); + String indexTableName = generateUniqueName(); + conn.createStatement().execute("CREATE INDEX " + indexTableName + " on " + + dataTableName + " (val1) include (val2, val3)"); + + // Configure IndexRegionObserver to fail the data write phase + IndexRegionObserver.setFailDataTableUpdatesForTesting(true); + conn.createStatement().execute("upsert into " + dataTableName + " values ('a', 'ab','abc', 'abcd')"); + commitWithException(conn); + // The above upsert will create an unverified index row + // Configure IndexRegionObserver to allow the data write phase + IndexRegionObserver.setFailDataTableUpdatesForTesting(false); + // Insert the same row with missing value for val3 + conn.createStatement().execute("upsert into " + dataTableName + " (id, val1, val2) values ('a', 'ab','abc')"); + conn.commit(); + // At this moment val3 in the data table row has null value + String selectSql = "SELECT val3 from " + dataTableName + " WHERE val1 = 'ab'"; + // Verify that we will read from the index table + assertExplainPlan(conn, selectSql, dataTableName, indexTableName); + ResultSet rs = conn.createStatement().executeQuery(selectSql); + // Verify that we do not read from the unverified row + assertTrue(rs.next()); + assertEquals(null, rs.getString(1)); + assertFalse(rs.next()); + // Add rows and check everything is still okay + verifyTableHealth(conn, dataTableName, indexTableName); + } + } + + @Test public void testOnePhaseOverwiteFollowingTwoPhaseWrite() throws Exception { try (Connection conn = DriverManager.getConnection(getUrl())) { String dataTableName = generateUniqueName(); diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java index e726aa7..e0c61d8 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java @@ -718,6 +718,10 @@ public class IndexRebuildRegionScanner extends BaseRegionScanner { logToIndexToolOutputTable(dataRow.getRow(), indexRow.getRow(), ts, getMaxTimestamp(indexRow), errorMsg); return false; } + if (actualCell.getTimestamp() < ts) { + // Skip older cells since a Phoenix index row is composed of cells with the same timestamp + continue; + } // Check all columns if (!CellUtil.matchingValue(actualCell, expectedCell)) { String errorMsg = "Not matching value for " + Bytes.toString(family) + ":" + @@ -725,9 +729,9 @@ public class IndexRebuildRegionScanner extends BaseRegionScanner { logToIndexToolOutputTable(dataRow.getRow(), indexRow.getRow(), ts, getMaxTimestamp(indexRow), errorMsg, CellUtil.cloneValue(expectedCell), CellUtil.cloneValue(actualCell)); return false; - } else if (!CellUtil.matchingTimestamp(actualCell, expectedCell)) { + } else if (actualCell.getTimestamp() != ts) { String errorMsg = "Not matching timestamp for " + Bytes.toString(family) + ":" + - Bytes.toString(qualifier) + " E: " + expectedCell.getTimestamp() + " A: " + + Bytes.toString(qualifier) + " E: " + ts + " A: " + actualCell.getTimestamp(); logToIndexToolOutputTable(dataRow.getRow(), indexRow.getRow(), ts, getMaxTimestamp(indexRow), errorMsg, null, null); diff --git a/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java b/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java index 5d6c83f..f3b1369 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java @@ -441,7 +441,47 @@ public class GlobalIndexChecker extends BaseRegionObserver { return true; } + /** + * An index row is composed of cells with the same timestamp. However, if there are multiple versions of an + * index row, HBase can return an index row with cells from multiple versions, and thus it can return cells + * with different timestamps. This happens if the version of the row we are reading does not have a value + * (i.e., effectively has null value) for a column whereas an older version has a value for the column. + * In this case, we need to remove the older cells for correctness. + */ + private void removeOlderCells(List<Cell> cellList) { + Iterator<Cell> cellIterator = cellList.iterator(); + if (!cellIterator.hasNext()) { + return; + } + Cell cell = cellIterator.next(); + long maxTs = cell.getTimestamp(); + long ts; + boolean allTheSame = true; + while (cellIterator.hasNext()) { + cell = cellIterator.next(); + ts = cell.getTimestamp(); + if (ts != maxTs) { + if (ts > maxTs) { + maxTs = ts; + } + allTheSame = false; + } + } + if (allTheSame) { + return; + } + cellIterator = cellList.iterator(); + while (cellIterator.hasNext()) { + cell = cellIterator.next(); + if (cell.getTimestamp() != maxTs) { + cellIterator.remove(); + } + } + } + + private boolean verifyRowAndRemoveEmptyColumn(List<Cell> cellList) throws IOException { + removeOlderCells(cellList); long cellListSize = cellList.size(); Cell cell = null; if (cellListSize == 0) { @@ -488,7 +528,6 @@ public class GlobalIndexChecker extends BaseRegionObserver { */ private boolean verifyRowAndRepairIfNecessary(List<Cell> cellList) throws IOException { metricsSource.incrementIndexInspections(); - Cell cell = cellList.get(0); if (verifyRowAndRemoveEmptyColumn(cellList)) { return true; @@ -496,7 +535,7 @@ public class GlobalIndexChecker extends BaseRegionObserver { long repairStart = EnvironmentEdgeManager.currentTimeMillis(); byte[] rowKey = CellUtil.cloneRow(cell); - long ts = getMaxTimestamp(cellList); + long ts = cellList.get(0).getTimestamp(); cellList.clear(); try {