gjacoby126 commented on a change in pull request #741: PHOENIX-5791 Eliminate false invalid row detection due to concurrent … URL: https://github.com/apache/phoenix/pull/741#discussion_r398801976
########## File path: phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java ########## @@ -613,39 +605,137 @@ private boolean isDeleteFamilyVersion(Mutation mutation) { } return getMutationsWithSameTS(put, del); } + /** + * In this method, the actual list is repaired in memory using the expected list which is actually the output of + * rebuilding the index table row. The result of this repair is used only for verification. + */ + private void repairActualMutationList(List<Mutation> actualMutationList, List<Mutation> expectedMutationList) + throws IOException { + // Find the first (latest) actual unverified put mutation + List<Mutation> repairedMutationList = new ArrayList<>(expectedMutationList.size()); + for (Mutation actual : actualMutationList) { + if (actual instanceof Put && !isVerified((Put) actual)) { + long ts = getTimestamp(actual); + int expectedIndex; + int expectedListSize = expectedMutationList.size(); + for (expectedIndex = 0; expectedIndex < expectedListSize; expectedIndex++) { + if (getTimestamp(expectedMutationList.get(expectedIndex)) <= ts) { + if (expectedIndex > 0) { + expectedIndex--; + } + break; + } + } + if (expectedIndex == expectedListSize) { + continue; + } + for (; expectedIndex < expectedListSize; expectedIndex++) { + Mutation mutation = expectedMutationList.get(expectedIndex); + if (mutation instanceof Put) { + mutation = new Put((Put) mutation); + } else { + mutation = new Delete((Delete) mutation); + } + repairedMutationList.add(mutation); + } + // Since we repair the entire history, there is no need to more than once + break; + } + } + if (repairedMutationList.isEmpty()) { + return; + } + actualMutationList.addAll(repairedMutationList); + Collections.sort(actualMutationList, MUTATION_TS_DESC_COMPARATOR); + } + + private void cleanUpActualMutationList(List<Mutation> actualMutationList) + throws IOException { + Iterator<Mutation> iterator = actualMutationList.iterator(); + Mutation previous = null; + while (iterator.hasNext()) { + Mutation mutation = iterator.next(); + if ((mutation instanceof Put && !isVerified((Put) mutation)) || + (mutation instanceof Delete && isDeleteFamilyVersion(mutation))) { + iterator.remove(); + } else { + if (previous != null && getTimestamp(previous) == getTimestamp(mutation) && + ((previous instanceof Put && mutation instanceof Put) || + previous instanceof Delete && mutation instanceof Delete)) { + iterator.remove(); + } else { + previous = mutation; + } + } + } + } /** - * indexRow is the set of all cells of all the row version of an index row from the index table. These are actual - * cells. We group these cells based on timestamp and type (put vs delete), and form the actual set of - * index mutations. indexKeyToMutationMap is a map from an index row key to a set of mutations that are generated - * using the rebuild process (i.e., by replaying raw data table mutations). These sets are sets of expected - * index mutations, one set for each index row key. Since not all mutations in the index table have both phase - * (i.e., pre and post data phase) mutations, we cannot compare actual index mutations with expected one by one - * and expect to find them identical. We need to consider concurrent data mutation effects, data table row write - * failures, post index write failures. Thus, we need to allow some expected and actual mutations to be skipped - * during comparing actual mutations to index mutations. + * There are two types of verification: without repair and with repair. Without-repair verification is done before + * or after index rebuild. It is done before index rebuild to identify the rows to be rebuilt. It is done after + * index rebuild to verify the rows that have been rebuilt. With-repair verification can be done anytime using + * the “-v ONLY” option to check the consistency of the index table. Review comment: "Note that with-repair verification simulates read repair in-memory for the purpose of verification, but does not actually repair the data in the index." ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services