kadirozde commented on a change in pull request #741: PHOENIX-5791 Eliminate
false invalid row detection due to concurrent …
URL: https://github.com/apache/phoenix/pull/741#discussion_r399513548
##########
File path:
phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java
##########
@@ -613,39 +605,137 @@ private boolean isDeleteFamilyVersion(Mutation
mutation) {
}
return getMutationsWithSameTS(put, del);
}
+ /**
+ * In this method, the actual list is repaired in memory using the
expected list which is actually the output of
+ * rebuilding the index table row. The result of this repair is used only
for verification.
+ */
+ private void repairActualMutationList(List<Mutation> actualMutationList,
List<Mutation> expectedMutationList)
+ throws IOException {
+ // Find the first (latest) actual unverified put mutation
+ List<Mutation> repairedMutationList = new
ArrayList<>(expectedMutationList.size());
+ for (Mutation actual : actualMutationList) {
+ if (actual instanceof Put && !isVerified((Put) actual)) {
+ long ts = getTimestamp(actual);
+ int expectedIndex;
+ int expectedListSize = expectedMutationList.size();
+ for (expectedIndex = 0; expectedIndex < expectedListSize;
expectedIndex++) {
+ if (getTimestamp(expectedMutationList.get(expectedIndex))
<= ts) {
+ if (expectedIndex > 0) {
+ expectedIndex--;
+ }
+ break;
+ }
+ }
+ if (expectedIndex == expectedListSize) {
+ continue;
+ }
+ for (; expectedIndex < expectedListSize; expectedIndex++) {
+ Mutation mutation =
expectedMutationList.get(expectedIndex);
+ if (mutation instanceof Put) {
+ mutation = new Put((Put) mutation);
+ } else {
+ mutation = new Delete((Delete) mutation);
+ }
+ repairedMutationList.add(mutation);
+ }
+ // Since we repair the entire history, there is no need to
repair more than once
+ break;
+ }
+ }
+ if (repairedMutationList.isEmpty()) {
+ return;
+ }
+ actualMutationList.addAll(repairedMutationList);
+ Collections.sort(actualMutationList, MUTATION_TS_DESC_COMPARATOR);
+ }
+
+ private void cleanUpActualMutationList(List<Mutation> actualMutationList)
+ throws IOException {
+ Iterator<Mutation> iterator = actualMutationList.iterator();
+ Mutation previous = null;
+ while (iterator.hasNext()) {
+ Mutation mutation = iterator.next();
+ if ((mutation instanceof Put && !isVerified((Put) mutation)) ||
+ (mutation instanceof Delete &&
isDeleteFamilyVersion(mutation))) {
+ iterator.remove();
+ } else {
+ if (previous != null && getTimestamp(previous) ==
getTimestamp(mutation) &&
+ ((previous instanceof Put && mutation instanceof Put)
||
+ previous instanceof Delete && mutation
instanceof Delete)) {
+ iterator.remove();
+ } else {
+ previous = mutation;
+ }
+ }
+ }
+ }
/**
- * indexRow is the set of all cells of all the row version of an index row
from the index table. These are actual
- * cells. We group these cells based on timestamp and type (put vs
delete), and form the actual set of
- * index mutations. indexKeyToMutationMap is a map from an index row key
to a set of mutations that are generated
- * using the rebuild process (i.e., by replaying raw data table
mutations). These sets are sets of expected
- * index mutations, one set for each index row key. Since not all
mutations in the index table have both phase
- * (i.e., pre and post data phase) mutations, we cannot compare actual
index mutations with expected one by one
- * and expect to find them identical. We need to consider concurrent data
mutation effects, data table row write
- * failures, post index write failures. Thus, we need to allow some
expected and actual mutations to be skipped
- * during comparing actual mutations to index mutations.
+ * There are two types of verification: without repair and with repair.
Without-repair verification is done before
+ * or after index rebuild. It is done before index rebuild to identify the
rows to be rebuilt. It is done after
+ * index rebuild to verify the rows that have been rebuilt. With-repair
verification can be done anytime using
+ * the “-v ONLY” option to check the consistency of the index table.
Review comment:
I will add the suggested comment.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services