wangweiming800 commented on a change in pull request #741: PHOENIX-5791
Eliminate false invalid row detection due to concurrent …
URL: https://github.com/apache/phoenix/pull/741#discussion_r397521193
##########
File path:
phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexRebuildRegionScanner.java
##########
@@ -614,38 +606,132 @@ private boolean isDeleteFamilyVersion(Mutation
mutation) {
return getMutationsWithSameTS(put, del);
}
/**
 * Repairs the actual index mutation list by merging in expected mutations.
 *
 * If the actual list contains an unverified put (left behind by a concurrent
 * update or a data-table write failure), the expected mutations at or around
 * that put's timestamp — and everything older — are copied into the actual
 * list so that the subsequent comparison does not flag false invalid rows.
 * The list is re-sorted in descending timestamp order afterwards.
 *
 * @param actualMutationList   mutations read from the index table, assumed to
 *                             be sorted newest-first (descending timestamp) —
 *                             the loops below rely on that ordering
 * @param expectedMutationList mutations generated by replaying the data table,
 *                             also assumed newest-first
 * @throws IOException if a mutation cannot be inspected
 */
private void repairActualMutationList(List<Mutation> actualMutationList, List<Mutation> expectedMutationList)
        throws IOException {
    // find the first (latest) actual unverified put mutation
    Mutation actual = null;
    for (Mutation mutation : actualMutationList) {
        if (mutation instanceof Put && !isVerified((Put) mutation)) {
            actual = mutation;
            break;
        }
    }
    if (actual == null) {
        // no unverified put — nothing to repair
        return;
    }
    long ts = getTimestamp(actual);
    int expectedIndex;
    int expectedListSize = expectedMutationList.size();
    // Walk the (descending-ts) expected list to the first entry at or below
    // the unverified put's timestamp.
    for (expectedIndex = 0; expectedIndex < expectedListSize; expectedIndex++) {
        if (getTimestamp(expectedMutationList.get(expectedIndex)) <= ts) {
            if (expectedIndex > 0) {
                // Step back one so the expected mutation just NEWER than ts is
                // also included in the repair.
                expectedIndex--;
            }
            break;
        }
    }
    if (expectedIndex == expectedListSize) {
        // every expected mutation is newer than ts — nothing to merge
        return;
    }
    // Append defensive copies of the remaining expected mutations; copies are
    // made so later passes cannot mutate the expected list's entries.
    for (; expectedIndex < expectedListSize; expectedIndex++) {
        Mutation mutation = expectedMutationList.get(expectedIndex);
        if (mutation instanceof Put) {
            mutation = new Put((Put) mutation);
        } else {
            mutation = new Delete((Delete) mutation);
        }
        actualMutationList.add(mutation);
    }
    // Restore the newest-first invariant after the appends.
    Collections.sort(actualMutationList, MUTATION_TS_DESC_COMPARATOR);
}
+
+ private void cleanUpActualMutationList(List<Mutation> actualMutationList)
+ throws IOException {
+ Iterator<Mutation> iterator = actualMutationList.iterator();
+ Mutation previous = null;
+ while (iterator.hasNext()) {
+ Mutation mutation = iterator.next();
+ if ((mutation instanceof Put && !isVerified((Put) mutation)) ||
+ (mutation instanceof Delete &&
isDeleteFamilyVersion(mutation))) {
+ iterator.remove();
+ } else {
+ if (previous != null && getTimestamp(previous) ==
getTimestamp(mutation) &&
+ ((previous instanceof Put && mutation instanceof Put)
||
+ previous instanceof Delete && mutation
instanceof Delete)) {
+ iterator.remove();
+ } else {
+ previous = mutation;
+ }
+ }
+ }
+ }
+
/**
- * indexRow is the set of all cells of all the row version of an index row
from the index table. These are actual
- * cells. We group these cells based on timestamp and type (put vs
delete), and form the actual set of
- * index mutations. indexKeyToMutationMap is a map from an index row key
to a set of mutations that are generated
- * using the rebuild process (i.e., by replaying raw data table
mutations). These sets are sets of expected
- * index mutations, one set for each index row key. Since not all
mutations in the index table have both phase
- * (i.e., pre and post data phase) mutations, we cannot compare actual
index mutations with expected one by one
- * and expect to find them identical. We need to consider concurrent data
mutation effects, data table row write
- * failures, post index write failures. Thus, we need to allow some
expected and actual mutations to be skipped
- * during comparing actual mutations to index mutations.
+ * There are two types of verification: without repair and with repair.
Without-repair verification is done before
+ * or after index rebuild. It is done before index rebuild to identify the
rows to be rebuilt. It is done after
+ * index rebuild to verify the rows that have been rebuilt. With-repair
verification can be done anytime using
+ * the "-v ONLY" option to check the consistency of the index table.
+ *
+ * Unverified Rows
+ *
+ * For each mutable data table mutation during regular data table updates,
two operations are done on the data table.
+ * One is to read the existing row state, and the second is to update the
data table for this row. The processing of
+ * concurrent data mutations is serialized once for reading the existing
row states, and then serialized again
+ * for updating the data table. In other words, they go through locking
twice, i.e., [lock, read, unlock] and
+ * [lock, write, unlock]. Because of this two phase locking, for a pair of
concurrent mutations (for the same row),
+ * the same row state can be read from the data table. This means the same
existing index row can be made unverified
+ * twice with different timestamps, one for each concurrent mutation.
These unverified mutations can be repaired
+ * from the data table later during HBase scans using the index read
repair process. This is one of the reasons
+ * for having extra unverified rows in the index table. The other reason
is the data table write failures.
+ * When a data table write fails, it leaves an unverified index row
behind. These rows are never returned to clients,
+ * instead they are repaired, which means either they are rebuilt from
their data table rows or they are deleted if
+ * their data table rows do not exist.
+ *
+ * Delete Family Version Markers
+ *
+ * The family version delete markers are generated by the read repair to
remove extra unverified rows. They only
Review comment:
Delete markers are not only generated by the read repair. If there is a column
delete on an indexed column in the data table, there will be a family version
delete marker as well. Will it be a problem?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services