kadirozde commented on a change in pull request #624: PHOENIX-5564 Restructure 
read repair to improve readability and corre…
URL: https://github.com/apache/phoenix/pull/624#discussion_r345968934
 
 

 ##########
 File path: 
phoenix-core/src/main/java/org/apache/phoenix/index/GlobalIndexChecker.java
 ##########
 @@ -275,45 +296,72 @@ private void repairIndexRows(byte[] indexRowKey, long 
ts, List<Cell> row) throws
             }
             // A single cell will be returned. We decode that here
             byte[] value = result.value();
-            long rowCount = PLong.INSTANCE.getCodec().decodeLong(new 
ImmutableBytesWritable(value), SortOrder.getDefault());
-            if (rowCount == 0) {
-                // This means there does not exist a data table row for this 
unverified index row
+            long code = PLong.INSTANCE.getCodec().decodeLong(new 
ImmutableBytesWritable(value), SortOrder.getDefault());
+            if (code == NO_DATA_ROW) {
+                // This means there does not exist a data table row for the 
data row key derived from
+                // this unverified index row. So, no index row has been built
                 // Delete the unverified row from index if it is old enough
                 deleteRowIfAgedEnough(indexRowKey, row, ts, false);
                 // Skip this unverified row (i.e., do not return it to the 
client). Just retuning empty row is
                 // sufficient to do that
                 row.clear();
                 return;
             }
-            // Close the current scanner as the newly build row will not be 
visible to it
+            // An index row has been built. Close the current scanner as the 
newly built row will not be visible to it
             scanner.close();
+            if (code == NO_INDEX_ROW) {
+                // This means there exists a data table row for the data row 
key derived from this unverified index row
+                // but the data table row does not point back to the index row.
+                // Delete the unverified row from index if it is old enough
+                deleteRowIfAgedEnough(indexRowKey, row, ts, false);
+                // Open a new scanner starting from the row after the current 
row
+                indexScan.withStartRow(indexRowKey, false);
+                scanner = region.getScanner(indexScan);
+                // Skip this unverified row (i.e., do not return it to the 
client). Just retuning empty row is
+                // sufficient to do that
+                row.clear();
+                return;
+            }
+            // code == INDEX_ROW_EXISTS
             // Open a new scanner starting from the current row
             indexScan.withStartRow(indexRowKey, true);
             scanner = region.getScanner(indexScan);
-            // Scan the newly build index row
             scanner.next(row);
             if (row.isEmpty()) {
+                // This means the index row has been deleted before opening 
the new scanner.
                 return;
             }
-            boolean indexRowExists = false;
             // Check if the index row still exist after rebuild
-            while (Bytes.compareTo(row.get(0).getRowArray(), 
row.get(0).getRowOffset(), row.get(0).getRowLength(),
-                    indexRowKey, 0, indexRowKey.length) == 0) {
-                indexRowExists = true;
+            if  (Bytes.compareTo(row.get(0).getRowArray(), 
row.get(0).getRowOffset(), row.get(0).getRowLength(),
+                    indexRowKey, 0, indexRowKey.length) != 0) {
+                // This means the index row has been deleted before opening 
the new scanner. We got a different row
+                // If this row is "verified" (or empty) then we are good to go.
                 if (verifyRowAndRemoveEmptyColumn(row)) {
-                    // The index row status is "verified". This row is good to 
return to the client. We are done here.
                     return;
                 }
-                // The index row is still "unverified" after rebuild. This 
means either that the data table row timestamp is
-                // lower than than the timestamp of the unverified index row 
(ts) and the index row that is built from
-                // the data table row is masked by this unverified row, or 
that the corresponding data table row does
-                // exist
+                // The row is "unverified". Rewind the scanner and let the row 
be scanned again
+                // so that it can be repaired
+                scanner.close();
+                scanner = region.getScanner(indexScan);
+                row.clear();
 
 Review comment:
   Great catch!

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to