kadirozde commented on a change in pull request #758: PHOENIX-5804: Implement 
strong verification with -v ONLY option for o…
URL: https://github.com/apache/phoenix/pull/758#discussion_r409752718
 
 

 ##########
 File path: 
phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexerRegionScanner.java
 ##########
 @@ -0,0 +1,484 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.coprocessor;
+
+import static 
org.apache.phoenix.hbase.index.write.AbstractParallelWriterIndexCommitter.INDEX_WRITER_KEEP_ALIVE_TIME_CONF_KEY;
+import static org.apache.phoenix.query.QueryConstants.AGG_TIMESTAMP;
+import static org.apache.phoenix.query.QueryConstants.EMPTY_COLUMN_VALUE_BYTES;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN_FAMILY;
+import static org.apache.phoenix.query.QueryConstants.UNGROUPED_AGG_ROW_KEY;
+import static 
org.apache.phoenix.query.QueryServices.INDEX_REBUILD_PAGE_SIZE_IN_ROWS;
+import static org.apache.phoenix.query.QueryServices.MUTATE_BATCH_SIZE_ATTRIB;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ExecutionException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Delete;
+
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.RegionScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+import org.apache.phoenix.compile.ScanRanges;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.hbase.index.ValueGetter;
+import org.apache.phoenix.hbase.index.covered.update.ColumnReference;
+import org.apache.phoenix.hbase.index.parallel.EarlyExitFailure;
+import org.apache.phoenix.hbase.index.parallel.Task;
+import org.apache.phoenix.hbase.index.parallel.TaskBatch;
+import org.apache.phoenix.hbase.index.parallel.TaskRunner;
+import org.apache.phoenix.hbase.index.parallel.ThreadPoolBuilder;
+import org.apache.phoenix.hbase.index.parallel.ThreadPoolManager;
+import org.apache.phoenix.hbase.index.parallel.WaitForCompletionTaskRunner;
+import org.apache.phoenix.hbase.index.table.HTableFactory;
+import org.apache.phoenix.hbase.index.util.GenericKeyValueBuilder;
+import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
+
+import org.apache.phoenix.index.IndexMaintainer;
+import org.apache.phoenix.index.PhoenixIndexCodec;
+import org.apache.phoenix.mapreduce.index.IndexTool;
+import org.apache.phoenix.mapreduce.index.IndexVerificationResultRepository;
+import org.apache.phoenix.query.KeyRange;
+import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.schema.types.PLong;
+import org.apache.phoenix.schema.types.PVarbinary;
+import org.apache.phoenix.util.KeyValueUtil;
+import org.apache.phoenix.util.ServerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.Maps;
+
+public class IndexerRegionScanner extends BaseRegionScanner {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(IndexerRegionScanner.class);
+    /** Configuration key for the maximum number of threads of the "IndexVerify" pool. */
+    public static final String NUM_CONCURRENT_INDEX_VERIFY_THREADS_CONF_KEY =
+            "index.verify.threads.max";
+    private static final int DEFAULT_CONCURRENT_INDEX_VERIFY_THREADS = 17;
+    /**
+     * Configuration key whose value the constructor reads into {@code rowCountPerTask}.
+     * It must differ from {@link #NUM_CONCURRENT_INDEX_VERIFY_THREADS_CONF_KEY}; sharing
+     * one key would make a single setting drive both the thread count and the per-task
+     * row count.
+     */
+    public static final String INDEX_VERIFY_ROW_COUNTS_PER_TASK_CONF_KEY =
+            "index.verify.row.count.per.task";
+    private static final int DEFAULT_INDEX_VERIFY_ROW_COUNTS_PER_TASK = 2048;
+    // Stays Long.MAX_VALUE unless the scan carries the INDEX_REBUILD_PAGING attribute,
+    // in which case the constructor reads the page size from the configuration.
+    private long pageSizeInRows = Long.MAX_VALUE;
+    // Read from the configuration by the constructor, only when verification is enabled.
+    private int rowCountPerTask;
+    private boolean hasMore;
+    // Read from MUTATE_BATCH_SIZE_ATTRIB; also used as the constructor argument of the mutation list.
+    private final int maxBatchSize;
+    private UngroupedAggregateRegionObserver.MutationList mutations;
+    // Serialized index metadata taken from the scan attributes (INDEX_PROTO_MD, else INDEX_MD).
+    private byte[] indexMetaData;
+    private Scan scan;
+    private RegionScanner innerScanner;
+    private Region region;
+    // First maintainer deserialized from indexMetaData.
+    private IndexMaintainer indexMaintainer;
+    // The fields below that the constructor assigns only inside its verification branch
+    // keep their defaults (null / 0 / false) when verification is not requested.
+    private Table indexHTable = null;
+    private IndexTool.IndexVerifyType verifyType = 
IndexTool.IndexVerifyType.NONE;
+    // True only when the scan requests a verify type other than NONE.
+    private boolean verify = false;
+    // Both maps are created by the constructor as tree maps ordered by Bytes.BYTES_COMPARATOR.
+    private Map<byte[], Put> indexKeyToDataPutMap;
+    private Map<byte[], Put> dataKeyToDataPutMap;
+    private TaskRunner pool;
+    private TaskBatch<Boolean> tasks;
+    private String exceptionMessage;
+    private HTableFactory hTableFactory;
+    // Time-to-live of the first column family of the index table, as read by the constructor.
+    private int indexTableTTL;
+    private IndexToolVerificationResult verificationResult;
+
+    private IndexVerificationResultRepository verificationResultRepository;
+
+    /**
+     * Builds the scanner from the scan attributes and the coprocessor configuration.
+     * <p>
+     * The page size is read from the configuration only when the scan carries the
+     * INDEX_REBUILD_PAGING attribute. The index metadata is taken from the INDEX_PROTO_MD
+     * attribute, falling back to INDEX_MD, and the first deserialized maintainer is used.
+     * <p>
+     * When the scan carries the INDEX_REBUILD_VERIFY_TYPE attribute, a verification result
+     * object is created and the verify type is parsed. If that type is not NONE, the
+     * verification resources are created as well: the table factory, the index table, the
+     * result repository, the two key-ordered maps, the "IndexVerify" task pool and the
+     * per-task row count.
+     *
+     * @param innerScanner scanner being wrapped; also passed to the superclass
+     * @param region region this scanner runs against
+     * @param scan scan whose attributes configure paging, index metadata and verification
+     * @param env coprocessor environment supplying the configuration and table factory
+     * @throws IOException declared by the constructor; the originating calls are not
+     *         identifiable from this excerpt
+     */
+    IndexerRegionScanner (final RegionScanner innerScanner, final Region 
region, final Scan scan,
+            final RegionCoprocessorEnvironment env) throws IOException {
+        super(innerScanner);
+        final Configuration config = env.getConfiguration();
+        if (scan.getAttribute(BaseScannerRegionObserver.INDEX_REBUILD_PAGING) 
!= null) {
+            pageSizeInRows = config.getLong(INDEX_REBUILD_PAGE_SIZE_IN_ROWS,
+                    
QueryServicesOptions.DEFAULT_INDEX_REBUILD_PAGE_SIZE_IN_ROWS);
+        }
+        maxBatchSize = config.getInt(MUTATE_BATCH_SIZE_ATTRIB, 
QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE);
+        mutations = new 
UngroupedAggregateRegionObserver.MutationList(maxBatchSize);
+        indexMetaData = scan.getAttribute(PhoenixIndexCodec.INDEX_PROTO_MD);
+        if (indexMetaData == null) {
+            indexMetaData = scan.getAttribute(PhoenixIndexCodec.INDEX_MD);
+        }
+        List<IndexMaintainer> maintainers = 
IndexMaintainer.deserialize(indexMetaData, true);
+        indexMaintainer = maintainers.get(0);
+        this.scan = scan;
+        this.innerScanner = innerScanner;
+        this.region = region;
+        byte[] valueBytes = 
scan.getAttribute(BaseScannerRegionObserver.INDEX_REBUILD_VERIFY_TYPE);
+        if (valueBytes != null) {
+            verificationResult = new IndexToolVerificationResult(scan);
+            verifyType = IndexTool.IndexVerifyType.fromValue(valueBytes);
+            if (verifyType != IndexTool.IndexVerifyType.NONE) {
+                verify = true;
+                // Create the following objects only for rebuilds by IndexTool
+                hTableFactory = ServerUtil.getDelegateHTableFactory(env, 
ServerUtil.ConnectionType.INDEX_WRITER_CONNECTION);
+                indexHTable = hTableFactory.getTable(new 
ImmutableBytesPtr(indexMaintainer.getIndexTableName()));
+                indexTableTTL = 
indexHTable.getTableDescriptor().getColumnFamilies()[0].getTimeToLive();
+                verificationResultRepository =
+                        new 
IndexVerificationResultRepository(indexMaintainer.getIndexTableName(), 
hTableFactory);
+                indexKeyToDataPutMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
+                dataKeyToDataPutMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
+                pool = new 
WaitForCompletionTaskRunner(ThreadPoolManager.getExecutor(
+                        new ThreadPoolBuilder("IndexVerify",
+                                
env.getConfiguration()).setMaxThread(NUM_CONCURRENT_INDEX_VERIFY_THREADS_CONF_KEY,
+                                
DEFAULT_CONCURRENT_INDEX_VERIFY_THREADS).setCoreTimeout(
+                                INDEX_WRITER_KEEP_ALIVE_TIME_CONF_KEY), env));
+                rowCountPerTask = 
config.getInt(INDEX_VERIFY_ROW_COUNTS_PER_TASK_CONF_KEY,
+                        DEFAULT_INDEX_VERIFY_ROW_COUNTS_PER_TASK);
+            }
+        }
+    }
+
+    /**
+     * Returns the region info reported by the region this scanner was created for.
+     */
+    @Override
+    public HRegionInfo getRegionInfo() {
+        final HRegionInfo regionInfo = region.getRegionInfo();
+        return regionInfo;
+    }
+
+    @Override
+    public boolean isFilterDone() { return false; }
+
+    /**
+     * Closes the wrapped scanner and, when verification is enabled, logs the verification
+     * result and releases the verification resources.
+     * <p>
+     * Every release step runs in its own {@code finally} block, so a failure in one step
+     * (including closing the wrapped scanner) no longer prevents the remaining resources
+     * from being released. The steps run in the same order as before; if several steps
+     * throw, the exception of the last failing step is the one that propagates.
+     *
+     * @throws IOException if closing the scanner, logging the result, or closing the index
+     *         table or result repository fails
+     */
+    @Override
+    public void close() throws IOException {
+        try {
+            innerScanner.close();
+        } finally {
+            if (verify) {
+                try {
+                    verificationResultRepository.logToIndexToolResultTable(verificationResult,
+                            verifyType, region.getRegionInfo().getRegionName());
+                } finally {
+                    try {
+                        this.pool.stop("IndexerRegionScanner is closing");
+                    } finally {
+                        try {
+                            hTableFactory.shutdown();
+                        } finally {
+                            try {
+                                indexHTable.close();
+                            } finally {
+                                verificationResultRepository.close();
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * {@link ValueGetter} that serves column values straight from the cells of a single
+     * {@link Put}.
+     * <p>
+     * Declared {@code static} because it uses no state of the enclosing scanner, which
+     * avoids holding a hidden reference to it. The same {@code valuePtr} instance is
+     * reused for every lookup, so a returned pointer is overwritten by the next call.
+     */
+    private static class SimpleValueGetter implements ValueGetter {
+        final ImmutableBytesWritable valuePtr = new ImmutableBytesWritable();
+        final Put put;
+
+        SimpleValueGetter(final Put put) {
+            this.put = put;
+        }
+
+        /**
+         * Looks up the given column in the wrapped put.
+         *
+         * @param ref column family and qualifier to look up
+         * @param ts not used by this implementation
+         * @return the shared pointer set to the value of the first matching cell, or
+         *         {@code null} when the put has no cell for the column
+         */
+        @Override
+        public ImmutableBytesWritable getLatestValue(ColumnReference ref, long ts)
+                throws IOException {
+            List<Cell> cellList = put.get(ref.getFamily(), ref.getQualifier());
+            if (cellList == null || cellList.isEmpty()) {
+                return null;
+            }
+            Cell cell = cellList.get(0);
+            valuePtr.set(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
+            return valuePtr;
+        }
+
+        /** Returns the row key of the wrapped put. */
+        @Override
+        public byte[] getRowKey() {
+            return put.getRow();
+        }
+
+    }
+
+    private byte[] getIndexRowKey(final Put dataRow) throws IOException {
+        ValueGetter valueGetter = new SimpleValueGetter(dataRow);
+        byte[] builtIndexRowKey = indexMaintainer.buildRowKey(valueGetter, new 
ImmutableBytesWritable(dataRow.getRow()),
+                null, null, HConstants.LATEST_TIMESTAMP);
+        return builtIndexRowKey;
+    }
+
+    /**
+     * Returns the largest cell timestamp found in the given put.
+     *
+     * @param put mutation whose cells are inspected across all column families
+     * @return the maximum cell timestamp, or 0 when the put has no cell with a timestamp
+     *         greater than 0
+     */
+    private long getMaxTimestamp(Put put) {
+        long ts = 0;
+        for (List<Cell> cells : put.getFamilyCellMap().values()) {
+            if (cells == null) {
+                // Skip only this family; the remaining families must still be inspected.
+                continue;
+            }
+            for (Cell cell : cells) {
+                ts = Math.max(ts, cell.getTimestamp());
+            }
+        }
+        return ts;
+    }
+
+    private boolean verifySingleIndexRow(Result indexRow, final Put dataRow) 
throws IOException {
 
 Review comment:
   This method should also check the max lookback window and update the max 
lookback counters.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to