kadirozde commented on a change in pull request #758: PHOENIX-5804: Implement strong verification with -v ONLY option for o…
URL: https://github.com/apache/phoenix/pull/758#discussion_r409743871
##########
File path: phoenix-core/src/main/java/org/apache/phoenix/coprocessor/IndexerRegionScanner.java
##########
@@ -0,0 +1,484 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.coprocessor;
+
+import static org.apache.phoenix.hbase.index.write.AbstractParallelWriterIndexCommitter.INDEX_WRITER_KEEP_ALIVE_TIME_CONF_KEY;
+import static org.apache.phoenix.query.QueryConstants.AGG_TIMESTAMP;
+import static org.apache.phoenix.query.QueryConstants.EMPTY_COLUMN_VALUE_BYTES;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN;
+import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN_FAMILY;
+import static org.apache.phoenix.query.QueryConstants.UNGROUPED_AGG_ROW_KEY;
+import static org.apache.phoenix.query.QueryServices.INDEX_REBUILD_PAGE_SIZE_IN_ROWS;
+import static org.apache.phoenix.query.QueryServices.MUTATE_BATCH_SIZE_ATTRIB;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ExecutionException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Delete;
+
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.RegionScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+import org.apache.phoenix.compile.ScanRanges;
+import org.apache.phoenix.filter.SkipScanFilter;
+import org.apache.phoenix.hbase.index.ValueGetter;
+import org.apache.phoenix.hbase.index.covered.update.ColumnReference;
+import org.apache.phoenix.hbase.index.parallel.EarlyExitFailure;
+import org.apache.phoenix.hbase.index.parallel.Task;
+import org.apache.phoenix.hbase.index.parallel.TaskBatch;
+import org.apache.phoenix.hbase.index.parallel.TaskRunner;
+import org.apache.phoenix.hbase.index.parallel.ThreadPoolBuilder;
+import org.apache.phoenix.hbase.index.parallel.ThreadPoolManager;
+import org.apache.phoenix.hbase.index.parallel.WaitForCompletionTaskRunner;
+import org.apache.phoenix.hbase.index.table.HTableFactory;
+import org.apache.phoenix.hbase.index.util.GenericKeyValueBuilder;
+import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
+
+import org.apache.phoenix.index.IndexMaintainer;
+import org.apache.phoenix.index.PhoenixIndexCodec;
+import org.apache.phoenix.mapreduce.index.IndexTool;
+import org.apache.phoenix.mapreduce.index.IndexVerificationResultRepository;
+import org.apache.phoenix.query.KeyRange;
+import org.apache.phoenix.query.QueryServicesOptions;
+import org.apache.phoenix.schema.types.PLong;
+import org.apache.phoenix.schema.types.PVarbinary;
+import org.apache.phoenix.util.KeyValueUtil;
+import org.apache.phoenix.util.ServerUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.Maps;
+
+public class IndexerRegionScanner extends BaseRegionScanner {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(IndexerRegionScanner.class);
+    public static final String NUM_CONCURRENT_INDEX_VERIFY_THREADS_CONF_KEY = "index.verify.threads.max";
+    private static final int DEFAULT_CONCURRENT_INDEX_VERIFY_THREADS = 17;
+    public static final String INDEX_VERIFY_ROW_COUNTS_PER_TASK_CONF_KEY = "index.verify.threads.max";
+    private static final int DEFAULT_INDEX_VERIFY_ROW_COUNTS_PER_TASK = 2048;
+    private long pageSizeInRows = Long.MAX_VALUE;
+    private int rowCountPerTask;
+    private boolean hasMore;
+    private final int maxBatchSize;
+    private UngroupedAggregateRegionObserver.MutationList mutations;
+    private byte[] indexMetaData;
+    private Scan scan;
+    private RegionScanner innerScanner;
+    private Region region;
+    private IndexMaintainer indexMaintainer;
+    private Table indexHTable = null;
+    private IndexTool.IndexVerifyType verifyType = IndexTool.IndexVerifyType.NONE;
+    private boolean verify = false;
+    private Map<byte[], Put> indexKeyToDataPutMap;
+    private Map<byte[], Put> dataKeyToDataPutMap;
+    private TaskRunner pool;
+    private TaskBatch<Boolean> tasks;
+    private String exceptionMessage;
+    private HTableFactory hTableFactory;
+    private int indexTableTTL;
+    private IndexToolVerificationResult verificationResult;
+
+    private IndexVerificationResultRepository verificationResultRepository;
+
+    IndexerRegionScanner (final RegionScanner innerScanner, final Region region, final Scan scan,
+            final RegionCoprocessorEnvironment env) throws IOException {
+        super(innerScanner);
+        final Configuration config = env.getConfiguration();
+        if (scan.getAttribute(BaseScannerRegionObserver.INDEX_REBUILD_PAGING) != null) {
+            pageSizeInRows = config.getLong(INDEX_REBUILD_PAGE_SIZE_IN_ROWS,
+                    QueryServicesOptions.DEFAULT_INDEX_REBUILD_PAGE_SIZE_IN_ROWS);
+        }
+        maxBatchSize = config.getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE);
+        mutations = new UngroupedAggregateRegionObserver.MutationList(maxBatchSize);
+        indexMetaData = scan.getAttribute(PhoenixIndexCodec.INDEX_PROTO_MD);
+        if (indexMetaData == null) {
+            indexMetaData = scan.getAttribute(PhoenixIndexCodec.INDEX_MD);
+        }
+        List<IndexMaintainer> maintainers = IndexMaintainer.deserialize(indexMetaData, true);
+        indexMaintainer = maintainers.get(0);
+        this.scan = scan;
+        this.innerScanner = innerScanner;
+        this.region = region;
+        byte[] valueBytes = scan.getAttribute(BaseScannerRegionObserver.INDEX_REBUILD_VERIFY_TYPE);
+        if (valueBytes != null) {
+            verificationResult = new IndexToolVerificationResult(scan);
+            verifyType = IndexTool.IndexVerifyType.fromValue(valueBytes);
+            if (verifyType != IndexTool.IndexVerifyType.NONE) {
+                verify = true;
+                // Create the following objects only for rebuilds by IndexTool
+                hTableFactory = ServerUtil.getDelegateHTableFactory(env, ServerUtil.ConnectionType.INDEX_WRITER_CONNECTION);
+                indexHTable = hTableFactory.getTable(new ImmutableBytesPtr(indexMaintainer.getIndexTableName()));
+                indexTableTTL = indexHTable.getTableDescriptor().getColumnFamilies()[0].getTimeToLive();
+                verificationResultRepository =
+                        new IndexVerificationResultRepository(indexMaintainer.getIndexTableName(), hTableFactory);
+                indexKeyToDataPutMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
+                dataKeyToDataPutMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
+                pool = new WaitForCompletionTaskRunner(ThreadPoolManager.getExecutor(
+                        new ThreadPoolBuilder("IndexVerify",
+                                env.getConfiguration()).setMaxThread(NUM_CONCURRENT_INDEX_VERIFY_THREADS_CONF_KEY,
+                                DEFAULT_CONCURRENT_INDEX_VERIFY_THREADS).setCoreTimeout(
+                                INDEX_WRITER_KEEP_ALIVE_TIME_CONF_KEY), env));
+                rowCountPerTask = config.getInt(INDEX_VERIFY_ROW_COUNTS_PER_TASK_CONF_KEY,
+                        DEFAULT_INDEX_VERIFY_ROW_COUNTS_PER_TASK);
+            }
+        }
+    }
+
+    @Override
+    public HRegionInfo getRegionInfo() {
+        return region.getRegionInfo();
+    }
+
+    @Override
+    public boolean isFilterDone() { return false; }
+
+    @Override
+    public void close() throws IOException {
+        innerScanner.close();
+        if (verify) {
+            try {
+                verificationResultRepository.logToIndexToolResultTable(verificationResult,
+                        verifyType, region.getRegionInfo().getRegionName());
+            } finally {
+                this.pool.stop("IndexerRegionScanner is closing");
+                hTableFactory.shutdown();
+                indexHTable.close();
+                verificationResultRepository.close();
+            }
+        }
+    }
+
+    private class SimpleValueGetter implements ValueGetter {
+        final ImmutableBytesWritable valuePtr = new ImmutableBytesWritable();
+        final Put put;
+        SimpleValueGetter (final Put put) {
+            this.put = put;
+        }
+        @Override
+        public ImmutableBytesWritable getLatestValue(ColumnReference ref, long ts) throws IOException {
+            List<Cell> cellList = put.get(ref.getFamily(), ref.getQualifier());
+            if (cellList == null || cellList.isEmpty()) {
+                return null;
+            }
+            Cell cell = cellList.get(0);
+            valuePtr.set(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
+            return valuePtr;
+        }
+
+        @Override
+        public byte[] getRowKey() {
+            return put.getRow();
+        }
+
+    }
+
+    private byte[] getIndexRowKey(final Put dataRow) throws IOException {
+        ValueGetter valueGetter = new SimpleValueGetter(dataRow);
+        byte[] builtIndexRowKey = indexMaintainer.buildRowKey(valueGetter, new ImmutableBytesWritable(dataRow.getRow()),
+                null, null, HConstants.LATEST_TIMESTAMP);
+        return builtIndexRowKey;
+    }
+
+    private long getMaxTimestamp(Put put) {

Review comment:
   You can remove this method and use the one from IndexRebuildRegionScanner

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

With regards,
Apache Git Services
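For context on the suggestion above, here is a minimal sketch of the kind of shared helper the comment points at, assuming IndexRebuildRegionScanner defines (or could expose) a static getMaxTimestamp over an HBase Mutation. The class name MaxTimestampHelper and the exact signature are illustrative assumptions, not the actual Phoenix API at this commit.

    import java.util.List;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.client.Mutation;

    // Illustrative sketch only: a standalone static helper with the assumed shape of
    // the getMaxTimestamp method in IndexRebuildRegionScanner, so IndexerRegionScanner
    // could call the shared version instead of keeping its own copy.
    public final class MaxTimestampHelper {

        private MaxTimestampHelper() {
        }

        // Returns the largest cell timestamp carried by the mutation (0 if it has no cells).
        public static long getMaxTimestamp(Mutation m) {
            long ts = 0;
            for (List<Cell> cells : m.getFamilyCellMap().values()) {
                if (cells == null) {
                    continue;
                }
                for (Cell cell : cells) {
                    if (cell.getTimestamp() > ts) {
                        ts = cell.getTimestamp();
                    }
                }
            }
            return ts;
        }
    }

With a shared helper of this shape (or a static method on IndexRebuildRegionScanner itself), the call sites in IndexerRegionScanner would simply invoke that method rather than redefining getMaxTimestamp(Put) locally.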
