[ 
https://issues.apache.org/jira/browse/PHOENIX-2890?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15585153#comment-15585153
 ] 

ASF GitHub Bot commented on PHOENIX-2890:
-----------------------------------------

Github user chrajeshbabu commented on a diff in the pull request:

    https://github.com/apache/phoenix/pull/210#discussion_r83827393
  
    --- Diff: 
phoenix-core/src/main/java/org/apache/phoenix/mapreduce/index/IndexTool.java ---
    @@ -167,50 +180,152 @@ private void printHelpAndExit(Options options, int 
exitCode) {
             formatter.printHelp("help", options);
             System.exit(exitCode);
         }
    +    
    +    class JobFactory {
    +        Connection connection;
    +        Configuration configuration;
    +        private Path outputPath;
     
    -    @Override
    -    public int run(String[] args) throws Exception {
    -        Connection connection = null;
    -        try {
    -            CommandLine cmdLine = null;
    -            try {
    -                cmdLine = parseOptions(args);
    -            } catch (IllegalStateException e) {
    -                printHelpAndExit(e.getMessage(), getOptions());
    +        public JobFactory(Connection connection, Configuration 
configuration, Path outputPath) {
    +            this.connection = connection;
    +            this.configuration = configuration;
    +            this.outputPath = outputPath;
    +
    +        }
    +
    +        public Job getJob(String schemaName, String indexTable, String 
dataTable, boolean useDirectApi) throws Exception {
    +            if (indexTable == null) {
    +                return configureJobForPartialBuild(schemaName, dataTable);
    +            } else {
    +                return configureJobForAysncIndex(schemaName, indexTable, 
dataTable, useDirectApi);
                 }
    -            final Configuration configuration = 
HBaseConfiguration.addHbaseResources(getConf());
    -            final String schemaName = 
cmdLine.getOptionValue(SCHEMA_NAME_OPTION.getOpt());
    -            final String dataTable = 
cmdLine.getOptionValue(DATA_TABLE_OPTION.getOpt());
    -            final String indexTable = 
cmdLine.getOptionValue(INDEX_TABLE_OPTION.getOpt());
    +        }
    +        
    +        private Job configureJobForPartialBuild(String schemaName, String 
dataTable) throws Exception {
                 final String qDataTable = 
SchemaUtil.getQualifiedTableName(schemaName, dataTable);
    -            final String qIndexTable = 
SchemaUtil.getQualifiedTableName(schemaName, indexTable);
    -
    +            final PTable pdataTable = PhoenixRuntime.getTable(connection, 
qDataTable);
                 connection = ConnectionUtil.getInputConnection(configuration);
    -            if (!isValidIndexTable(connection, qDataTable, indexTable)) {
    -                throw new IllegalArgumentException(String.format(
    -                    " %s is not an index table for %s ", qIndexTable, 
qDataTable));
    +            long minDisableTimestamp = HConstants.LATEST_TIMESTAMP;
    +            PTable indexWithMinDisableTimestamp = null;
    +            
    +            //Get Indexes in building state, minDisabledTimestamp 
    +            List<String> disableIndexes = new ArrayList<String>();
    +            List<PTable> disabledPIndexes = new ArrayList<PTable>();
    +            for (PTable index : pdataTable.getIndexes()) {
    +                if (index.getIndexState().equals(PIndexState.BUILDING)) {
    +                    disableIndexes.add(index.getTableName().getString());
    +                    disabledPIndexes.add(index);
    +                    if (minDisableTimestamp > 
index.getIndexDisableTimestamp()) {
    +                        minDisableTimestamp = 
index.getIndexDisableTimestamp();
    +                        indexWithMinDisableTimestamp = index;
    +                    }
    +                }
    +            }
    +            
    +            if (indexWithMinDisableTimestamp == null) {
    +                throw new Exception("There is no index for a datatable to 
be rebuild:" + qDataTable);
                 }
    +            if (minDisableTimestamp == 0) {
    +                throw new Exception("It seems Index " + 
indexWithMinDisableTimestamp
    +                        + " has disable timestamp as 0 , please run 
IndexTool with IndexName to build it first");
    +                // TODO probably we can initiate the job by ourself or can 
skip them while making the list for partial build with a warning
    +            }
    +            
    +            long maxTimestamp = getMaxRebuildAsyncDate(schemaName, 
disableIndexes);
    +            
    +            //serialize index maintaienr in job conf with Base64 TODO: 
Need to find better way to serialize them in conf.
    +            List<IndexMaintainer> maintainers = 
Lists.newArrayListWithExpectedSize(disabledPIndexes.size());
    +            for (PTable index : disabledPIndexes) {
    +                maintainers.add(index.getIndexMaintainer(pdataTable, 
connection.unwrap(PhoenixConnection.class)));
    +            }
    +            ImmutableBytesWritable indexMetaDataPtr = new 
ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY);
    +            IndexMaintainer.serializeAdditional(pdataTable, 
indexMetaDataPtr, disabledPIndexes, connection.unwrap(PhoenixConnection.class));
    +            PhoenixConfigurationUtil.setIndexMaintainers(configuration, 
indexMetaDataPtr);
    +            
    +            //Prepare raw scan 
    +            Scan scan = IndexManagementUtil.newLocalStateScan(maintainers);
    +            scan.setTimeRange(minDisableTimestamp - 1, maxTimestamp);
    +            scan.setRaw(true);
    +            scan.setCacheBlocks(false);
    --- End diff --
    
    Why not cache blocks? With block caching disabled, every next call to the 
server has to read data from disk. But recent data mostly resides together in the same blocks.


> Extend IndexTool to allow incremental index rebuilds
> ----------------------------------------------------
>
>                 Key: PHOENIX-2890
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-2890
>             Project: Phoenix
>          Issue Type: Improvement
>            Reporter: Ankit Singhal
>            Assignee: Ankit Singhal
>            Priority: Minor
>             Fix For: 4.9.0
>
>         Attachments: PHOENIX-2890.patch, PHOENIX-2890_wip.patch
>
>
> Currently, IndexTool is used for the initial index rebuild, but I think we should 
> extend it to also be used for recovering an index from its last disabled timestamp. 
> In general terms, if we run IndexTool on an already existing/new index, then it 
> should follow the same semantics as those followed by the background index rebuilding 
> thread.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to