[
https://issues.apache.org/jira/browse/PHOENIX-5592?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17238980#comment-17238980
]
ASF GitHub Bot commented on PHOENIX-5592:
-----------------------------------------
jpisaac commented on a change in pull request #975:
URL: https://github.com/apache/phoenix/pull/975#discussion_r530563942
##########
File path:
phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/PhoenixMapReduceUtil.java
##########
@@ -125,6 +127,20 @@ public static void setInput(final Job job, final Class<?
extends DBWritable> inp
PhoenixConfigurationUtil.setSelectColumnNames(configuration,
fieldNames);
}
+ /**
+ *
+ * @param job MR job instance
+ * @param tool ViewTtlTool for Phoenix TTL deletion MR job
Review comment:
nit: "ViewTTL" is still used in comments and method names; please rename to "PhoenixTTL"
##########
File path:
phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixTTLTool.java
##########
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobPriority;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.phoenix.mapreduce.util.ConnectionUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.Connection;
+import java.util.Properties;
+
+public class PhoenixTTLTool extends Configured implements Tool {
+ private static final Logger LOGGER =
LoggerFactory.getLogger(PhoenixTTLTool.class);
+
+ public static enum MR_COUNTER_METRICS {
+ VIEW_FAILED,
+ VIEW_SUCCEED,
+ VIEW_INDEX_FAILED,
+ VIEW_INDEX_SUCCEED
+ }
+
+ public static final String DELETE_ALL_VIEWS = "DELETE_ALL_VIEWS";
+ public static final int DEFAULT_MAPPER_SPLIT_SIZE = 10;
+ public static final int DEFAULT_QUERY_BATCH_SIZE = 100;
+
+ private static final Option DELETE_ALL_VIEWS_OPTION = new Option("a",
"all", false,
+ "Delete all views from all tables.");
+ private static final Option VIEW_NAME_OPTION = new Option("v", "view",
true,
+ "Delete Phoenix View Name");
+ private static final Option TENANT_ID_OPTION = new Option("i", "id", true,
+ "Delete an view based on the tenant id.");
+ private static final Option JOB_PRIORITY_OPTION = new Option("p",
"job-priority", true,
+ "Define job priority from 0(highest) to 4");
+ private static final Option SPLIT_SIZE_OPTION = new Option("s",
"split-size-per-mapper", true,
+ "Define split size for each mapper.");
+ private static final Option BATCH_SIZE_OPTION = new Option("b",
"batch-size-for-query-more", true,
+ "Define batch size for fetching views metadata from syscat.");
+ private static final Option RUN_FOREGROUND_OPTION = new Option("runfg",
+ "run-foreground", false, "If specified, runs ViewTTLTool " +
+ "in Foreground. Default - Runs the build in background");
+
+ private static final Option HELP_OPTION = new Option("h", "help", false,
"Help");
+
+ private Configuration configuration;
+ private Connection connection;
+ private String viewName;
+ private String tenantId;
+ private String jobName;
+ private boolean isDeletingAllViews;
+ private JobPriority jobPriority;
+ private boolean isForeground;
+ private int splitSize;
+ private int batchSize;
+ private Job job;
+
+ public void parseArgs(String[] args) {
+ CommandLine cmdLine;
+ try {
+ cmdLine = parseOptions(args);
+ } catch (IllegalStateException e) {
+ printHelpAndExit(e.getMessage(), getOptions());
+ throw e;
+ }
+
+ if (getConf() == null) {
+ setConf(HBaseConfiguration.create());
+ }
+
+ if (cmdLine.hasOption(DELETE_ALL_VIEWS_OPTION.getOpt())) {
+ this.isDeletingAllViews = true;
+ } else if (cmdLine.hasOption(VIEW_NAME_OPTION.getOpt())) {
+ viewName = cmdLine.getOptionValue(VIEW_NAME_OPTION.getOpt());
+ this.isDeletingAllViews = false;
+ }
+
+ if (cmdLine.hasOption(TENANT_ID_OPTION.getOpt())) {
+ tenantId = cmdLine.getOptionValue((TENANT_ID_OPTION.getOpt()));
+ }
+
+ if (cmdLine.hasOption(SPLIT_SIZE_OPTION.getOpt())) {
+ splitSize =
Integer.valueOf(cmdLine.getOptionValue(SPLIT_SIZE_OPTION.getOpt()));
+ } else {
+ splitSize = DEFAULT_MAPPER_SPLIT_SIZE;
+ }
+
+ if (cmdLine.hasOption(BATCH_SIZE_OPTION.getOpt())) {
+ batchSize =
Integer.valueOf(cmdLine.getOptionValue(SPLIT_SIZE_OPTION.getOpt()));
+ } else {
+ batchSize = DEFAULT_QUERY_BATCH_SIZE;
+ }
+
+ isForeground = cmdLine.hasOption(RUN_FOREGROUND_OPTION.getOpt());
+ }
+
+ public String getJobPriority() {
+ return this.jobPriority.toString();
+ }
+
+ private JobPriority getJobPriority(CommandLine cmdLine) {
+ String jobPriorityOption =
cmdLine.getOptionValue(JOB_PRIORITY_OPTION.getOpt());
+ if (jobPriorityOption == null) {
+ return JobPriority.NORMAL;
+ }
+
+ switch (jobPriorityOption) {
+ case "0" : return JobPriority.VERY_HIGH;
+ case "1" : return JobPriority.HIGH;
+ case "2" : return JobPriority.NORMAL;
+ case "3" : return JobPriority.LOW;
+ case "4" : return JobPriority.VERY_LOW;
+ default:
+ return JobPriority.NORMAL;
+ }
+ }
+
+ public Job getJob() {
+ return this.job;
+ }
+
+ public boolean isDeletingAllViews() {
+ return this.isDeletingAllViews;
+ }
+
+ public String getTenantId() {
+ return this.tenantId;
+ }
+
+ public String getViewName() {
+ return this.viewName;
+ }
+
+ public int getSplitSize() {
+ return this.splitSize;
+ }
+
+ public int getBatchSize() {
+ return this.batchSize;
+ }
+
+ public CommandLine parseOptions(String[] args) {
+ final Options options = getOptions();
+ CommandLineParser parser = new PosixParser();
+ CommandLine cmdLine = null;
+ try {
+ cmdLine = parser.parse(options, args);
+ } catch (ParseException e) {
+ printHelpAndExit("Error parsing command line options: " +
e.getMessage(), options);
+ }
+
+ if (!cmdLine.hasOption(DELETE_ALL_VIEWS_OPTION.getOpt()) &&
+ !cmdLine.hasOption(VIEW_NAME_OPTION.getOpt()) &&
+ !cmdLine.hasOption(TENANT_ID_OPTION.getOpt())) {
+ throw new IllegalStateException("No deletion job is specified, " +
+ "please indicate deletion job for ALL/TABLE/VIEW/TENANT
level");
+ }
+
+ if (cmdLine.hasOption(HELP_OPTION.getOpt())) {
+ printHelpAndExit(options, 0);
+ }
+
+ this.jobPriority = getJobPriority(cmdLine);
+
+ return cmdLine;
+ }
+
+ private Options getOptions() {
+ final Options options = new Options();
+ options.addOption(DELETE_ALL_VIEWS_OPTION);
+ options.addOption(VIEW_NAME_OPTION);
+ options.addOption(TENANT_ID_OPTION);
+ options.addOption(HELP_OPTION);
+ options.addOption(JOB_PRIORITY_OPTION);
+ options.addOption(RUN_FOREGROUND_OPTION);
+ options.addOption(SPLIT_SIZE_OPTION);
+ options.addOption(BATCH_SIZE_OPTION);
+
+ return options;
+ }
+
+ private void printHelpAndExit(String errorMessage, Options options) {
+ System.err.println(errorMessage);
+ LOGGER.error(errorMessage);
+ printHelpAndExit(options, 1);
+ }
+
+ private void printHelpAndExit(Options options, int exitCode) {
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("help", options);
+ System.exit(exitCode);
+ }
+
+ public void setJobName(String jobName) {
+ this.jobName = jobName;
+ }
+
+ public String getJobName() {
+ if (this.jobName == null) {
+ String jobName;
+ if (this.isDeletingAllViews) {
+ jobName = DELETE_ALL_VIEWS;
+ } else if (this.getViewName() != null) {
+ jobName = this.getViewName();
+ } else {
+ jobName = this.tenantId;
+ }
+ this.jobName = "ViewTTLTool-" + jobName + "-";
Review comment:
nit: use "PhoenixTTLTool" here and in a few more messages below
##########
File path:
phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixTTLToolIT.java
##########
@@ -0,0 +1,730 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.filter.CompareFilter;
+import org.apache.hadoop.hbase.filter.RegexStringComparator;
+import org.apache.phoenix.mapreduce.PhoenixTTLTool;
+import org.apache.phoenix.mapreduce.util.PhoenixMultiInputUtil;
+import org.apache.phoenix.query.HBaseFactoryProvider;
+import org.junit.Test;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.Statement;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class PhoenixTTLToolIT extends ParallelStatsDisabledIT {
+
+ private final long PHOENIX_TTL_EXPIRE_IN_A_MILLISECOND = 1;
+ private final long PHOENIX_TTL_EXPIRE_IN_A_DAY = 1000 * 60 * 60 * 24;
+
+ private final String VIEW_PREFIX1 = "V01";
+ private final String VIEW_PREFIX2 = "V02";
+ private final String UPSERT_TO_GLOBAL_VIEW_QUERY = "UPSERT INTO %s
(PK1,A,B,C,D) VALUES(1,1,1,1,1)";
+ private final String UPSERT_TO_LEAF_VIEW_QUERY = "UPSERT INTO %s
(PK1,A,B,C,D,E,F) VALUES(1,1,1,1,1,1,1)";
+ private final String VIEW_DDL_WITH_ID_PREFIX_AND_TTL = "CREATE VIEW %s (" +
+ "PK1 BIGINT PRIMARY KEY,A BIGINT, B BIGINT, C BIGINT, D BIGINT)" +
+ " AS SELECT * FROM %s WHERE ID = '%s' PHOENIX_TTL = %d";
+ private final String VIEW_INDEX_DDL = "CREATE INDEX %s ON %s(%s)";
+ private final String TENANT_VIEW_DDL = "CREATE VIEW %s (E BIGINT, F
BIGINT) AS SELECT * FROM %s";
+
+ private void verifyNumberOfRowsFromHBaseLevel(String tableName, String
regrex, int expectedRows)
+ throws Exception {
+ try (Table table =
HBaseFactoryProvider.getHConnectionFactory().createConnection(config).getTable(tableName))
{
+ Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new
RegexStringComparator(regrex));
+ Scan scan = new Scan();
+ scan.setFilter(filter);
+ assertEquals(expectedRows, getRowCount(table,scan));
+ }
+ }
+
+ private void verifyNumberOfRows(String tableName, String tenantId, int
expectedRows,
+ Connection conn) throws Exception {
+ String query = "SELECT COUNT(*) FROM " + tableName;
+ if (tenantId != null) {
+ query = query + " WHERE TENANT_ID = '" + tenantId + "'";
+ }
+ try (Statement stm = conn.createStatement()) {
+
+ ResultSet rs = stm.executeQuery(query);
+ assertTrue(rs.next());
+ assertEquals(expectedRows, rs.getInt(1));
+ }
+ }
+
+ private long getRowCount(Table table, Scan scan) throws Exception {
+ ResultScanner scanner = table.getScanner(scan);
+ int count = 0;
+ for (Result dummy : scanner) {
+ count++;
+ }
+ scanner.close();
+ return count;
+ }
+
+ private void createMultiTenantTable(Connection conn, String tableName)
throws Exception {
+ String ddl = "CREATE TABLE " + tableName +
+ " (TENANT_ID CHAR(10) NOT NULL, ID CHAR(10) NOT NULL, NUM
BIGINT CONSTRAINT " +
+ "PK PRIMARY KEY (TENANT_ID,ID)) MULTI_TENANT=true,
COLUMN_ENCODED_BYTES = 0";
+
+ try (Statement stmt = conn.createStatement()) {
+ stmt.execute(ddl);
+ }
+ }
+
+ /*
+ BaseMultiTenantTable
+ GlobalView1 with TTL(1 ms)
+ Index1 Index2
+
+ Creating 2 tenantViews and Upserting data.
+ After running the MR job, it should delete all data.
+ */
+ @Test
+ public void testTenantViewOnGlobalViewWithMoreThanOneIndex() throws
Exception {
+ String schema = generateUniqueName();
+ String baseTableFullName = schema + "." + generateUniqueName();
+ String indexTable1 = generateUniqueName() + "_IDX";
+ String indexTable2 = generateUniqueName() + "_IDX";
+ String globalViewName = schema + "." + generateUniqueName();
+ String tenant1 = generateUniqueName();
+ String tenant2 = generateUniqueName();
+ String tenantView1 = schema + "." + generateUniqueName();
+ String tenantView2 = schema + "." + generateUniqueName();
+ String indexTable = "_IDX_" + baseTableFullName;
+
+ try (Connection globalConn = DriverManager.getConnection(getUrl());
+ Connection tenant1Connection =
PhoenixMultiInputUtil.buildTenantConnection(getUrl(), tenant1);
+ Connection tenant2Connection =
PhoenixMultiInputUtil.buildTenantConnection(getUrl(), tenant2)) {
+
+ createMultiTenantTable(globalConn, baseTableFullName);
+
globalConn.createStatement().execute(String.format(VIEW_DDL_WITH_ID_PREFIX_AND_TTL,
+ globalViewName, baseTableFullName, VIEW_PREFIX1,
PHOENIX_TTL_EXPIRE_IN_A_MILLISECOND));
+
+ globalConn.createStatement().execute(String.format(VIEW_INDEX_DDL,
indexTable1, globalViewName, "A,B"));
+ globalConn.createStatement().execute(String.format(VIEW_INDEX_DDL,
indexTable2, globalViewName, "C,D"));
+
+
tenant1Connection.createStatement().execute(String.format(TENANT_VIEW_DDL,tenantView1,
globalViewName));
+
tenant2Connection.createStatement().execute(String.format(TENANT_VIEW_DDL,tenantView2,
globalViewName));
+
+
tenant1Connection.createStatement().execute(String.format(UPSERT_TO_LEAF_VIEW_QUERY,
tenantView1));
+ tenant1Connection.commit();
+ verifyNumberOfRows(baseTableFullName, tenant1, 1, globalConn);
+
tenant2Connection.createStatement().execute(String.format(UPSERT_TO_LEAF_VIEW_QUERY,
tenantView2));
+ tenant2Connection.commit();
+ verifyNumberOfRows(baseTableFullName, tenant2, 1, globalConn);
+
+ verifyNumberOfRowsFromHBaseLevel(indexTable, ".*" + tenant1 +
".*", 2);
Review comment:
nit: add comments on why 2 rows are expected
##########
File path:
phoenix-core/src/main/java/org/apache/phoenix/mapreduce/util/DefaultMultiViewSplitStrategy.java
##########
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.mapreduce.util;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.phoenix.mapreduce.PhoenixMultiViewInputSplit;
+
+import java.util.List;
+
+import static
org.apache.phoenix.mapreduce.ViewTTLTool.DEFAULT_MAPPER_SPLIT_SIZE;
+
+public class DefaultMultiViewSplitStrategy implements MultiViewSplitStrategy {
+
+ public List<InputSplit> generateSplits(List<ViewInfoWritable> views,
Configuration configuration) {
+ int numViewsInSplit =
PhoenixConfigurationUtil.getMultiViewSplitSize(configuration);
+
+ if (numViewsInSplit < 1) {
Review comment:
Sorry, my bad!!
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> MapReduce job to asynchronously delete rows where the VIEW_TTL has expired.
> ---------------------------------------------------------------------------
>
> Key: PHOENIX-5592
> URL: https://issues.apache.org/jira/browse/PHOENIX-5592
> Project: Phoenix
> Issue Type: Sub-task
> Reporter: Xinyi Yan
> Assignee: Xinyi Yan
> Priority: Major
> Fix For: 4.16.0
>
> Time Spent: 7h
> Remaining Estimate: 0h
>
> Find the entities in the system catalog, i.e. the views that have TTLs
> defined on them. Spawn an MR job, whose splitter will create a map task. The
> mapper issues a "select count(1)" query on the entity and injects the TTL
> attributes and the Delete Hint during the Map task. Since the current Phoenix
> MR framework doesn't support this feature, PhoenixMultiViewReader,
> PhoenixMultiViewInputSplit, PhoenixMultiViewInputFormat, and a few MR-related
> util classes need to be implemented.
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)