vincentpoon commented on a change in pull request #575: PHOENIX-5456
IndexScrutinyTool slow for indexes on multitenant tables…
URL: https://github.com/apache/phoenix/pull/575#discussion_r319313301
##########
File path:
phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
##########
@@ -32,914 +31,621 @@
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
-import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.TreeSet;
-import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
-import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.mapreduce.CsvBulkImportUtil;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTableOutput;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.OutputFormat;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
-import org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters;
import org.apache.phoenix.mapreduce.index.SourceTargetColumnNames;
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
-import org.apache.phoenix.query.BaseTest;
-import org.apache.phoenix.query.ConnectionQueryServices;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.PhoenixRuntime;
import org.apache.phoenix.util.PropertiesUtil;
-import org.apache.phoenix.util.ReadOnlyProps;
import org.apache.phoenix.util.SchemaUtil;
import org.junit.After;
import org.junit.Before;
-import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
-import org.junit.experimental.runners.Enclosed;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
* Tests for the {@link IndexScrutinyTool}
*/
@Category(NeedsOwnMiniClusterTest.class)
-@RunWith(Enclosed.class)
-public class IndexScrutinyToolIT {
-
- abstract public static class SharedIndexToolIT extends BaseTest {
- protected String outputDir;
-
- @BeforeClass public static void doSetup() throws Exception {
- Map<String, String> serverProps = Maps.newHashMap();
- //disable major compactions
- serverProps.put(HConstants.MAJOR_COMPACTION_PERIOD, "0");
- Map<String, String> clientProps = Maps.newHashMap();
- setUpTestDriver(new
ReadOnlyProps(serverProps.entrySet().iterator()),
- new ReadOnlyProps(clientProps.entrySet().iterator()));
- }
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolIT extends IndexScrutinyToolBaseIT {
+ private String dataTableDdl;
+ private String indexTableDdl;
- protected List<Job> runScrutiny(String[] cmdArgs) throws Exception {
- IndexScrutinyTool scrutiny = new IndexScrutinyTool();
- Configuration conf = new
Configuration(getUtility().getConfiguration());
- scrutiny.setConf(conf);
- int status = scrutiny.run(cmdArgs);
- assertEquals(0, status);
- for (Job job : scrutiny.getJobs()) {
- assertTrue(job.waitForCompletion(true));
- }
- return scrutiny.getJobs();
- }
-
- protected String[] getArgValues(String schemaName, String dataTable,
String indxTable, Long batchSize,
- SourceTable sourceTable, boolean outputInvalidRows,
OutputFormat outputFormat, Long maxOutputRows, String tenantId, Long
scrutinyTs) {
- final List<String> args = Lists.newArrayList();
- if (schemaName != null) {
- args.add("-s");
- args.add(schemaName);
- }
- args.add("-dt");
- args.add(dataTable);
- args.add("-it");
- args.add(indxTable);
-
- // TODO test snapshot reads
- // if(useSnapshot) {
- // args.add("-snap");
- // }
-
- if (OutputFormat.FILE.equals(outputFormat)) {
- args.add("-op");
- outputDir = "/tmp/" + UUID.randomUUID().toString();
- args.add(outputDir);
- }
+ private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
- args.add("-t");
- args.add(String.valueOf(scrutinyTs));
- args.add("-run-foreground");
- if (batchSize != null) {
- args.add("-b");
- args.add(String.valueOf(batchSize));
- }
+ private static final String
+ INDEX_UPSERT_SQL =
+ "UPSERT INTO %s (\"0:NAME\", \":ID\", \"0:ZIP\",
\"0:EMPLOY_DATE\") values (?,?,?,?)";
- // default to using data table as the source table
- args.add("-src");
- if (sourceTable == null) {
- args.add(SourceTable.DATA_TABLE_SOURCE.name());
- } else {
- args.add(sourceTable.name());
- }
- if (outputInvalidRows) {
- args.add("-o");
- }
- if (outputFormat != null) {
- args.add("-of");
- args.add(outputFormat.name());
- }
- if (maxOutputRows != null) {
- args.add("-om");
- args.add(maxOutputRows.toString());
- }
- if (tenantId != null) {
- args.add("-tenant");
- args.add(tenantId);
- }
- return args.toArray(new String[0]);
- }
-
- protected long getCounterValue(Counters counters,
Enum<PhoenixScrutinyJobCounters> counter) {
- return counters.findCounter(counter).getValue();
- }
-
- protected int countRows(Connection conn, String tableFullName) throws
SQLException {
- ResultSet count = conn.createStatement().executeQuery("select
count(*) from " + tableFullName);
- count.next();
- int numRows = count.getInt(1);
- return numRows;
- }
-
- }
+ private static final String DELETE_SQL = "DELETE FROM %s ";
- @RunWith(Parameterized.class) public static class
IndexScrutinyToolNonTenantIT extends SharedIndexToolIT {
+ private String schemaName;
+ private String dataTableName;
+ private String dataTableFullName;
+ private String indexTableName;
+ private String indexTableFullName;
- private String dataTableDdl;
- private String indexTableDdl;
+ private Connection conn;
- private static final String UPSERT_SQL = "UPSERT INTO %s
VALUES(?,?,?,?)";
+ private PreparedStatement dataTableUpsertStmt;
- private static final String
- INDEX_UPSERT_SQL =
- "UPSERT INTO %s (\"0:NAME\", \":ID\", \"0:ZIP\",
\"0:EMPLOY_DATE\") values (?,?,?,?)";
+ private PreparedStatement indexTableUpsertStmt;
- private static final String DELETE_SQL = "DELETE FROM %s ";
+ private long testTime;
+ private Properties props;
- private String schemaName;
- private String dataTableName;
- private String dataTableFullName;
- private String indexTableName;
- private String indexTableFullName;
-
- private Connection conn;
-
- private PreparedStatement dataTableUpsertStmt;
-
- private PreparedStatement indexTableUpsertStmt;
-
- private long testTime;
- private Properties props;
-
- @Parameterized.Parameters public static Collection<Object[]> data() {
- return Arrays.asList(new Object[][] { { "CREATE TABLE %s (ID
INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP,
EMPLOYER VARCHAR)",
- "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE
(ZIP)" }, { "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR,
ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
- "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)"
}, { "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP
INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
- "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE
(ZIP)" } });
- }
+ @Parameterized.Parameters public static Collection<Object[]> data() {
+ return Arrays.asList(new Object[][] { { "CREATE TABLE %s (ID INTEGER
NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP,
EMPLOYER VARCHAR)",
+ "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE
(ZIP)" }, { "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR,
ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
+ "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" }, {
"CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER,
EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
Review comment:
I think we should be able to add "MULTI_TENANT=TRUE" here without anything
failing. That said, "testColumnsForSelectQueryOnMultiTenantTable" in the
other class already covers what we want to test.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services