vincentpoon commented on a change in pull request #575: PHOENIX-5456
IndexScrutinyTool slow for indexes on multitenant tables…
URL: https://github.com/apache/phoenix/pull/575#discussion_r319313301
##########
File path:
phoenix-core/src/it/java/org/apache/phoenix/end2end/IndexScrutinyToolIT.java
##########
@@ -32,914 +31,621 @@
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
-import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.TreeSet;
-import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
-import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.mapreduce.CsvBulkImportUtil;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTableOutput;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.OutputFormat;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
-import org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters;
import org.apache.phoenix.mapreduce.index.SourceTargetColumnNames;
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
-import org.apache.phoenix.query.BaseTest;
-import org.apache.phoenix.query.ConnectionQueryServices;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.PhoenixRuntime;
import org.apache.phoenix.util.PropertiesUtil;
-import org.apache.phoenix.util.ReadOnlyProps;
import org.apache.phoenix.util.SchemaUtil;
import org.junit.After;
import org.junit.Before;
-import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
-import org.junit.experimental.runners.Enclosed;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
* Tests for the {@link IndexScrutinyTool}
*/
@Category(NeedsOwnMiniClusterTest.class)
-@RunWith(Enclosed.class)
-public class IndexScrutinyToolIT {
-
- abstract public static class SharedIndexToolIT extends BaseTest {
- protected String outputDir;
-
- @BeforeClass public static void doSetup() throws Exception {
- Map<String, String> serverProps = Maps.newHashMap();
- //disable major compactions
- serverProps.put(HConstants.MAJOR_COMPACTION_PERIOD, "0");
- Map<String, String> clientProps = Maps.newHashMap();
- setUpTestDriver(new
ReadOnlyProps(serverProps.entrySet().iterator()),
- new ReadOnlyProps(clientProps.entrySet().iterator()));
- }
+@RunWith(Parameterized.class)
+public class IndexScrutinyToolIT extends IndexScrutinyToolBaseIT {
+ private String dataTableDdl;
+ private String indexTableDdl;
- protected List<Job> runScrutiny(String[] cmdArgs) throws Exception {
- IndexScrutinyTool scrutiny = new IndexScrutinyTool();
- Configuration conf = new
Configuration(getUtility().getConfiguration());
- scrutiny.setConf(conf);
- int status = scrutiny.run(cmdArgs);
- assertEquals(0, status);
- for (Job job : scrutiny.getJobs()) {
- assertTrue(job.waitForCompletion(true));
- }
- return scrutiny.getJobs();
- }
-
- protected String[] getArgValues(String schemaName, String dataTable,
String indxTable, Long batchSize,
- SourceTable sourceTable, boolean outputInvalidRows,
OutputFormat outputFormat, Long maxOutputRows, String tenantId, Long
scrutinyTs) {
- final List<String> args = Lists.newArrayList();
- if (schemaName != null) {
- args.add("-s");
- args.add(schemaName);
- }
- args.add("-dt");
- args.add(dataTable);
- args.add("-it");
- args.add(indxTable);
-
- // TODO test snapshot reads
- // if(useSnapshot) {
- // args.add("-snap");
- // }
-
- if (OutputFormat.FILE.equals(outputFormat)) {
- args.add("-op");
- outputDir = "/tmp/" + UUID.randomUUID().toString();
- args.add(outputDir);
- }
+ private static final String UPSERT_SQL = "UPSERT INTO %s VALUES(?,?,?,?)";
- args.add("-t");
- args.add(String.valueOf(scrutinyTs));
- args.add("-run-foreground");
- if (batchSize != null) {
- args.add("-b");
- args.add(String.valueOf(batchSize));
- }
+ private static final String
+ INDEX_UPSERT_SQL =
+ "UPSERT INTO %s (\"0:NAME\", \":ID\", \"0:ZIP\",
\"0:EMPLOY_DATE\") values (?,?,?,?)";
- // default to using data table as the source table
- args.add("-src");
- if (sourceTable == null) {
- args.add(SourceTable.DATA_TABLE_SOURCE.name());
- } else {
- args.add(sourceTable.name());
- }
- if (outputInvalidRows) {
- args.add("-o");
- }
- if (outputFormat != null) {
- args.add("-of");
- args.add(outputFormat.name());
- }
- if (maxOutputRows != null) {
- args.add("-om");
- args.add(maxOutputRows.toString());
- }
- if (tenantId != null) {
- args.add("-tenant");
- args.add(tenantId);
- }
- return args.toArray(new String[0]);
- }
-
- protected long getCounterValue(Counters counters,
Enum<PhoenixScrutinyJobCounters> counter) {
- return counters.findCounter(counter).getValue();
- }
-
- protected int countRows(Connection conn, String tableFullName) throws
SQLException {
- ResultSet count = conn.createStatement().executeQuery("select
count(*) from " + tableFullName);
- count.next();
- int numRows = count.getInt(1);
- return numRows;
- }
-
- }
+ private static final String DELETE_SQL = "DELETE FROM %s ";
- @RunWith(Parameterized.class) public static class
IndexScrutinyToolNonTenantIT extends SharedIndexToolIT {
+ private String schemaName;
+ private String dataTableName;
+ private String dataTableFullName;
+ private String indexTableName;
+ private String indexTableFullName;
- private String dataTableDdl;
- private String indexTableDdl;
+ private Connection conn;
- private static final String UPSERT_SQL = "UPSERT INTO %s
VALUES(?,?,?,?)";
+ private PreparedStatement dataTableUpsertStmt;
- private static final String
- INDEX_UPSERT_SQL =
- "UPSERT INTO %s (\"0:NAME\", \":ID\", \"0:ZIP\",
\"0:EMPLOY_DATE\") values (?,?,?,?)";
+ private PreparedStatement indexTableUpsertStmt;
- private static final String DELETE_SQL = "DELETE FROM %s ";
+ private long testTime;
+ private Properties props;
- private String schemaName;
- private String dataTableName;
- private String dataTableFullName;
- private String indexTableName;
- private String indexTableFullName;
-
- private Connection conn;
-
- private PreparedStatement dataTableUpsertStmt;
-
- private PreparedStatement indexTableUpsertStmt;
-
- private long testTime;
- private Properties props;
-
- @Parameterized.Parameters public static Collection<Object[]> data() {
- return Arrays.asList(new Object[][] { { "CREATE TABLE %s (ID
INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP,
EMPLOYER VARCHAR)",
- "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE
(ZIP)" }, { "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR,
ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
- "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)"
}, { "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP
INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
- "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE
(ZIP)" } });
- }
+ @Parameterized.Parameters public static Collection<Object[]> data() {
+ return Arrays.asList(new Object[][] { { "CREATE TABLE %s (ID INTEGER
NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER, EMPLOY_DATE TIMESTAMP,
EMPLOYER VARCHAR)",
+ "CREATE LOCAL INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE
(ZIP)" }, { "CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR,
ZIP INTEGER, EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
+ "CREATE INDEX %s ON %s (NAME, EMPLOY_DATE) INCLUDE (ZIP)" }, {
"CREATE TABLE %s (ID INTEGER NOT NULL PRIMARY KEY, NAME VARCHAR, ZIP INTEGER,
EMPLOY_DATE TIMESTAMP, EMPLOYER VARCHAR) SALT_BUCKETS=2",
Review comment:
I think we should be able to add "MULTI_TENANT=TRUE" here without anything
failing. That said, "testColumnsForSelectQueryOnMultiTenantTable" in the
other class already covers what we want to test.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services