jon-wei commented on a change in pull request #9714:
URL: https://github.com/apache/druid/pull/9714#discussion_r416317706
##########
File path:
integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java
##########
@@ -19,87 +19,147 @@
package org.apache.druid.tests.hadoop;
-import com.google.inject.Inject;
+import com.google.common.collect.ImmutableList;
+import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
+import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
+import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
-import org.apache.druid.testing.IntegrationTestingConfig;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.testing.utils.ITRetryUtil;
import org.apache.druid.tests.TestNGGroup;
-import org.apache.druid.tests.indexer.AbstractIndexerTest;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
+import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
+import java.io.Closeable;
+import java.util.UUID;
+import java.util.function.Function;
+
+/**
+ * IMPORTANT:
+ * To run this test, you must:
+ * 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and
wikipedia_index_data3.json
+ * located in integration-tests/src/test/resources/data/batch_index/json to
your HDFS at the location set in step 1.
+ * If using the Docker-based Hadoop container, this is automatically done
by the integration tests.
+ * 2) Provide -Doverride.config.path=<PATH_TO_FILE> with HDFS configs set. See
+ * integration-tests/docker/environment-configs/override-examples/hdfs for
env vars to provide.
+ * 3) Run the test with -Dstart.hadoop.docker=true
-Dextra.datasource.name.suffix='' in the mvn command
+ */
@Test(groups = TestNGGroup.HADOOP_INDEX)
@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITHadoopIndexTest extends AbstractIndexerTest
+public class ITHadoopIndexTest extends AbstractITBatchIndexTest
{
private static final Logger LOG = new Logger(ITHadoopIndexTest.class);
+
private static final String BATCH_TASK = "/hadoop/batch_hadoop_indexer.json";
private static final String BATCH_QUERIES_RESOURCE =
"/hadoop/batch_hadoop_queries.json";
private static final String BATCH_DATASOURCE = "batchHadoop";
- private boolean dataLoaded = false;
- @Inject
- private IntegrationTestingConfig config;
+ private static final String INDEX_TASK =
"/hadoop/wikipedia_hadoop_index_task.json";
+ private static final String INDEX_QUERIES_RESOURCE =
"/indexer/wikipedia_index_queries.json";
+ private static final String INDEX_DATASOURCE = "wikipedia_hadoop_index_test";
- @BeforeClass
- public void beforeClass()
- {
- loadData(config.getProperty("hadoopTestDir") + "/batchHadoop1");
- dataLoaded = true;
- }
+ private static final String REINDEX_TASK =
"/hadoop/wikipedia_hadoop_reindex_task.json";
+ private static final String REINDEX_QUERIES_RESOURCE =
"/indexer/wikipedia_reindex_queries.json";
+ private static final String REINDEX_DATASOURCE =
"wikipedia_hadoop_reindex_test";
- @Test
- public void testHadoopIndex() throws Exception
+ @DataProvider
+ public static Object[][] resources()
{
- queryHelper.testQueriesFromFile(BATCH_QUERIES_RESOURCE, 2);
+ return new Object[][]{
+ {new HashedPartitionsSpec(3, null, null)},
+ {new HashedPartitionsSpec(null, 3, ImmutableList.of("page"))},
+ {new HashedPartitionsSpec(null, 3, ImmutableList.of("page", "user"))},
+ {new SingleDimensionPartitionsSpec(1000, null, null, false)},
+ {new SingleDimensionPartitionsSpec(1000, null, "page", false)},
+ {new SingleDimensionPartitionsSpec(1000, null, null, true)},
+
+ //{new HashedPartitionsSpec(null, 3, null)} // this results in a bug
where the segments have 0 rows
+ };
}
- private void loadData(String hadoopDir)
+ @Test
+ public void testLegacyITHadoopIndexTest() throws Exception
{
- String indexerSpec;
+ try (
+ final Closeable ignored0 = unloader(BATCH_DATASOURCE +
config.getExtraDatasourceNameSuffix());
+ ) {
+ final Function<String, String> specPathsTransform = spec -> {
+ try {
+ String path = "/batch_index/tsv";
Review comment:
For the first comment, I had already moved the batch_hadoop.data file to
integration-tests/src/test/resources/data/batch_index/tsv as you suggested, so
we're good there.
I got rid of `hadoopTestDir` and updated the README.md file with the new
instructions.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]