maytasm commented on a change in pull request #9714: More Hadoop integration tests
URL: https://github.com/apache/druid/pull/9714#discussion_r409931970
##########
File path:
integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java
##########
@@ -19,87 +19,147 @@
package org.apache.druid.tests.hadoop;
-import com.google.inject.Inject;
+import com.google.common.collect.ImmutableList;
+import org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec;
+import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
+import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
-import org.apache.druid.testing.IntegrationTestingConfig;
import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.testing.utils.ITRetryUtil;
import org.apache.druid.tests.TestNGGroup;
-import org.apache.druid.tests.indexer.AbstractIndexerTest;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
+import org.apache.druid.tests.indexer.AbstractITBatchIndexTest;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
+import java.io.Closeable;
+import java.util.UUID;
+import java.util.function.Function;
+
+/**
+ * IMPORTANT:
+ * To run this test, you must:
+ * 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
+ *    located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at the location expected by the index task specs.
+ *    If using the Docker-based Hadoop container, this is automatically done by the integration tests.
+ * 2) Provide -Doverride.config.path=<PATH_TO_FILE> with HDFS configs set. See
+ *    integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
+ * 3) Run the test with -Dstart.hadoop.docker=true -Dextra.datasource.name.suffix='' in the mvn command
+ */
@Test(groups = TestNGGroup.HADOOP_INDEX)
@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITHadoopIndexTest extends AbstractIndexerTest
+public class ITHadoopIndexTest extends AbstractITBatchIndexTest
{
private static final Logger LOG = new Logger(ITHadoopIndexTest.class);
+
private static final String BATCH_TASK = "/hadoop/batch_hadoop_indexer.json";
private static final String BATCH_QUERIES_RESOURCE = "/hadoop/batch_hadoop_queries.json";
private static final String BATCH_DATASOURCE = "batchHadoop";
- private boolean dataLoaded = false;
- @Inject
- private IntegrationTestingConfig config;
+ private static final String INDEX_TASK = "/hadoop/wikipedia_hadoop_index_task.json";
+ private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
+ private static final String INDEX_DATASOURCE = "wikipedia_hadoop_index_test";
- @BeforeClass
- public void beforeClass()
- {
- loadData(config.getProperty("hadoopTestDir") + "/batchHadoop1");
- dataLoaded = true;
- }
+ private static final String REINDEX_TASK = "/hadoop/wikipedia_hadoop_reindex_task.json";
+ private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json";
+ private static final String REINDEX_DATASOURCE = "wikipedia_hadoop_reindex_test";
- @Test
- public void testHadoopIndex() throws Exception
+ @DataProvider
+ public static Object[][] resources()
{
- queryHelper.testQueriesFromFile(BATCH_QUERIES_RESOURCE, 2);
+ return new Object[][]{
+ {new HashedPartitionsSpec(3, null, null)},
+ {new HashedPartitionsSpec(null, 3, ImmutableList.of("page"))},
+ {new HashedPartitionsSpec(null, 3, ImmutableList.of("page", "user"))},
+ {new SingleDimensionPartitionsSpec(1000, null, null, false)},
+ {new SingleDimensionPartitionsSpec(1000, null, "page", false)},
+ {new SingleDimensionPartitionsSpec(1000, null, null, true)},
+
+ //{new HashedPartitionsSpec(null, 3, null)} // this results in a bug where the segments have 0 rows
+ };
}
- private void loadData(String hadoopDir)
+ @Test
+ public void testLegacyITHadoopIndexTest() throws Exception
{
- String indexerSpec;
+ try (
+ final Closeable ignored0 = unloader(BATCH_DATASOURCE + config.getExtraDatasourceNameSuffix());
+ ) {
+ final Function<String, String> specPathsTransform = spec -> {
+ try {
+ String path = "/batch_index/tsv";
Review comment:
Maybe also just get rid of the hadoopTestDir in the DockerConfigProvider; I
think it's no longer needed. If using the Hadoop container, everything is set
up automatically. If running your own Hadoop cluster, users should copy the
data to /batch_index/tsv, since that path is hardcoded in specPathsTransform
anyway.
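
For context, here is a minimal, self-contained sketch of the kind of
specPathsTransform under discussion: a function that rewrites a placeholder in
the serialized task spec to the hardcoded HDFS input path. The %%INPUT_PATHS%%
placeholder, the class name, and the sample JSON are assumptions made for
illustration, not the test's actual spec format.

    import java.util.function.Function;

    // Sketch of a spec-paths transform: rewrite a placeholder in the
    // serialized task spec to the hardcoded HDFS input path seen in the diff.
    // %%INPUT_PATHS%% and the sample JSON are illustrative assumptions.
    public class SpecPathsTransformSketch
    {
      public static void main(String[] args)
      {
        // Mirrors the "/batch_index/tsv" literal visible in the diff above.
        final String hardcodedPath = "/batch_index/tsv";

        final Function<String, String> specPathsTransform =
            spec -> spec.replace("%%INPUT_PATHS%%", hardcodedPath);

        final String spec =
            "{\"inputSpec\": {\"type\": \"static\", \"paths\": \"%%INPUT_PATHS%%\"}}";
        System.out.println(specPathsTransform.apply(spec));
        // prints: {"inputSpec": {"type": "static", "paths": "/batch_index/tsv"}}
      }
    }

Because the path is fixed inside the transform itself, a config-supplied
hadoopTestDir would never be consulted, which is the rationale for dropping it
from DockerConfigProvider.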
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]