This is an automated email from the ASF dual-hosted git repository.
maytasm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 4e8570b Add integration tests for all InputFormat (#10088)
4e8570b is described below
commit 4e8570b71b6b8b6ab1299faed051d7287395013a
Author: Maytas Monsereenusorn <[email protected]>
AuthorDate: Wed Jul 8 12:50:29 2020 -0700
Add integration tests for all InputFormat (#10088)
* Add integration tests for Avro OCF InputFormat
* Add integration tests for Avro OCF InputFormat
* add tests
* fix bug
* fix bug
* fix failing tests
* add comments
* address comments
* address comments
* address comments
* fix test data
* reduce resource needed for IT
* remove bug fix
* fix checkstyle
* add bug fix
---
.travis.yml | 17 +++-
.../quantiles/DoublesSketchComplexMetricSerde.java | 17 ++--
.../DoublesSketchComplexMetricSerdeTest.java | 41 ++++++++-
integration-tests/README.md | 4 +-
integration-tests/script/copy_resources.sh | 6 +-
.../java/org/apache/druid/tests/TestNGGroup.java | 2 +
.../druid/tests/hadoop/ITHadoopIndexTest.java | 4 +-
.../tests/indexer/AbstractITBatchIndexTest.java | 3 +
.../AbstractLocalInputSourceParallelIndexTest.java | 91 +++++++++++++++++++
.../ITLocalInputSourceAllInputFormatTest.java | 99 +++++++++++++++++++++
.../batch_index/avro/wikipedia_index_data1.avro | Bin 0 -> 1066 bytes
.../batch_index/avro/wikipedia_index_data2.avro | Bin 0 -> 1055 bytes
.../batch_index/avro/wikipedia_index_data3.avro | Bin 0 -> 1151 bytes
.../data/batch_index/csv/wikipedia_index_data1.csv | 4 +
.../data/batch_index/csv/wikipedia_index_data2.csv | 4 +
.../data/batch_index/csv/wikipedia_index_data3.csv | 5 ++
.../{tsv => hadoop_tsv}/batch_hadoop.data | 0
.../data/batch_index/tsv/wikipedia_index_data1.tsv | 4 +
.../data/batch_index/tsv/wikipedia_index_data2.tsv | 4 +
.../data/batch_index/tsv/wikipedia_index_data3.tsv | 5 ++
.../resources/hadoop/batch_hadoop_indexer.json | 2 +-
.../wikipedia_local_input_source_index_task.json | 86 ++++++++++++++++++
22 files changed, 377 insertions(+), 21 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 2900b49..af0192d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -313,6 +313,14 @@ jobs:
docker exec -it druid-$v sh -c 'dmesg | tail -3' ;
done
+ - &integration_input_format
+ name: "(Compile=openjdk8, Run=openjdk8) input format integration test"
+ jdk: openjdk8
+ services: *integration_test_services
+ env: TESTNG_GROUPS='-Dgroups=input-format' JVM_RUNTIME='-Djvm.runtime=8'
+ script: *run_integration_test
+ after_failure: *integration_test_diags
+
- &integration_perfect_rollup_parallel_batch_index
name: "(Compile=openjdk8, Run=openjdk8) perfect rollup parallel batch
index integration test"
jdk: openjdk8
@@ -389,7 +397,7 @@ jobs:
name: "(Compile=openjdk8, Run=openjdk8) other integration test"
jdk: openjdk8
services: *integration_test_services
- env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-dee
[...]
+ env: TESTNG_GROUPS='-DexcludedGroups=batch-index,input-format,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-g
[...]
script: *run_integration_test
after_failure: *integration_test_diags
# END - Integration tests for Compile with Java 8 and Run with Java 8
@@ -400,6 +408,11 @@ jobs:
jdk: openjdk8
env: TESTNG_GROUPS='-Dgroups=batch-index' JVM_RUNTIME='-Djvm.runtime=11'
+ - <<: *integration_input_format
+ name: "(Compile=openjdk8, Run=openjdk11) input format integration test"
+ jdk: openjdk8
+ env: TESTNG_GROUPS='-Dgroups=input-format' JVM_RUNTIME='-Djvm.runtime=11'
+
- <<: *integration_perfect_rollup_parallel_batch_index
name: "(Compile=openjdk8, Run=openjdk11) perfect rollup parallel batch
index integration test"
jdk: openjdk8
@@ -423,7 +436,7 @@ jobs:
- <<: *integration_tests
name: "(Compile=openjdk8, Run=openjdk11) other integration test"
jdk: openjdk8
- env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-dee
[...]
+ env: TESTNG_GROUPS='-DexcludedGroups=batch-index,input-format,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index,security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-g
[...]
# END - Integration tests for Compile with Java 8 and Run with Java 11
- name: "security vulnerabilities"
diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java
index e2c922e..d97b5f8 100644
--- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java
+++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerde.java
@@ -19,11 +19,11 @@
package org.apache.druid.query.aggregation.datasketches.quantiles;
+import com.google.common.primitives.Doubles;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.UpdateDoublesSketch;
import org.apache.druid.data.input.InputRow;
-import org.apache.druid.java.util.common.IAE;
import org.apache.druid.segment.GenericColumnSerializer;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.data.GenericIndexed;
@@ -75,18 +75,13 @@ public class DoublesSketchComplexMetricSerde extends ComplexMetricSerde
// Autodetection of the input format: empty string, number, or base64 encoded sketch
// A serialized DoublesSketch, as currently implemented, always has 0 in the first 6 bits.
// This corresponds to "A" in base64, so it is not a digit
+ final Double doubleValue;
if (objectString.isEmpty()) {
return DoublesSketchOperations.EMPTY_SKETCH;
- } else if (Character.isDigit(objectString.charAt(0))) {
- try {
- double doubleValue = Double.parseDouble(objectString);
- UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build();
- sketch.update(doubleValue);
- return sketch;
- }
- catch (NumberFormatException e) {
- throw new IAE("Expected a string with a number, received value " + objectString);
- }
+ } else if ((doubleValue = Doubles.tryParse(objectString)) != null) {
+ UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build();
+ sketch.update(doubleValue);
+ return sketch;
}
} else if (object instanceof Number) { // this is for reindexing
UpdateDoublesSketch sketch = DoublesSketch.builder().setK(MIN_K).build();
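[Editor's note: the change above swaps the first-character digit test for Guava's Doubles.tryParse, which returns null unless the entire string parses as a double. The digit test misclassified inputs such as "-133" and ".1" (their first character is not a digit) and sent them down the base64-sketch path, while base64 payloads still fall through under tryParse because they parse to null. A minimal standalone sketch of the difference; the class name and sample strings are illustrative, not part of the commit:

import com.google.common.primitives.Doubles;

public class TryParseDemo
{
  public static void main(String[] args)
  {
    // "QWJjZA==" stands in for a base64-encoded sketch; any non-numeric string behaves the same.
    for (String s : new String[]{"777", "-133", ".1", "QWJjZA=="}) {
      boolean oldCheck = Character.isDigit(s.charAt(0)); // old test: true only for "777"
      Double parsed = Doubles.tryParse(s);               // new test: non-null for "777", "-133", ".1"
      System.out.println(s + " -> oldCheck=" + oldCheck + ", tryParse=" + parsed);
    }
  }
}]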
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java
index 5280fbd..e198c77 100644
--- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/DoublesSketchComplexMetricSerdeTest.java
@@ -42,7 +42,33 @@ public class DoublesSketchComplexMetricSerdeTest
}
@Test
- public void testExtractorOnNumber()
+ public void testExtractorOnPositiveNumber()
+ {
+ final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde();
+ final ComplexMetricExtractor extractor = serde.getExtractor();
+ final DoublesSketch sketch = (DoublesSketch) extractor.extractValue(
+ new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "777")),
+ "foo"
+ );
+ Assert.assertEquals(1, sketch.getRetainedItems());
+ Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d);
+ }
+
+ @Test
+ public void testExtractorOnNegativeNumber()
+ {
+ final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde();
+ final ComplexMetricExtractor extractor = serde.getExtractor();
+ final DoublesSketch sketch = (DoublesSketch) extractor.extractValue(
+ new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "-133")),
+ "foo"
+ );
+ Assert.assertEquals(1, sketch.getRetainedItems());
+ Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d);
+ }
+
+ @Test
+ public void testExtractorOnDecimalNumber()
{
final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde();
final ComplexMetricExtractor extractor = serde.getExtractor();
@@ -53,4 +79,17 @@ public class DoublesSketchComplexMetricSerdeTest
Assert.assertEquals(1, sketch.getRetainedItems());
Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d);
}
+
+ @Test
+ public void testExtractorOnLeadingDecimalNumber()
+ {
+ final DoublesSketchComplexMetricSerde serde = new DoublesSketchComplexMetricSerde();
+ final ComplexMetricExtractor extractor = serde.getExtractor();
+ final DoublesSketch sketch = (DoublesSketch) extractor.extractValue(
+ new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", ".1")),
+ "foo"
+ );
+ Assert.assertEquals(1, sketch.getRetainedItems());
+ Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d);
+ }
}
diff --git a/integration-tests/README.md b/integration-tests/README.md
index 43061f5..8157826 100644
--- a/integration-tests/README.md
+++ b/integration-tests/README.md
@@ -279,8 +279,8 @@ credentials/configs may need to be set in the same file as your Druid's Hadoop c
If you are running ITHadoopIndexTest with your own Druid + Hadoop cluster, please follow the below steps:
- Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
-- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
- at /batch_index/tsv/
+- Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/hadoop_tsv to your HDFS
+ at /batch_index/hadoop_tsv/
If using the Docker-based Hadoop container, the steps above are automatically done by the integration tests.
When running the Hadoop tests, you must set `-Dextra.datasource.name.suffix=''`, due to https://github.com/apache/druid/issues/9788.
diff --git a/integration-tests/script/copy_resources.sh b/integration-tests/script/copy_resources.sh
index eb3a1b5..0324a69 100755
--- a/integration-tests/script/copy_resources.sh
+++ b/integration-tests/script/copy_resources.sh
@@ -49,11 +49,13 @@ mv $SHARED_DIR/docker/lib/druid-hdfs-storage-* $SHARED_DIR/docker/extensions/dru
mkdir -p $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
mv $SHARED_DIR/docker/lib/druid-kinesis-indexing-service-* $SHARED_DIR/docker/extensions/druid-kinesis-indexing-service
# For druid-parquet-extensions
+# Using cp so that this extension is included when running Druid without a loadList and as an option for the loadList
mkdir -p $SHARED_DIR/docker/extensions/druid-parquet-extensions
-mv $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
+cp $SHARED_DIR/docker/lib/druid-parquet-extensions-* $SHARED_DIR/docker/extensions/druid-parquet-extensions
# For druid-orc-extensions
+# Using cp so that this extension is included when running Druid without a loadList and as an option for the loadList
mkdir -p $SHARED_DIR/docker/extensions/druid-orc-extensions
-mv $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
+cp $SHARED_DIR/docker/lib/druid-orc-extensions-* $SHARED_DIR/docker/extensions/druid-orc-extensions
# Pull Hadoop dependency if needed
if [ -n "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" ] && [ "$DRUID_INTEGRATION_TEST_START_HADOOP_DOCKER" == true ]
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java
index bc8e613..b76b035 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java
@@ -27,6 +27,8 @@ public class TestNGGroup
{
public static final String BATCH_INDEX = "batch-index";
+ public static final String INPUT_FORMAT = "input-format";
+
public static final String KAFKA_INDEX = "kafka-index";
public static final String KAFKA_INDEX_SLOW = "kafka-index-slow";
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java
index 1d37e1d..23b8ac0 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/hadoop/ITHadoopIndexTest.java
@@ -42,8 +42,8 @@ import java.util.function.Function;
* 1) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and wikipedia_index_data3.json
*    located in integration-tests/src/test/resources/data/batch_index/json to your HDFS at /batch_index/json/
*    If using the Docker-based Hadoop container, this is automatically done by the integration tests.
- * 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/tsv to your HDFS
- *    at /batch_index/tsv/
+ * 2) Copy batch_hadoop.data located in integration-tests/src/test/resources/data/batch_index/hadoop_tsv to your HDFS
+ *    at /batch_index/hadoop_tsv/
*    If using the Docker-based Hadoop container, this is automatically done by the integration tests.
* 2) Provide -Doverride.config.path=<PATH_TO_FILE> with HDFS configs set. See
*    integration-tests/docker/environment-configs/override-examples/hdfs for env vars to provide.
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java
index 5c64dcd..993ee6b 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java
@@ -50,6 +50,9 @@ public abstract class AbstractITBatchIndexTest extends AbstractIndexerTest
{
public enum InputFormatDetails
{
+ AVRO("avro_ocf", ".avro", "/avro"),
+ CSV("csv", ".csv", "/csv"),
+ TSV("tsv", ".tsv", "/tsv"),
ORC("orc", ".orc", "/orc"),
JSON("json", ".json", "/json"),
PARQUET("parquet", ".parquet", "/parquet");
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java
new file mode 100644
index 0000000..a00bb3d
--- /dev/null
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractLocalInputSourceParallelIndexTest.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.tests.indexer;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
+import org.apache.druid.java.util.common.StringUtils;
+
+import javax.annotation.Nonnull;
+import java.io.Closeable;
+import java.util.Map;
+import java.util.UUID;
+import java.util.function.Function;
+
+public abstract class AbstractLocalInputSourceParallelIndexTest extends AbstractITBatchIndexTest
+{
+ private static final String INDEX_TASK = "/indexer/wikipedia_local_input_source_index_task.json";
+ private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries.json";
+
+ public void doIndexTest(InputFormatDetails inputFormatDetails) throws Exception
+ {
+ doIndexTest(inputFormatDetails, ImmutableMap.of());
+ }
+
+ public void doIndexTest(InputFormatDetails inputFormatDetails, @Nonnull Map<String, Object> extraInputFormatMap) throws Exception
+ {
+ final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
Map inputFormatMap = new ImmutableMap.Builder<String, Object>().putAll(extraInputFormatMap)
+ .put("type", inputFormatDetails.getInputFormatType())
+ .build();
+ try (
+ final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
+ ) {
+ final Function<String, String> sqlInputSourcePropsTransform = spec -> {
+ try {
+ spec = StringUtils.replace(
+ spec,
+ "%%PARTITIONS_SPEC%%",
+ jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null, null))
+ );
+ spec = StringUtils.replace(
+ spec,
+ "%%INPUT_SOURCE_FILTER%%",
+ "*" + inputFormatDetails.getFileExtension()
+ );
+ spec = StringUtils.replace(
+ spec,
+ "%%INPUT_SOURCE_BASE_DIR%%",
+ "/resources/data/batch_index" +
inputFormatDetails.getFolderSuffix()
+ );
+ spec = StringUtils.replace(
+ spec,
+ "%%INPUT_FORMAT%%",
+ jsonMapper.writeValueAsString(inputFormatMap)
+ );
+ return spec;
+ }
+ catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ };
+
+ doIndexTest(
+ indexDatasource,
+ INDEX_TASK,
+ sqlInputSourcePropsTransform,
+ INDEX_QUERIES_RESOURCE,
+ false,
+ true,
+ true
+ );
+ }
+ }
+}
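[Editor's note: for concreteness, with the CSV test case in the next file (extraInputFormatMap of {"findColumnsFromHeader": true}), the transform above resolves the placeholders roughly as follows; key order in the serialized map is illustrative:

  "inputFormat": {"findColumnsFromHeader": true, "type": "csv"}

with %%INPUT_SOURCE_FILTER%% becoming "*.csv" and %%INPUT_SOURCE_BASE_DIR%% becoming "/resources/data/batch_index/csv".]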
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java
new file mode 100644
index 0000000..a0c1014
--- /dev/null
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.tests.indexer;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.apache.druid.testing.guice.DruidTestModuleFactory;
+import org.apache.druid.tests.TestNGGroup;
+import org.testng.annotations.Guice;
+import org.testng.annotations.Test;
+
+import java.util.List;
+import java.util.Map;
+
+@Test(groups = TestNGGroup.INPUT_FORMAT)
+@Guice(moduleFactory = DruidTestModuleFactory.class)
+public class ITLocalInputSourceAllInputFormatTest extends AbstractLocalInputSourceParallelIndexTest
+{
+ @Test
+ public void testAvroInputFormatIndexDataIngestionSpecWithSchema() throws Exception
+ {
+ List fieldList = ImmutableList.of(
+ ImmutableMap.of("name", "timestamp", "type", "string"),
+ ImmutableMap.of("name", "page", "type", "string"),
+ ImmutableMap.of("name", "language", "type", "string"),
+ ImmutableMap.of("name", "user", "type", "string"),
+ ImmutableMap.of("name", "unpatrolled", "type", "string"),
+ ImmutableMap.of("name", "newPage", "type", "string"),
+ ImmutableMap.of("name", "robot", "type", "string"),
+ ImmutableMap.of("name", "anonymous", "type", "string"),
+ ImmutableMap.of("name", "namespace", "type", "string"),
+ ImmutableMap.of("name", "continent", "type", "string"),
+ ImmutableMap.of("name", "country", "type", "string"),
+ ImmutableMap.of("name", "region", "type", "string"),
+ ImmutableMap.of("name", "city", "type", "string"),
+ ImmutableMap.of("name", "added", "type", "int"),
+ ImmutableMap.of("name", "deleted", "type", "int"),
+ ImmutableMap.of("name", "delta", "type", "int")
+ );
+ Map schema = ImmutableMap.of("namespace", "org.apache.druid.data.input",
+ "type", "record",
+ "name", "wikipedia",
+ "fields", fieldList);
+ doIndexTest(InputFormatDetails.AVRO, ImmutableMap.of("schema", schema));
+ }
+
+ @Test
+ public void testAvroInputFormatIndexDataIngestionSpecWithoutSchema() throws Exception
+ {
+ doIndexTest(InputFormatDetails.AVRO);
+ }
+
+ @Test
+ public void testJsonInputFormatIndexDataIngestionSpecWithSchema() throws Exception
+ {
+ doIndexTest(InputFormatDetails.JSON);
+ }
+
+ @Test
+ public void testTsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception
+ {
+ doIndexTest(InputFormatDetails.TSV, ImmutableMap.of("findColumnsFromHeader", true));
+ }
+
+ @Test
+ public void testParquetInputFormatIndexDataIngestionSpecWithSchema() throws Exception
+ {
+ doIndexTest(InputFormatDetails.PARQUET);
+ }
+
+ @Test
+ public void testOrcInputFormatIndexDataIngestionSpecWithSchema() throws Exception
+ {
+ doIndexTest(InputFormatDetails.ORC);
+ }
+
+ @Test
+ public void testCsvInputFormatIndexDataIngestionSpecWithSchema() throws Exception
+ {
+ doIndexTest(InputFormatDetails.CSV, ImmutableMap.of("findColumnsFromHeader", true));
+ }
+}
diff --git a/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data1.avro b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data1.avro
new file mode 100644
index 0000000..5ed0d60
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data1.avro differ
diff --git a/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data2.avro b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data2.avro
new file mode 100644
index 0000000..aa25803
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data2.avro differ
diff --git a/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data3.avro b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data3.avro
new file mode 100644
index 0000000..2e0f9f2
Binary files /dev/null and b/integration-tests/src/test/resources/data/batch_index/avro/wikipedia_index_data3.avro differ
diff --git a/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data1.csv b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data1.csv
new file mode 100644
index 0000000..c2e2b07
--- /dev/null
+++ b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data1.csv
@@ -0,0 +1,4 @@
+timestamp,page,language,user,unpatrolled,newPage,robot,anonymous,namespace,continent,country,region,city,added,deleted,delta
+2013-08-31T01:02:33Z,Gypsy Danger,en,nuclear,TRUE,TRUE,FALSE,FALSE,article,North America,United States,Bay Area,San Francisco,57,200,-143
+2013-08-31T03:32:45Z,Striker Eureka,en,speed,FALSE,TRUE,TRUE,FALSE,wikipedia,Australia,Australia,Cantebury,Syndey,459,129,330
+2013-08-31T07:11:21Z,Cherno Alpha,ru,masterYi,FALSE,TRUE,TRUE,FALSE,article,Asia,Russia,Oblast,Moscow,123,12,111
diff --git a/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data2.csv b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data2.csv
new file mode 100644
index 0000000..cbed281
--- /dev/null
+++ b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data2.csv
@@ -0,0 +1,4 @@
+timestamp,page,language,user,unpatrolled,newPage,robot,anonymous,namespace,continent,country,region,city,added,deleted,delta
+2013-08-31T11:58:39Z,Crimson Typhoon,zh,triplets,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,China,Shanxi,Taiyuan,905,5,900
+2013-08-31T12:41:27Z,Coyote Tango,ja,stringer,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,Japan,Kanto,Tokyo,1,10,-9
+2013-09-01T01:02:33Z,Gypsy Danger,en,nuclear,TRUE,TRUE,FALSE,FALSE,article,North America,United States,Bay Area,San Francisco,57,200,-143
diff --git a/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data3.csv b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data3.csv
new file mode 100644
index 0000000..51d6d21
--- /dev/null
+++ b/integration-tests/src/test/resources/data/batch_index/csv/wikipedia_index_data3.csv
@@ -0,0 +1,5 @@
+timestamp,page,language,user,unpatrolled,newPage,robot,anonymous,namespace,continent,country,region,city,added,deleted,delta
+2013-09-01T03:32:45Z,Striker Eureka,en,speed,FALSE,TRUE,TRUE,FALSE,wikipedia,Australia,Australia,Cantebury,Syndey,459,129,330
+2013-09-01T07:11:21Z,Cherno Alpha,ru,masterYi,FALSE,TRUE,TRUE,FALSE,article,Asia,Russia,Oblast,Moscow,123,12,111
+2013-09-01T11:58:39Z,Crimson Typhoon,zh,triplets,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,China,Shanxi,Taiyuan,905,5,900
+2013-09-01T12:41:27Z,Coyote Tango,ja,stringer,TRUE,FALSE,TRUE,FALSE,wikipedia,Asia,Japan,Kanto,Tokyo,1,10,-9
diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/batch_hadoop.data b/integration-tests/src/test/resources/data/batch_index/hadoop_tsv/batch_hadoop.data
similarity index 100%
rename from integration-tests/src/test/resources/data/batch_index/tsv/batch_hadoop.data
rename to integration-tests/src/test/resources/data/batch_index/hadoop_tsv/batch_hadoop.data
diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv
new file mode 100644
index 0000000..b13d9ad
--- /dev/null
+++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data1.tsv
@@ -0,0 +1,4 @@
+timestamp	page	language	user	unpatrolled	newPage	robot	anonymous	namespace	continent	country	region	city	added	deleted	delta
+2013-08-31T01:02:33Z	Gypsy Danger	en	nuclear	true	true	false	false	article	North America	United States	Bay Area	San Francisco	57	200	-143
+2013-08-31T03:32:45Z	Striker Eureka	en	speed	false	true	true	false	wikipedia	Australia	Australia	Cantebury	Syndey	459	129	330
+2013-08-31T07:11:21Z	Cherno Alpha	ru	masterYi	false	true	true	false	article	Asia	Russia	Oblast	Moscow	123	12	111
diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv
new file mode 100644
index 0000000..e271291
--- /dev/null
+++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data2.tsv
@@ -0,0 +1,4 @@
+timestamp	page	language	user	unpatrolled	newPage	robot	anonymous	namespace	continent	country	region	city	added	deleted	delta
+2013-08-31T11:58:39Z	Crimson Typhoon	zh	triplets	true	false	true	false	wikipedia	Asia	China	Shanxi	Taiyuan	905	5	900
+2013-08-31T12:41:27Z	Coyote Tango	ja	stringer	true	false	true	false	wikipedia	Asia	Japan	Kanto	Tokyo	1	10	-9
+2013-09-01T01:02:33Z	Gypsy Danger	en	nuclear	true	true	false	false	article	North America	United States	Bay Area	San Francisco	57	200	-143
\ No newline at end of file
diff --git a/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv
new file mode 100644
index 0000000..b91eae4
--- /dev/null
+++ b/integration-tests/src/test/resources/data/batch_index/tsv/wikipedia_index_data3.tsv
@@ -0,0 +1,5 @@
+timestamp	page	language	user	unpatrolled	newPage	robot	anonymous	namespace	continent	country	region	city	added	deleted	delta
+2013-09-01T03:32:45Z	Striker Eureka	en	speed	false	true	true	false	wikipedia	Australia	Australia	Cantebury	Syndey	459	129	330
+2013-09-01T07:11:21Z	Cherno Alpha	ru	masterYi	false	true	true	false	article	Asia	Russia	Oblast	Moscow	123	12	111
+2013-09-01T11:58:39Z	Crimson Typhoon	zh	triplets	true	false	true	false	wikipedia	Asia	China	Shanxi	Taiyuan	905	5	900
+2013-09-01T12:41:27Z	Coyote Tango	ja	stringer	true	false	true	false	wikipedia	Asia	Japan	Kanto	Tokyo	1	10	-9
\ No newline at end of file
diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json
index 465a223..a6710db 100644
--- a/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json
+++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_indexer.json
@@ -53,7 +53,7 @@
"type": "hadoop",
"inputSpec": {
"type": "static",
- "paths": "/batch_index/tsv/batch_hadoop.data"
+ "paths": "/batch_index/hadoop_tsv/batch_hadoop.data"
}
},
"tuningConfig": {
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json
new file mode 100644
index 0000000..67650f8
--- /dev/null
+++ b/integration-tests/src/test/resources/indexer/wikipedia_local_input_source_index_task.json
@@ -0,0 +1,86 @@
+{
+ "type": "index_parallel",
+ "spec": {
+ "dataSchema": {
+ "dataSource": "%%DATASOURCE%%",
+ "timestampSpec": {
+ "column": "timestamp"
+ },
+ "dimensionsSpec": {
+ "dimensions": [
+ "page",
+ {"type": "string", "name": "language", "createBitmapIndex": false},
+ "user",
+ "unpatrolled",
+ "newPage",
+ "robot",
+ "anonymous",
+ "namespace",
+ "continent",
+ "country",
+ "region",
+ "city"
+ ]
+ },
+ "metricsSpec": [
+ {
+ "type": "count",
+ "name": "count"
+ },
+ {
+ "type": "doubleSum",
+ "name": "added",
+ "fieldName": "added"
+ },
+ {
+ "type": "doubleSum",
+ "name": "deleted",
+ "fieldName": "deleted"
+ },
+ {
+ "type": "doubleSum",
+ "name": "delta",
+ "fieldName": "delta"
+ },
+ {
+ "name": "thetaSketch",
+ "type": "thetaSketch",
+ "fieldName": "user"
+ },
+ {
+ "name": "quantilesDoublesSketch",
+ "type": "quantilesDoublesSketch",
+ "fieldName": "delta"
+ },
+ {
+ "name": "HLLSketchBuild",
+ "type": "HLLSketchBuild",
+ "fieldName": "user"
+ }
+ ],
+ "granularitySpec": {
+ "segmentGranularity": "DAY",
+ "queryGranularity": "second",
+ "intervals" : [ "2013-08-31/2013-09-02" ]
+ }
+ },
+ "ioConfig": {
+ "type": "index_parallel",
+ "inputSource": {
+ "type": "local",
+ "filter" : "%%INPUT_SOURCE_FILTER%%",
+ "baseDir": "%%INPUT_SOURCE_BASE_DIR%%"
+ },
+ "inputFormat": %%INPUT_FORMAT%%
+ },
+ "tuningConfig": {
+ "type": "index_parallel",
+ "maxNumConcurrentSubTasks": 10,
+ "splitHintSpec": {
+ "type": "maxSize",
+ "maxSplitSize": 1
+ },
+ "partitionsSpec": %%PARTITIONS_SPEC%%
+ }
+ }
+}
\ No newline at end of file
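[Editor's note: to run only the new group outside CI, a hedged example following the invocation style documented in integration-tests/README.md, with the same flags the .travis.yml jobs above pass through TESTNG_GROUPS and JVM_RUNTIME:

  mvn verify -P integration-tests -Dgroups=input-format -Djvm.runtime=8]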
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]