This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 18f3d0a8a5 Add auto generated query tests for tpch (#11511)
18f3d0a8a5 is described below
commit 18f3d0a8a5e1faa6ddcc74562b9e3c464945ce54
Author: Saurabh Dubey <[email protected]>
AuthorDate: Wed Sep 6 09:35:46 2023 +0530
Add auto generated query tests for tpch (#11511)
Co-authored-by: Saurabh Dubey <[email protected]>
---
.../pinot/integration/tests/tpch/Constants.java | 21 +-
.../tpch/TPCHGeneratedQueryIntegrationTest.java | 218 ++++++++
.../tests/tpch/TblToAvroMultiValue.java | 220 ++++++++
.../integration/tests/tpch/generator/Column.java | 97 ++++
.../{Constants.java => generator/ColumnType.java} | 35 +-
.../PinotQueryBasedColumnDataProvider.java | 76 +++
.../tests/tpch/generator/QuerySkeleton.java | 94 ++++
.../RelatedTable.java} | 35 +-
.../SampleColumnDataProvider.java} | 29 +-
.../tests/tpch/generator/TPCHQueryGeneratorV2.java | 553 +++++++++++++++++++++
.../tpch/{Constants.java => generator/Table.java} | 41 +-
.../segment/index/readers/DocIdDictionary.java | 5 +
12 files changed, 1341 insertions(+), 83 deletions(-)
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
index 7fbae29c08..b09a959946 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
@@ -29,16 +29,29 @@ public final class Constants {
"customer", "lineitem", "nation", "orders", "part", "partsupp",
"region", "supplier"
};
static final String AVRO_FILE_SUFFIX = ".avro";
- private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX =
"examples/batch/tpch/";
+ private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX =
"examples/batch/tpch";
private Constants() {
}
static String getTableResourceFolder(String tableName) {
- return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
+ return getTableResourceFolder(tableName, false);
+ }
+
+ static String getTableResourceFolder(String tableName, boolean
useMultiValue) {
+ String path = TPCH_TABLE_RESOURCE_FOLDER_PREFIX;
+ if (useMultiValue) {
+ path += "MultiValue";
+ }
+
+ return path + File.separator + tableName;
}
static String getTableAvroFilePath(String tableName) {
- return getTableResourceFolder(tableName) + File.separator
- + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+ return getTableAvroFilePath(tableName, false);
+ }
+
+ static String getTableAvroFilePath(String tableName, boolean multiValue) {
+ String path = getTableResourceFolder(tableName, multiValue);
+ return path + File.separator + "rawdata" + File.separator + tableName +
AVRO_FILE_SUFFIX;
}
}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TPCHGeneratedQueryIntegrationTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TPCHGeneratedQueryIntegrationTest.java
new file mode 100644
index 0000000000..7303ef9d45
--- /dev/null
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TPCHGeneratedQueryIntegrationTest.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.sql.ResultSet;
+import java.sql.SQLTimeoutException;
+import java.sql.Statement;
+import java.util.Collections;
+import java.util.Objects;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.client.Connection;
+import org.apache.pinot.client.ResultSetGroup;
+import org.apache.pinot.integration.tests.BaseClusterIntegrationTest;
+import org.apache.pinot.integration.tests.ClusterIntegrationTestUtils;
+import
org.apache.pinot.integration.tests.tpch.generator.PinotQueryBasedColumnDataProvider;
+import
org.apache.pinot.integration.tests.tpch.generator.SampleColumnDataProvider;
+import org.apache.pinot.integration.tests.tpch.generator.TPCHQueryGeneratorV2;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.tools.utils.JarUtils;
+import org.apache.pinot.util.TestUtils;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+
+/**
+ * Integration test that tests Pinot using TPCH data.
+ * Data is loaded into Pinot and H2 from /resources/examples/batch/tpch. The
dataset size is very small, please follow
+ * REAME.md to generate a larger dataset for better testing.
+ * Queries are executed against Pinot and H2, and the results are compared.
+ */
+public class TPCHGeneratedQueryIntegrationTest extends
BaseClusterIntegrationTest {
+ private static final int NUM_TPCH_QUERIES = 1000;
+ private static TPCHQueryGeneratorV2 _tpchQueryGeneratorV2;
+ private static final Boolean USE_MULTI_VALUE = false;
+
+ @BeforeClass
+ public void setUp()
+ throws Exception {
+ TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);
+
+ // Start the Pinot cluster
+ startZk();
+ startController();
+ startBroker();
+ startServer();
+
+ setUpH2Connection();
+ for (String tableName : Constants.TPCH_TABLE_NAMES) {
+ File tableSegmentDir = new File(_segmentDir, tableName);
+ File tarDir = new File(_tarDir, tableName);
+ String tableResourceFolder = Constants.getTableResourceFolder(tableName,
USE_MULTI_VALUE);
+ URL resourceUrl =
getClass().getClassLoader().getResource(tableResourceFolder);
+ Assert.assertNotNull(resourceUrl, "Unable to find resource from: " +
tableResourceFolder);
+ File resourceFile;
+ if ("jar".equals(resourceUrl.getProtocol())) {
+ String[] splits = resourceUrl.getFile().split("!");
+ File tempUnpackDir = new File(_tempDir.getAbsolutePath() +
File.separator + splits[1]);
+ TestUtils.ensureDirectoriesExistAndEmpty(tempUnpackDir);
+ JarUtils.copyResourcesToDirectory(splits[0], splits[1].substring(1),
tempUnpackDir.getAbsolutePath());
+ resourceFile = tempUnpackDir;
+ } else {
+ resourceFile = new File(resourceUrl.getFile());
+ }
+ File dataFile =
+ new File(Objects.requireNonNull(
+
getClass().getClassLoader().getResource(Constants.getTableAvroFilePath(tableName,
USE_MULTI_VALUE)))
+ .getFile());
+ Assert.assertTrue(dataFile.exists(), "Unable to load resource file from
URL: " + dataFile);
+ File schemaFile = new File(resourceFile.getPath(), tableName +
"_schema.json");
+ File tableFile = new File(resourceFile.getPath(), tableName +
"_offline_table_config.json");
+ // Pinot
+ TestUtils.ensureDirectoriesExistAndEmpty(tableSegmentDir, tarDir);
+ Schema schema = createSchema(schemaFile);
+ addSchema(schema);
+ TableConfig tableConfig = createTableConfig(tableFile);
+ addTableConfig(tableConfig);
+
ClusterIntegrationTestUtils.buildSegmentsFromAvro(Collections.singletonList(dataFile),
tableConfig, schema, 0,
+ tableSegmentDir, tarDir);
+ uploadSegments(tableName, tarDir);
+ // H2
+
ClusterIntegrationTestUtils.setUpH2TableWithAvro(Collections.singletonList(dataFile),
tableName, _h2Connection);
+ }
+
+ SampleColumnDataProvider sampleColumnDataProvider =
+ new PinotQueryBasedColumnDataProvider(new
PinotQueryBasedColumnDataProvider.PinotConnectionProvider() {
+ @Override
+ public Connection getConnection() {
+ return getPinotConnection();
+ }
+ });
+ _tpchQueryGeneratorV2 = new TPCHQueryGeneratorV2(sampleColumnDataProvider);
+ _tpchQueryGeneratorV2.init();
+ }
+
+ @Test(dataProvider = "QueryDataProvider", enabled = false)
+ public void testTPCHQueries(String[] pinotAndH2Queries)
+ throws Exception {
+ testQueriesSucceed(pinotAndH2Queries[1], pinotAndH2Queries[0]);
+ }
+
+ protected long testQueriesSucceed(String pinotQuery, String h2Query)
+ throws Exception {
+ ResultSetGroup pinotResultSetGroup =
getPinotConnection().execute(pinotQuery);
+ org.apache.pinot.client.ResultSet resultTableResultSet =
pinotResultSetGroup.getResultSet(0);
+ if (CollectionUtils.isNotEmpty(pinotResultSetGroup.getExceptions())) {
+ Assert.fail(pinotResultSetGroup.getExceptions().get(0).toString());
+ }
+
+ int numRows = resultTableResultSet.getRowCount();
+ int numColumns = resultTableResultSet.getColumnCount();
+
+ // h2 response
+ Assert.assertNotNull(_h2Connection);
+ Statement h2statement =
_h2Connection.createStatement(ResultSet.TYPE_FORWARD_ONLY,
ResultSet.CONCUR_READ_ONLY);
+ h2statement.setQueryTimeout(10);
+
+ try {
+ h2statement.execute(h2Query);
+ } catch (SQLTimeoutException e) {
+ Assert.fail("H2 query timed out!");
+ }
+ ResultSet h2ResultSet = h2statement.getResultSet();
+
+ // compare results.
+ Assert.assertEquals(numColumns,
h2ResultSet.getMetaData().getColumnCount());
+ if (h2ResultSet.first()) {
+ for (int i = 0; i < numRows; i++) {
+ for (int c = 0; c < numColumns; c++) {
+ String h2Value = h2ResultSet.getString(c + 1);
+ String pinotValue = resultTableResultSet.getString(i, c);
+ boolean error = ClusterIntegrationTestUtils.fuzzyCompare(h2Value,
pinotValue, pinotValue);
+ if (error) {
+ Assert.fail(
+ String.format("Value: %d does not match at (%d, %d), expected
h2 value: %s actual Pinot value: %s", c,
+ i, c, h2Value, pinotValue));
+ }
+ }
+ if (!h2ResultSet.next() && i != numRows - 1) {
+ Assert.fail(String.format("H2 result set is smaller than Pinot
result set after: %d rows", i));
+ }
+ }
+ }
+ Assert.assertFalse(h2ResultSet.next(),
+ String.format("Pinot result set is smaller than H2 result set after:
%d rows!", numRows));
+
+ return numRows;
+ }
+
+ @Override
+ protected long getCurrentCountStarResult() {
+ return getPinotConnection().execute("SELECT COUNT(*) FROM
orders").getResultSet(0).getLong(0);
+ }
+
+ @Override
+ protected long getCountStarResult() {
+ return 9999L;
+ }
+
+ @Override
+ protected boolean useMultiStageQueryEngine() {
+ return true;
+ }
+
+ @AfterClass
+ public void tearDown()
+ throws Exception {
+ // unload all TPCH tables.
+ for (String table : Constants.TPCH_TABLE_NAMES) {
+ dropOfflineTable(table);
+ }
+
+ // stop components and clean up
+ stopServer();
+ stopBroker();
+ stopController();
+ stopZk();
+
+ FileUtils.deleteDirectory(_tempDir);
+ }
+
+ @DataProvider(name = "QueryDataProvider")
+ public static Object[][] queryDataProvider()
+ throws IOException {
+ Object[][] queries = new Object[NUM_TPCH_QUERIES][];
+ for (int i = 0; i < NUM_TPCH_QUERIES; i++) {
+ queries[i] = new Object[2];
+ String query = _tpchQueryGeneratorV2.generateRandomQuery();
+ queries[i][0] = query;
+ queries[i][1] = query;
+ }
+
+ return queries;
+ }
+}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TblToAvroMultiValue.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TblToAvroMultiValue.java
new file mode 100644
index 0000000000..014e285bd4
--- /dev/null
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TblToAvroMultiValue.java
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch;
+
+import com.google.common.collect.ImmutableMap;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Map;
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+
+
+public class TblToAvroMultiValue {
+ private static final String LONG_TYPE = "long";
+ private static final String DOUBLE_TYPE = "double";
+ private static final String STRING_TYPE = "string";
+ private static final String FOLDER_PATH =
"/Users/saurabh.dubey/Documents/workspace/stuff/tpch/data/s001/";
+ private static final String TBL_FILE_SUFFIX = ".tbl";
+ private static final Map<String, String[]> TABLE_NAME_FIELDS_MAP =
ImmutableMap.of(
+ // One example record:
+ // {"c_custkey":{"long":1},"c_name":{"string":"Customer#000000001"},
+ // "c_address":{"string":"IVhzIApeRb ot,c,E"},"c_nationkey":{"long":15},
+ //
"c_phone":{"string":"25-989-741-2988"},"c_acctbal":{"double":711.56},"c_mktsegment":{"string":"BUILDING"},
+ // "c_comment":{"string":"to the even, regular platelets. regular,
ironic epitaphs nag e"}}
+ "customer", new String[]{
+ "c_custkey", LONG_TYPE, "false", "c_name", STRING_TYPE, "true",
"c_address", STRING_TYPE, "true",
+ "c_nationkey", LONG_TYPE, "false", "c_phone", STRING_TYPE, "false",
"c_acctbal", DOUBLE_TYPE, "false",
+ "c_mktsegment", STRING_TYPE, "true", "c_comment", STRING_TYPE,
"false"
+ },
+ // One example record:
+ //
{"l_orderkey":{"long":600000},"l_partkey":{"long":12916},"l_suppkey":{"long":917},
+ //
"l_linenumber":{"long":2},"l_quantity":{"long":1},"l_extendedprice":{"double":1828.91},
+ //
"l_discount":{"double":0.03},"l_tax":{"double":0.0},"l_returnflag":{"string":"N"},
+ //
"l_linestatus":{"string":"O"},"l_shipdate":{"string":"1998-04-13"},"l_commitdate":{"string":"1998-05-24"},
+ //
"l_receiptdate":{"string":"1998-04-30"},"l_shipinstruct":{"string":"DELIVER IN
PERSON"},
+ // "l_shipmode":{"string":"RAIL"},"l_comment":{"string":" wake braids.
"}}
+ "lineitem", new String[]{
+ "l_orderkey", LONG_TYPE, "false", "l_partkey", LONG_TYPE, "false",
"l_suppkey", LONG_TYPE, "false",
+ "l_linenumber", LONG_TYPE, "true", "l_quantity", LONG_TYPE, "true",
"l_extendedprice", DOUBLE_TYPE, "true",
+ "l_discount", DOUBLE_TYPE, "true", "l_tax", DOUBLE_TYPE, "false",
"l_returnflag", STRING_TYPE, "false",
+ "l_linestatus", STRING_TYPE, "false", "l_shipdate", STRING_TYPE,
"false", "l_commitdate", STRING_TYPE,
+ "false", "l_receiptdate", STRING_TYPE, "false", "l_shipinstruct",
STRING_TYPE, "false", "l_shipmode",
+ STRING_TYPE, "false", "l_comment", STRING_TYPE, "false"
+ },
+ // One example record:
+ //
{"n_nationkey":{"long":0},"n_name":{"string":"ALGERIA"},"n_regionkey":{"long":0},
+ // "n_comment":{"string":" haggle. carefully final deposits detect slyly
agai"}}
+ "nation", new String[]{
+ "n_nationkey", LONG_TYPE, "false", "n_name", STRING_TYPE, "true",
"n_regionkey", LONG_TYPE, "false",
+ "n_comment", STRING_TYPE, "false"
+ },
+ // One example record:
+ //
{"o_orderkey":{"long":1},"o_custkey":{"long":370},"o_orderstatus":{"string":"O"},
+ //
"o_totalprice":{"double":173665.47},"o_orderdate":{"string":"1996-01-02"},
+ //
"o_orderpriority":{"string":"5-LOW"},"o_clerk":{"string":"Clerk#000000951"},"o_shippriority":{"long":0},
+ // "o_comment":{"string":"ly final dependencies: slyly bold "}}
+ "orders", new String[]{
+ "o_orderkey", LONG_TYPE, "false", "o_custkey", LONG_TYPE, "false",
"o_orderstatus", STRING_TYPE, "true",
+ "o_totalprice", DOUBLE_TYPE, "true", "o_orderdate", STRING_TYPE,
"false", "o_orderpriority", STRING_TYPE,
+ "false", "o_clerk", STRING_TYPE, "true", "o_shippriority",
LONG_TYPE, "true", "o_comment", STRING_TYPE, "true"
+ },
+ // One example record:
+ // {"p_partkey":{"long":1},"p_name":{"string":"goldenrod lavender spring
chocolate lace"},
+ // "p_mfgr":{"string":"Manufacturer#1"},"p_brand":{"string":"Brand#13"},
+ // "p_type":{"string":"PROMO BRUSHED
STEEL"},"p_size":{"long":7},"p_container":{"string":"JUMBO PKG"},
+ // "p_retailprice":{"double":901.0},"p_comment":{"string":"ly final
dependencies: slyly bold "}}
+ "part", new String[]{
+ "p_partkey", LONG_TYPE, "false", "p_name", STRING_TYPE, "true",
"p_mfgr", STRING_TYPE, "true", "p_brand",
+ STRING_TYPE, "true", "p_type", STRING_TYPE, "true", "p_size",
LONG_TYPE, "true", "p_container", STRING_TYPE,
+ "true", "p_retailprice", DOUBLE_TYPE, "true", "p_comment",
STRING_TYPE, "true"
+ },
+ // One example record:
+ //
{"ps_partkey":{"long":1},"ps_suppkey":{"long":2},"ps_availqty":{"long":997},
+ // "ps_supplycost":{"double":6.02},"ps_comment":{"string":"ly final
dependencies: slyly bold "}}
+ "partsupp", new String[]{
+ "ps_partkey", LONG_TYPE, "false", "ps_suppkey", LONG_TYPE, "false",
"ps_availqty", LONG_TYPE, "true",
+ "ps_supplycost", DOUBLE_TYPE, "true", "ps_comment", STRING_TYPE,
"true"
+ },
+ // One example record:
+ //
{"r_regionkey":{"long":0},"r_name":{"string":"AFRICA"},"r_comment":{"string":"lar
deposits. blithe"}}
+ "region", new String[]{
+ "r_regionkey", LONG_TYPE, "false", "r_name", STRING_TYPE, "true",
"r_comment", STRING_TYPE, "false"
+ },
+ // One example record:
+ // {"s_suppkey":{"long":1},"s_name":{"string":"Supplier#000000001"},
+ // "s_address":{"string":" N kD4on9OM
Ipw3,gf0JBoQDd7tgrzrddZ"},"s_nationkey":{"long":15},
+ // "s_phone":{"string":"25-989-741-2988"},"s_acctbal":{"double":711.56},
+ // "s_comment":{"string":" deposits eat slyly ironic, even instructions.
express foxes detect slyly.
+ // blithely even accounts abov"}}
+ "supplier", new String[]{
+ "s_suppkey", LONG_TYPE, "false", "s_name", STRING_TYPE, "true",
"s_address", STRING_TYPE, "true",
+ "s_nationkey", LONG_TYPE, "false", "s_phone", STRING_TYPE, "true",
"s_acctbal", DOUBLE_TYPE, "false",
+ "s_comment", STRING_TYPE, "false"
+ });
+
+ private TblToAvroMultiValue() {
+ }
+
+ public static void main(String[] args)
+ throws IOException {
+// String tpchTblFileFolder = args[0];
+ String tpchTblFileFolder = FOLDER_PATH;
+ for (Map.Entry<String, String[]> entry : TABLE_NAME_FIELDS_MAP.entrySet())
{
+ String tableName = entry.getKey();
+ String[] fieldsAndTypes = entry.getValue();
+ SchemaBuilder.FieldAssembler<Schema> schemaFields =
SchemaBuilder.record(tableName).fields();
+ for (int i = 0; i < fieldsAndTypes.length; i += 3) {
+ String fieldName = fieldsAndTypes[i];
+ String fieldType = fieldsAndTypes[i + 1];
+ Boolean isMultiValue = Boolean.parseBoolean(fieldsAndTypes[i + 2]);
+ switch (fieldType) {
+ case LONG_TYPE:
+ if (isMultiValue) {
+
schemaFields.name(fieldName).type().array().items().longType().noDefault();
+ } else {
+
schemaFields.name(fieldName).type().unionOf().longType().and().nullType().endUnion().noDefault();
+ }
+ break;
+ case DOUBLE_TYPE:
+ if (isMultiValue) {
+
schemaFields.name(fieldName).type().array().items().doubleType().noDefault();
+ } else {
+
schemaFields.name(fieldName).type().unionOf().doubleType().and().nullType().endUnion().noDefault();
+ }
+ break;
+ case STRING_TYPE:
+ if (isMultiValue) {
+
schemaFields.name(fieldName).type().array().items().stringType().noDefault();
+ } else {
+
schemaFields.name(fieldName).type().unionOf().stringType().and().nullType().endUnion().noDefault();
+ }
+
+ break;
+ default:
+ throw new IllegalStateException("Unsupported field type: " +
fieldType);
+ }
+ }
+ Schema schema = schemaFields.endRecord();
+
+ // Open Avro data file for writing
+ DatumWriter<GenericRecord> datumWriter = new
GenericDatumWriter<>(schema);
+ DataFileWriter<GenericRecord> dataFileWriter = new
DataFileWriter<>(datumWriter);
+ Path avroFilePath = Paths.get(tpchTblFileFolder + tableName +
"_multivalue" + Constants.AVRO_FILE_SUFFIX);
+ Files.deleteIfExists(avroFilePath);
+ OutputStream outputStream = Files.newOutputStream(avroFilePath);
+ dataFileWriter.create(schema, outputStream);
+
+ // Read TPC-H .tbl files and convert to Avro format
+ try (
+ BufferedReader reader = new BufferedReader(new
FileReader(tpchTblFileFolder + tableName + TBL_FILE_SUFFIX))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ String[] fields = line.split("\\|");
+ GenericRecord record = new GenericData.Record(schema);
+ for (int i = 0; i < fields.length; i++) {
+ String fieldName = fieldsAndTypes[3 * i];
+ String fieldType = fieldsAndTypes[3 * i + 1];
+ Boolean isMultiValue = Boolean.parseBoolean(fieldsAndTypes[3 * i +
2]);
+ switch (fieldType) {
+ case LONG_TYPE:
+ if (isMultiValue) {
+ record.put(fieldName,
Arrays.asList(Long.parseLong(fields[i])));
+ } else {
+ record.put(fieldName, Long.parseLong(fields[i]));
+ }
+ break;
+ case DOUBLE_TYPE:
+ if (isMultiValue) {
+ record.put(fieldName,
Arrays.asList(Double.parseDouble(fields[i])));
+ } else {
+ record.put(fieldName, Double.parseDouble(fields[i]));
+ }
+ break;
+ case STRING_TYPE:
+ if (isMultiValue) {
+ record.put(fieldName, Arrays.asList(fields[i]));
+ } else {
+ record.put(fieldName, fields[i]);
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unsupported field type: " +
fieldType);
+ }
+ }
+ dataFileWriter.append(record);
+ }
+ }
+
+ // Close Avro data file
+ dataFileWriter.close();
+ }
+ }
+}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Column.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Column.java
new file mode 100644
index 0000000000..549136ef50
--- /dev/null
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Column.java
@@ -0,0 +1,97 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+public class Column {
+ private static final Random RANDOM = new Random();
+ private final String _columnName;
+ private final ColumnType _columnType;
+ private List<String> _sampleValues;
+ private boolean _isMultiValue;
+
+ public Column(String columnName, ColumnType columnType) {
+ _columnName = columnName;
+ _columnType = columnType;
+ _sampleValues = new ArrayList<>();
+ generateSampleValues();
+ }
+
+ private void generateSampleValues() {
+ switch (_columnType) {
+ case STRING:
+ for (int i = 0; i < 10; i++) {
+ _sampleValues.add(UUID.randomUUID().toString());
+ }
+ break;
+ case NUMERIC:
+ for (int i = 0; i < 10; i++) {
+ _sampleValues.add(String.valueOf(RANDOM.nextInt(1000)));
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unsupported column type: " +
_columnType);
+ }
+ }
+
+ public boolean isMultiValue() {
+ return _isMultiValue;
+ }
+
+ public void setMultiValue(boolean isMultiValue) {
+ _isMultiValue = isMultiValue;
+ }
+
+ public void setSampleValues(List<String> sampleValues) {
+ _sampleValues = sampleValues;
+ }
+
+ public String getColumnName() {
+ return _columnName;
+ }
+
+ public String getColumnNameForPredicate(String prefix) {
+ return prefix + _columnName;
+ }
+
+ public String getColumnNameForPinotPredicate(String prefix) {
+ if (isMultiValue()) {
+ return "arrayToMv(" + prefix + _columnName + ")";
+ } else {
+ return prefix + _columnName;
+ }
+ }
+
+ public ColumnType getColumnType() {
+ return _columnType;
+ }
+
+ public String getRandomStringValue() {
+ return _sampleValues.get(RANDOM.nextInt(_sampleValues.size()));
+ }
+
+ public long getRandomNumericValue() {
+ return (long) Double.parseDouble(getRandomStringValue());
+ }
+}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/ColumnType.java
similarity index 51%
copy from
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/ColumnType.java
index 7fbae29c08..9783446959 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/ColumnType.java
@@ -1,3 +1,8 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.List;
+
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -16,29 +21,15 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.pinot.integration.tests.tpch;
-
-import java.io.File;
-
-
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
- static final String[] TPCH_TABLE_NAMES = {
- "customer", "lineitem", "nation", "orders", "part", "partsupp",
"region", "supplier"
- };
- static final String AVRO_FILE_SUFFIX = ".avro";
- private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX =
"examples/batch/tpch/";
- private Constants() {
- }
+public enum ColumnType {
+ NUMERIC(List.of("=", "!=", "<", ">", "<=", ">=", "<>"), List.of("max",
"min", "avg", "sum", "count")),
+ STRING(List.of("=", "!=", "LIKE"), List.of());
- static String getTableResourceFolder(String tableName) {
- return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
- }
+ List<String> _operators;
+ List<String> _aggregations;
- static String getTableAvroFilePath(String tableName) {
- return getTableResourceFolder(tableName) + File.separator
- + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+ ColumnType(List<String> operators, List<String> aggregateFunctions) {
+ _operators = operators;
+ _aggregations = aggregateFunctions;
}
}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/PinotQueryBasedColumnDataProvider.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/PinotQueryBasedColumnDataProvider.java
new file mode 100644
index 0000000000..15f66557d2
--- /dev/null
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/PinotQueryBasedColumnDataProvider.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.client.ResultSet;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONException;
+
+
+public class PinotQueryBasedColumnDataProvider implements
SampleColumnDataProvider {
+ public interface PinotConnectionProvider {
+ org.apache.pinot.client.Connection getConnection();
+ }
+
+ private static final String COUNT_START_QUERY_FORMAT = "SELECT COUNT(*) FROM
%s";
+ private static final String QUERY_FORMAT = "SELECT %s FROM %s WHERE $docId
IN (%s)";
+ private final PinotConnectionProvider _pinotConnectionProvider;
+
+ public PinotQueryBasedColumnDataProvider(PinotConnectionProvider
connectionProvider) {
+ _pinotConnectionProvider = connectionProvider;
+ }
+
+ @Override
+ public Pair<Boolean, List<String>> getSampleValues(String tableName, String
columnName)
+ throws JSONException {
+ String countStarQuery = String.format(COUNT_START_QUERY_FORMAT, tableName);
+ boolean isMultiValue = false;
+ int count =
_pinotConnectionProvider.getConnection().execute(countStarQuery).getResultSet(0).getInt(0);
+
+ StringBuilder randomDocIds = new StringBuilder();
+
+ for (int i = 0; i < 10; i++) {
+ randomDocIds.append((int) (Math.random() * count));
+ if (i != 9) {
+ randomDocIds.append(", ");
+ }
+ }
+
+ String query = String.format(QUERY_FORMAT, columnName, tableName,
randomDocIds);
+
+ List<String> columnValues = new ArrayList<>();
+ ResultSet resultSet =
_pinotConnectionProvider.getConnection().execute(query).getResultSet(0);
+
+ for (int i = 0; i < resultSet.getRowCount(); i++) {
+ if (resultSet.getColumnDataType(0).contains("ARRAY")) {
+ String array = resultSet.getString(i, 0);
+ JSONArray jsnobject = new JSONArray(array);
+ columnValues.add(jsnobject.get(0).toString());
+ isMultiValue = true;
+ } else {
+ columnValues.add(resultSet.getString(i, 0));
+ }
+ }
+
+ return Pair.of(isMultiValue, columnValues);
+ }
+}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/QuerySkeleton.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/QuerySkeleton.java
new file mode 100644
index 0000000000..067505aa97
--- /dev/null
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/QuerySkeleton.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+public class QuerySkeleton {
+ private final List<String> _projections;
+ private final List<String> _predicates;
+ private final List<String> _groupByColumns;
+ private final List<String> _orderByColumns;
+ private final List<String> _tables;
+
+ public QuerySkeleton() {
+ _projections = new ArrayList<>();
+ _predicates = new ArrayList<>();
+ _groupByColumns = new ArrayList<>();
+ _orderByColumns = new ArrayList<>();
+ _tables = new ArrayList<>();
+ }
+
+ public void addTable(String table) {
+ _tables.add(table);
+ }
+
+ public void addProjection(String projection) {
+ _projections.add(projection);
+ }
+
+ public QuerySkeleton addPredicate(String predicate) {
+ _predicates.add(predicate);
+ return this;
+ }
+
+ public QuerySkeleton addGroupByColumn(String groupByColumn) {
+ _groupByColumns.add(groupByColumn);
+ return this;
+ }
+
+ public QuerySkeleton addOrderByColumn(String orderByColumn) {
+ _orderByColumns.add(orderByColumn);
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder query = new StringBuilder();
+ query.append("SELECT ");
+ _projections.forEach(predicate -> query.append(predicate).append(", "));
+ query.delete(query.length() - 2, query.length());
+
+ query.append(" FROM ");
+ _tables.forEach(table -> query.append(table).append(", "));
+ query.delete(query.length() - 2, query.length());
+
+ if (_predicates.size() > 0) {
+ query.append(" WHERE ");
+ _predicates.forEach(predicate -> query.append(predicate).append(" AND
"));
+ query.delete(query.length() - 5, query.length());
+ }
+
+ if (_groupByColumns.size() > 0) {
+ query.append(" GROUP BY ");
+ _groupByColumns.forEach(groupByColumn ->
query.append(groupByColumn).append(", "));
+ query.delete(query.length() - 2, query.length());
+ }
+
+ if (_orderByColumns.size() > 0) {
+ query.append(" ORDER BY ");
+ _orderByColumns.forEach(orderByColumn ->
query.append(orderByColumn).append(", "));
+ query.delete(query.length() - 2, query.length());
+ }
+
+ return query.toString();
+ }
+}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/RelatedTable.java
similarity index 52%
copy from
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/RelatedTable.java
index 7fbae29c08..6796f3a283 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/RelatedTable.java
@@ -1,3 +1,5 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -16,29 +18,26 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.pinot.integration.tests.tpch;
-
-import java.io.File;
+class RelatedTable {
+ private final String _foreignTableName;
+ private final String _foreignTableKey;
+ private final String _localTableKey;
+ public RelatedTable(String foreignTableName, String foreignTableKey, String
localTableKey) {
+ _foreignTableName = foreignTableName;
+ _foreignTableKey = foreignTableKey;
+ _localTableKey = localTableKey;
+ }
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
- static final String[] TPCH_TABLE_NAMES = {
- "customer", "lineitem", "nation", "orders", "part", "partsupp",
"region", "supplier"
- };
- static final String AVRO_FILE_SUFFIX = ".avro";
- private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX =
"examples/batch/tpch/";
- private Constants() {
+ public String getForeignTableName() {
+ return _foreignTableName;
}
- static String getTableResourceFolder(String tableName) {
- return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
+ public String getForeignTableKey() {
+ return _foreignTableKey;
}
- static String getTableAvroFilePath(String tableName) {
- return getTableResourceFolder(tableName) + File.separator
- + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+ public String getLocalTableKey() {
+ return _localTableKey;
}
}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/SampleColumnDataProvider.java
similarity index 51%
copy from
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/SampleColumnDataProvider.java
index 7fbae29c08..a322bc6680 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/SampleColumnDataProvider.java
@@ -16,29 +16,14 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.pinot.integration.tests.tpch;
+package org.apache.pinot.integration.tests.tpch.generator;
-import java.io.File;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.codehaus.jettison.json.JSONException;
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
- static final String[] TPCH_TABLE_NAMES = {
- "customer", "lineitem", "nation", "orders", "part", "partsupp",
"region", "supplier"
- };
- static final String AVRO_FILE_SUFFIX = ".avro";
- private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX =
"examples/batch/tpch/";
- private Constants() {
- }
-
- static String getTableResourceFolder(String tableName) {
- return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
- }
-
- static String getTableAvroFilePath(String tableName) {
- return getTableResourceFolder(tableName) + File.separator
- + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
- }
+public interface SampleColumnDataProvider {
+ Pair<Boolean, List<String>> getSampleValues(String tableName, String
columnName)
+ throws JSONException;
}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/TPCHQueryGeneratorV2.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/TPCHQueryGeneratorV2.java
new file mode 100644
index 0000000000..678dee8a3e
--- /dev/null
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/TPCHQueryGeneratorV2.java
@@ -0,0 +1,553 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import org.apache.commons.lang3.tuple.Pair;
+import org.codehaus.jettison.json.JSONException;
+
+
+public class TPCHQueryGeneratorV2 {
+ private static final Map<String, Table> TABLES_MAP = new HashMap<>();
+ private static final List<String> TABLE_NAMES =
+ List.of("nation", "region", "supplier", "customer", "part", "partsupp",
"orders", "lineitem");
+ private static final String[] JOIN_TYPES = {
+ "INNER JOIN", "LEFT JOIN", "RIGHT JOIN"
+ };
+ private final SampleColumnDataProvider _sampleColumnDataProvider;
+ private final Random _random = new Random();
+
+ public TPCHQueryGeneratorV2() {
+ _sampleColumnDataProvider = null;
+ }
+
+ public TPCHQueryGeneratorV2(SampleColumnDataProvider
sampleColumnDataProvider) {
+ _sampleColumnDataProvider = sampleColumnDataProvider;
+ }
+
+ private static Table getRandomTable() {
+ Random random = new Random();
+ int index = random.nextInt(TABLES_MAP.size());
+ return TABLES_MAP.get(TABLE_NAMES.get(index));
+ }
+
+ private void addRelation(String t1, String t2, String t1Key, String t2Key) {
+ TABLES_MAP.get(t1).addRelation(t2, t2Key, t1Key);
+ TABLES_MAP.get(t2).addRelation(t1, t1Key, t2Key);
+ }
+
+ public void init() {
+ TABLES_MAP.put("nation", new Table("nation",
+ List.of(new Column("n_nationkey", ColumnType.NUMERIC), new
Column("n_name", ColumnType.STRING),
+ new Column("n_regionkey", ColumnType.NUMERIC), new
Column("n_comment", ColumnType.STRING))));
+
+ TABLES_MAP.put("region", new Table("region",
+ List.of(new Column("r_regionkey", ColumnType.NUMERIC), new
Column("r_name", ColumnType.STRING),
+ new Column("r_comment", ColumnType.STRING))));
+
+ TABLES_MAP.put("supplier", new Table("supplier",
+ List.of(new Column("s_suppkey", ColumnType.NUMERIC), new
Column("s_name", ColumnType.STRING),
+ new Column("s_address", ColumnType.STRING), new
Column("s_nationkey", ColumnType.NUMERIC),
+ new Column("s_phone", ColumnType.STRING), new Column("s_acctbal",
ColumnType.NUMERIC),
+ new Column("s_comment", ColumnType.STRING))));
+
+ TABLES_MAP.put("customer", new Table("customer",
+ List.of(new Column("c_custkey", ColumnType.NUMERIC), new
Column("c_name", ColumnType.STRING),
+ new Column("c_address", ColumnType.STRING), new
Column("c_nationkey", ColumnType.NUMERIC),
+ new Column("c_phone", ColumnType.STRING), new Column("c_acctbal",
ColumnType.NUMERIC),
+ new Column("c_mktsegment", ColumnType.STRING), new
Column("c_comment", ColumnType.STRING))));
+
+ TABLES_MAP.put("part", new Table("part",
+ List.of(new Column("p_partkey", ColumnType.NUMERIC), new
Column("p_name", ColumnType.STRING),
+ new Column("p_mfgr", ColumnType.STRING), new Column("p_brand",
ColumnType.STRING),
+ new Column("p_type", ColumnType.STRING), new Column("p_size",
ColumnType.NUMERIC),
+ new Column("p_container", ColumnType.STRING), new
Column("p_retailprice", ColumnType.NUMERIC),
+ new Column("p_comment", ColumnType.STRING))));
+
+ TABLES_MAP.put("partsupp", new Table("partsupp",
+ List.of(new Column("ps_partkey", ColumnType.NUMERIC), new
Column("ps_suppkey", ColumnType.NUMERIC),
+ new Column("ps_availqty", ColumnType.NUMERIC), new
Column("ps_supplycost", ColumnType.NUMERIC),
+ new Column("ps_comment", ColumnType.STRING))));
+
+ TABLES_MAP.put("orders", new Table("orders",
+ List.of(new Column("o_orderkey", ColumnType.NUMERIC), new
Column("o_custkey", ColumnType.NUMERIC),
+ new Column("o_orderstatus", ColumnType.STRING), new
Column("o_totalprice", ColumnType.NUMERIC),
+ new Column("o_orderdate", ColumnType.STRING), new
Column("o_orderpriority", ColumnType.STRING),
+ new Column("o_clerk", ColumnType.STRING), new
Column("o_shippriority", ColumnType.STRING),
+ new Column("o_comment", ColumnType.STRING))));
+
+ TABLES_MAP.put("lineitem", new Table("lineitem",
+ List.of(new Column("l_orderkey", ColumnType.NUMERIC), new
Column("l_partkey", ColumnType.NUMERIC),
+ new Column("l_suppkey", ColumnType.NUMERIC), new
Column("l_linenumber", ColumnType.NUMERIC),
+ new Column("l_quantity", ColumnType.NUMERIC), new
Column("l_extendedprice", ColumnType.NUMERIC),
+ new Column("l_discount", ColumnType.NUMERIC), new Column("l_tax",
ColumnType.NUMERIC),
+ new Column("l_returnflag", ColumnType.STRING), new
Column("l_linestatus", ColumnType.STRING),
+ new Column("l_shipdate", ColumnType.STRING), new
Column("l_commitdate", ColumnType.STRING),
+ new Column("l_receiptdate", ColumnType.STRING), new
Column("l_shipinstruct", ColumnType.STRING),
+ new Column("l_shipmode", ColumnType.STRING), new
Column("l_comment", ColumnType.STRING))));
+
+ addRelation("nation", "region", "n_regionkey", "r_regionkey");
+ addRelation("supplier", "nation", "s_nationkey", "n_nationkey");
+ addRelation("supplier", "customer", "s_nationkey", "c_nationkey");
+ addRelation("supplier", "partsupp", "s_suppkey", "ps_suppkey");
+ addRelation("customer", "nation", "c_nationkey", "n_nationkey");
+ addRelation("orders", "customer", "o_custkey", "c_custkey");
+ addRelation("lineitem", "orders", "l_orderkey", "o_orderkey");
+ addRelation("lineitem", "part", "l_partkey", "p_partkey");
+ addRelation("lineitem", "supplier", "l_suppkey", "s_suppkey");
+ addRelation("lineitem", "partsupp", "l_partkey", "ps_partkey");
+ addRelation("lineitem", "partsupp", "l_suppkey", "ps_partkey");
+ addRelation("part", "partsupp", "p_partkey", "ps_partkey");
+
+ if (_sampleColumnDataProvider != null) {
+ TABLES_MAP.forEach((tableName, table) -> {
+ table.getColumns().forEach(column -> {
+ Pair<Boolean, List<String>> sampleValues = null;
+ try {
+ sampleValues =
_sampleColumnDataProvider.getSampleValues(tableName, column.getColumnName());
+ } catch (JSONException e) {
+ throw new RuntimeException(e);
+ }
+ column.setSampleValues(sampleValues.getRight());
+ column.setMultiValue(sampleValues.getLeft());
+ });
+ });
+ }
+ }
+
+ private List<String> getRandomProjections(Table t1) {
+ Random random = new Random();
+ int numColumns = random.nextInt(t1.getColumns().size()) + 1;
+ List<String> selectedColumns = new ArrayList<>();
+
+ while (selectedColumns.size() < numColumns) {
+ String columnName =
t1.getColumns().get(random.nextInt(t1.getColumns().size())).getColumnName();
+ if (!selectedColumns.contains(columnName)) {
+ selectedColumns.add(columnName);
+ }
+ }
+
+ return selectedColumns;
+ }
+
+ private String generateInnerQueryForPredicate(Table t1, Column c) {
+ QuerySkeleton innerQuery = new QuerySkeleton();
+
+ Random random = new Random();
+ List<String> predicates = new ArrayList<>();
+ innerQuery.addTable(t1.getTableName());
+ // Limit to maximum of 1 join
+ if (random.nextBoolean()) {
+ RelatedTable relatedTable =
t1.getRelatedTables().get(random.nextInt(t1.getRelatedTables().size()));
+ if (relatedTable != null) {
+ innerQuery.addTable(relatedTable.getForeignTableName());
+ predicates.add(relatedTable.getLocalTableKey() + "=" +
relatedTable.getForeignTableKey());
+
+ List<String> inp =
getRandomPredicates(TABLES_MAP.get(relatedTable.getForeignTableName()), false);
+ predicates.addAll(inp);
+ }
+ }
+ String aggregation =
c.getColumnType()._aggregations.get(random.nextInt(c.getColumnType()._aggregations.size()));
+ innerQuery.addProjection(aggregation + "(" + c.getColumnName() + ")");
+
+ List<String> inp = getRandomPredicates(t1, false);
+
+ predicates.addAll(inp);
+
+ predicates.forEach(innerQuery::addPredicate);
+ return innerQuery.toString();
+ }
+
+ private String getRandomValueForPredicate(Table t1, Column c, boolean
useNextedQueries) {
+ Random random = new Random();
+ if (random.nextBoolean() && useNextedQueries &&
c.getColumnType()._aggregations.size() > 0) {
+ // Use nested query for predicate
+ String nestedQueries = generateInnerQueryForPredicate(t1, c);
+ return "(" + nestedQueries + ")";
+ } else {
+ if (c.getColumnType() == ColumnType.STRING) {
+ return "'" + c.getRandomStringValue() + "'";
+ } else {
+ return String.valueOf(c.getRandomNumericValue());
+ }
+ }
+ }
+
+ private List<String> getRandomPredicates(Table t1, boolean useNestedQueries)
{
+ Random random = new Random();
+ int predicateCount = random.nextInt(5) + 1;
+ List<String> predicates = new ArrayList<>();
+ List<String> results = new ArrayList<>();
+ while (predicates.size() < predicateCount) {
+ Column column =
t1.getColumns().get(random.nextInt(t1.getColumns().size()));
+ predicates.add(column.getColumnName());
+ ColumnType columnType = column.getColumnType();
+ String operator =
columnType._operators.get(random.nextInt(columnType._operators.size()));
+ String value = getRandomValueForPredicate(t1, column, useNestedQueries);
+ String predicateBuilder = column.getColumnName() + " " + operator + " "
+ value + " ";
+ results.add(predicateBuilder);
+ }
+
+ return results;
+ }
+
+ private List<String> getRandomPredicates(Table t1) {
+ return getRandomPredicates(t1, true);
+ }
+
+ private List<String> getRandomOrderBys(Table t1) {
+ Random random = new Random();
+ int orderByCount = random.nextInt(2) + 1;
+ List<String> orderBys = new ArrayList<>();
+ List<String> results = new ArrayList<>();
+ while (orderBys.size() < orderByCount) {
+ Column column =
t1.getColumns().get(random.nextInt(t1.getColumns().size()));
+ orderBys.add(column.getColumnName());
+ String name = column.getColumnName();
+ StringBuilder orderByBuilder = new StringBuilder();
+ orderByBuilder.append(name).append(" ");
+ if (random.nextBoolean()) {
+ orderByBuilder.append(" DESC ");
+ }
+ results.add(orderByBuilder.toString());
+ }
+
+ return results;
+ }
+
+ public String generateSelectionOnlyQuery(boolean includePredicates, boolean
includeOrderBy) {
+ QuerySkeleton querySkeleton = new QuerySkeleton();
+ Table t1 = getRandomTable();
+ querySkeleton.addTable(t1.getTableName());
+
+ getRandomProjections(t1).forEach(querySkeleton::addProjection);
+
+ if (includePredicates) {
+ List<String> predicates = getRandomPredicates(t1);
+ predicates.forEach(querySkeleton::addPredicate);
+ }
+
+ if (includeOrderBy) {
+ getRandomOrderBys(t1).forEach(querySkeleton::addOrderByColumn);
+ }
+
+ return querySkeleton.toString();
+ }
+
+ private List<String> getRandomOrderBys(Table t1, List<String> groupByCols) {
+ List<String> result = new ArrayList<>();
+ if (groupByCols.size() == 0) {
+ return result;
+ }
+ Random random = new Random();
+ List<String> orderBys = new ArrayList<>();
+ int orderByCount = random.nextInt(groupByCols.size()) + 1;
+ while (orderBys.size() < orderByCount) {
+ String column = groupByCols.get(random.nextInt(groupByCols.size()));
+
+ if (groupByCols.contains(column)) {
+ orderBys.add(column);
+ if (random.nextBoolean()) {
+ result.add(column + " DESC");
+ } else {
+ result.add(column);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ public String selectionOnlyWithJoins(boolean includePredicates, boolean
includeOrderBy) {
+ QuerySkeleton querySkeleton = new QuerySkeleton();
+
+ Table t1;
+ while (true) {
+ t1 = getRandomTable();
+ if (t1.getRelatedTables().size() > 0) {
+ break;
+ }
+ }
+
+ Random random = new Random();
+ RelatedTable rt =
t1.getRelatedTables().get(random.nextInt(t1.getRelatedTables().size()));
+ Table t2 = TABLES_MAP.get(rt.getForeignTableName());
+ getRandomProjections(t1).forEach(querySkeleton::addProjection);
+ getRandomProjections(t2).forEach(querySkeleton::addProjection);
+
+ String t2NameWithJoin =
+ t1.getTableName() + " " +
JOIN_TYPES[random.nextInt(JOIN_TYPES.length)] + " " + t2.getTableName() + " ON "
+ + rt.getLocalTableKey() + " = " + rt.getForeignTableKey() + " ";
+ querySkeleton.addTable(t2NameWithJoin);
+
+ if (includePredicates) {
+ List<String> predicates = getRandomPredicates(t1);
+ predicates.forEach(querySkeleton::addPredicate);
+
+ predicates = getRandomPredicates(t2);
+ predicates.forEach(querySkeleton::addPredicate);
+ }
+
+ if (includeOrderBy) {
+ getRandomOrderBys(t1).forEach(querySkeleton::addOrderByColumn);
+ }
+
+ return querySkeleton.toString();
+ }
+
+ private Pair<List<String>, List<String>> getGroupByAndAggregates(Table t1) {
+ Random random = new Random();
+ int numColumns = random.nextInt(t1.getColumns().size()) + 1;
+ List<String> selectedColumns = new ArrayList<>();
+ List<String> groupByColumns = new ArrayList<>();
+ List<String> resultProjections = new ArrayList<>();
+
+ while (selectedColumns.size() < numColumns) {
+ Column column =
t1.getColumns().get(random.nextInt(t1.getColumns().size()));
+ String columnName = column.getColumnName();
+ if (!selectedColumns.contains(columnName)) {
+ if (random.nextBoolean() &&
column.getColumnType()._aggregations.size() > 0) {
+ // Use as aggregation
+ String aggregation =
+
column.getColumnType()._aggregations.get(random.nextInt(column.getColumnType()._aggregations.size()));
+ resultProjections.add(aggregation + "(" + columnName + ")");
+ } else {
+ // Use as group by
+ groupByColumns.add(columnName);
+ resultProjections.add(columnName);
+ }
+ selectedColumns.add(columnName);
+ }
+ }
+
+ return Pair.of(resultProjections, groupByColumns);
+ }
+
+ public String selectionOnlyWithGroupBy(boolean includePredicates, boolean
includeOrderBy) {
+ QuerySkeleton querySkeleton = new QuerySkeleton();
+
+ Table t1 = getRandomTable();
+ Pair<List<String>, List<String>> cols = getGroupByAndAggregates(t1);
+ cols.getLeft().forEach(querySkeleton::addProjection);
+
+ querySkeleton.addTable(t1.getTableName());
+
+ cols.getRight().forEach(querySkeleton::addGroupByColumn);
+ if (includePredicates) {
+ List<String> preds = getRandomPredicates(t1);
+ preds.forEach(querySkeleton::addPredicate);
+ }
+
+ if (includeOrderBy && cols.getRight().size() > 0) {
+ getRandomOrderBys(t1,
cols.getRight()).forEach(querySkeleton::addOrderByColumn);
+ }
+
+ return querySkeleton.toString();
+ }
+
+ public String selectionOnlyGroupByWithJoins(boolean includePredicates,
boolean includeOrderBy) {
+ QuerySkeleton querySkeleton = new QuerySkeleton();
+ Table t1;
+ while (true) {
+ t1 = getRandomTable();
+ if (t1.getRelatedTables().size() > 0) {
+ break;
+ }
+ }
+
+ Random random = new Random();
+ RelatedTable rt =
t1.getRelatedTables().get(random.nextInt(t1.getRelatedTables().size()));
+ Table t2 = TABLES_MAP.get(rt.getForeignTableName());
+ Pair<List<String>, List<String>> groupByColumns =
getGroupByAndAggregates(t1);
+ groupByColumns.getLeft().forEach(querySkeleton::addProjection);
+
+ Pair<List<String>, List<String>> groupByColumnsT2 =
getGroupByAndAggregates(t2);
+ groupByColumnsT2.getLeft().forEach(querySkeleton::addProjection);
+
+ String tName =
+ t1.getTableName() + " " +
JOIN_TYPES[random.nextInt(JOIN_TYPES.length)] + " " + t2.getTableName() + " ON "
+ + " " + rt.getLocalTableKey() + " = " + " " +
rt.getForeignTableKey() + " ";
+
+ querySkeleton.addTable(tName);
+
+ groupByColumns.getRight().forEach(querySkeleton::addGroupByColumn);
+ groupByColumnsT2.getRight().forEach(querySkeleton::addGroupByColumn);
+
+ if (includePredicates) {
+ List<String> predicates = getRandomPredicates(t1);
+ predicates.forEach(querySkeleton::addPredicate);
+ }
+
+ if (includeOrderBy) {
+ getRandomOrderBys(t1,
groupByColumns.getRight()).forEach(querySkeleton::addOrderByColumn);
+ getRandomOrderBys(t2,
groupByColumnsT2.getRight()).forEach(querySkeleton::addOrderByColumn);
+ }
+
+ return querySkeleton.toString();
+ }
+
+ public String selectionOnlyMultiJoin(boolean includePredicates, boolean
includeOrderBy) {
+ QuerySkeleton querySkeleton = new QuerySkeleton();
+
+ List<String> predicates = new ArrayList<>();
+ List<Table> tables = new ArrayList<>();
+ Set<String> tableNames = new HashSet<>();
+
+ Random random = new Random();
+
+ // Start off with a random table with related tables
+ while (true) {
+ Table t1 = getRandomTable();
+ if (t1.getRelatedTables().size() > 0) {
+ tables.add(t1);
+ tableNames.add(t1.getTableName());
+ break;
+ }
+ }
+
+ // Add more tables
+ while (random.nextInt() % 8 != 0) {
+ int tableToAddIdx = random.nextInt(tables.size());
+ RelatedTable relatedTable = tables.get(tableToAddIdx).getRelatedTables()
+
.get(random.nextInt(tables.get(tableToAddIdx).getRelatedTables().size()));
+ if (!tableNames.contains(relatedTable.getForeignTableName())) {
+ tableNames.add(relatedTable.getForeignTableName());
+
tables.add(TPCHQueryGeneratorV2.TABLES_MAP.get(relatedTable.getForeignTableName()));
+ predicates.add(relatedTable.getLocalTableKey() + "=" +
relatedTable.getForeignTableKey());
+ }
+ }
+
+ for (Table item : tables) {
+ getRandomProjections(item).forEach(querySkeleton::addProjection);
+ }
+ for (Table value : tables) {
+ querySkeleton.addTable(value.getTableName());
+ }
+
+ if (predicates.size() > 0) {
+ for (String predicate : predicates) {
+ querySkeleton.addPredicate(predicate);
+ }
+ }
+
+ if (includePredicates) {
+ for (Table table : tables) {
+ List<String> preds = getRandomPredicates(table);
+ preds.forEach(querySkeleton::addPredicate);
+ }
+ }
+
+ if (includeOrderBy) {
+ for (Table table : tables) {
+ getRandomOrderBys(table).forEach(querySkeleton::addOrderByColumn);
+ }
+ }
+
+ return querySkeleton.toString();
+ }
+
+ public String selectionGroupByMultiJoin(boolean includePredicates, boolean
includeOrderBy) {
+ QuerySkeleton querySkeleton = new QuerySkeleton();
+
+ List<String> predicates = new ArrayList<>();
+ List<Table> tables = new ArrayList<>();
+ Set<String> tableNames = new HashSet<>();
+
+ Random random = new Random();
+
+ // Start off with a random table with related tables
+ while (true) {
+ Table t1 = getRandomTable();
+ if (t1.getRelatedTables().size() > 0) {
+ tables.add(t1);
+ tableNames.add(t1.getTableName());
+ break;
+ }
+ }
+
+ // Add more tables
+ while (random.nextInt() % 8 != 0) {
+ int tableToAddIdx = random.nextInt(tables.size());
+ RelatedTable relatedTable = tables.get(tableToAddIdx).getRelatedTables()
+
.get(random.nextInt(tables.get(tableToAddIdx).getRelatedTables().size()));
+ if (!tableNames.contains(relatedTable.getForeignTableName())) {
+ tableNames.add(relatedTable.getForeignTableName());
+
tables.add(TPCHQueryGeneratorV2.TABLES_MAP.get(relatedTable.getForeignTableName()));
+ predicates.add(relatedTable.getLocalTableKey() + "=" +
relatedTable.getForeignTableKey());
+ }
+ }
+
+ Map<String, List<String>> tableWiseGroupByCols = new HashMap<>();
+ for (Table value : tables) {
+ Pair<List<String>, List<String>> groupByAndAggregates =
getGroupByAndAggregates(value);
+ groupByAndAggregates.getLeft().forEach(querySkeleton::addProjection);
+ groupByAndAggregates.getRight().forEach(querySkeleton::addGroupByColumn);
+ tableWiseGroupByCols.put(value.getTableName(),
groupByAndAggregates.getRight());
+ }
+ for (Table table : tables) {
+ querySkeleton.addTable(table.getTableName());
+ }
+ predicates.forEach(querySkeleton::addPredicate);
+
+ if (includePredicates) {
+ for (Table table : tables) {
+ List<String> preds = getRandomPredicates(table);
+ preds.forEach(querySkeleton::addPredicate);
+ }
+ }
+
+ if (includeOrderBy) {
+ for (Table table : tables) {
+ getRandomOrderBys(table,
tableWiseGroupByCols.get(table.getTableName())).forEach(
+ querySkeleton::addOrderByColumn);
+ }
+ }
+
+ return querySkeleton.toString();
+ }
+
+ public String generateRandomQuery() {
+ Random random = new Random();
+ int queryType = random.nextInt(6);
+ boolean includePredicates = random.nextBoolean();
+ boolean includeOrderBy = true;
+ switch (queryType) {
+ case 0:
+ return generateSelectionOnlyQuery(includePredicates, includeOrderBy);
+ case 1:
+ return selectionOnlyWithJoins(includePredicates, includeOrderBy);
+ case 2:
+ return selectionOnlyWithGroupBy(includePredicates, includeOrderBy);
+ case 3:
+ return selectionOnlyGroupByWithJoins(includePredicates,
includeOrderBy);
+ case 4:
+ return selectionOnlyMultiJoin(includePredicates, includeOrderBy);
+ case 5:
+ return selectionGroupByMultiJoin(includePredicates, includeOrderBy);
+ default:
+ return generateSelectionOnlyQuery(includePredicates, includeOrderBy);
+ }
+ }
+}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Table.java
similarity index 51%
copy from
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Table.java
index 7fbae29c08..90ff494b4e 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Table.java
@@ -1,3 +1,9 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -16,29 +22,30 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.pinot.integration.tests.tpch;
+class Table {
+ private final String _tableName;
+ private final List<Column> _columns;
+ private final List<RelatedTable> _relatedTables;
-import java.io.File;
+ public Table(String tableName, List<Column> columns) {
+ _tableName = tableName;
+ _columns = columns;
+ _relatedTables = new ArrayList<>();
+ }
+ public String getTableName() {
+ return _tableName;
+ }
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
- static final String[] TPCH_TABLE_NAMES = {
- "customer", "lineitem", "nation", "orders", "part", "partsupp",
"region", "supplier"
- };
- static final String AVRO_FILE_SUFFIX = ".avro";
- private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX =
"examples/batch/tpch/";
- private Constants() {
+ public List<Column> getColumns() {
+ return _columns;
}
- static String getTableResourceFolder(String tableName) {
- return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
+ public List<RelatedTable> getRelatedTables() {
+ return _relatedTables;
}
- static String getTableAvroFilePath(String tableName) {
- return getTableResourceFolder(tableName) + File.separator
- + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+ public void addRelation(String foreignTableName, String foreignTableKey,
String localTableKey) {
+ _relatedTables.add(new RelatedTable(foreignTableName, foreignTableKey,
localTableKey));
}
}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
index 59e752a8b8..92991e1947 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
@@ -45,6 +45,11 @@ public class DocIdDictionary extends BaseImmutableDictionary
{
}
}
+ @Override
+ public int indexOf(int value) {
+ return insertionIndexOf(Integer.toString(value));
+ }
+
@Override
public DataType getValueType() {
return DataType.INT;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]