This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 18f3d0a8a5 Add auto generated query tests for tpch (#11511)
18f3d0a8a5 is described below

commit 18f3d0a8a5e1faa6ddcc74562b9e3c464945ce54
Author: Saurabh Dubey <[email protected]>
AuthorDate: Wed Sep 6 09:35:46 2023 +0530

    Add auto generated query tests for tpch (#11511)
    
    Co-authored-by: Saurabh Dubey <[email protected]>
---
 .../pinot/integration/tests/tpch/Constants.java    |  21 +-
 .../tpch/TPCHGeneratedQueryIntegrationTest.java    | 218 ++++++++
 .../tests/tpch/TblToAvroMultiValue.java            | 220 ++++++++
 .../integration/tests/tpch/generator/Column.java   |  97 ++++
 .../{Constants.java => generator/ColumnType.java}  |  35 +-
 .../PinotQueryBasedColumnDataProvider.java         |  76 +++
 .../tests/tpch/generator/QuerySkeleton.java        |  94 ++++
 .../RelatedTable.java}                             |  35 +-
 .../SampleColumnDataProvider.java}                 |  29 +-
 .../tests/tpch/generator/TPCHQueryGeneratorV2.java | 553 +++++++++++++++++++++
 .../tpch/{Constants.java => generator/Table.java}  |  41 +-
 .../segment/index/readers/DocIdDictionary.java     |   5 +
 12 files changed, 1341 insertions(+), 83 deletions(-)

diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
index 7fbae29c08..b09a959946 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
@@ -29,16 +29,29 @@ public final class Constants {
       "customer", "lineitem", "nation", "orders", "part", "partsupp", 
"region", "supplier"
   };
   static final String AVRO_FILE_SUFFIX = ".avro";
-  private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX = 
"examples/batch/tpch/";
+  private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX = 
"examples/batch/tpch";
   private Constants() {
   }
 
   static String getTableResourceFolder(String tableName) {
-    return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
+    return getTableResourceFolder(tableName, false);
+  }
+
+  static String getTableResourceFolder(String tableName, boolean 
useMultiValue) {
+    String path = TPCH_TABLE_RESOURCE_FOLDER_PREFIX;
+    if (useMultiValue) {
+      path += "MultiValue";
+    }
+
+    return path + File.separator + tableName;
   }
 
   static String getTableAvroFilePath(String tableName) {
-    return getTableResourceFolder(tableName) + File.separator
-        + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+    return getTableAvroFilePath(tableName, false);
+  }
+
+  static String getTableAvroFilePath(String tableName, boolean multiValue) {
+    String path = getTableResourceFolder(tableName, multiValue);
+    return path + File.separator + "rawdata" + File.separator + tableName + 
AVRO_FILE_SUFFIX;
   }
 }
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TPCHGeneratedQueryIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TPCHGeneratedQueryIntegrationTest.java
new file mode 100644
index 0000000000..7303ef9d45
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TPCHGeneratedQueryIntegrationTest.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.sql.ResultSet;
+import java.sql.SQLTimeoutException;
+import java.sql.Statement;
+import java.util.Collections;
+import java.util.Objects;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.client.Connection;
+import org.apache.pinot.client.ResultSetGroup;
+import org.apache.pinot.integration.tests.BaseClusterIntegrationTest;
+import org.apache.pinot.integration.tests.ClusterIntegrationTestUtils;
+import 
org.apache.pinot.integration.tests.tpch.generator.PinotQueryBasedColumnDataProvider;
+import 
org.apache.pinot.integration.tests.tpch.generator.SampleColumnDataProvider;
+import org.apache.pinot.integration.tests.tpch.generator.TPCHQueryGeneratorV2;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.tools.utils.JarUtils;
+import org.apache.pinot.util.TestUtils;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+
+/**
+ * Integration test that tests Pinot using TPCH data.
+ * Data is loaded into Pinot and H2 from /resources/examples/batch/tpch. The 
dataset size is very small, please follow
+ * REAME.md to generate a larger dataset for better testing.
+ * Queries are executed against Pinot and H2, and the results are compared.
+ */
+public class TPCHGeneratedQueryIntegrationTest extends 
BaseClusterIntegrationTest {
+  private static final int NUM_TPCH_QUERIES = 1000;
+  private static TPCHQueryGeneratorV2 _tpchQueryGeneratorV2;
+  private static final Boolean USE_MULTI_VALUE = false;
+
+  @BeforeClass
+  public void setUp()
+      throws Exception {
+    TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);
+
+    // Start the Pinot cluster
+    startZk();
+    startController();
+    startBroker();
+    startServer();
+
+    setUpH2Connection();
+    for (String tableName : Constants.TPCH_TABLE_NAMES) {
+      File tableSegmentDir = new File(_segmentDir, tableName);
+      File tarDir = new File(_tarDir, tableName);
+      String tableResourceFolder = Constants.getTableResourceFolder(tableName, 
USE_MULTI_VALUE);
+      URL resourceUrl = 
getClass().getClassLoader().getResource(tableResourceFolder);
+      Assert.assertNotNull(resourceUrl, "Unable to find resource from: " + 
tableResourceFolder);
+      File resourceFile;
+      if ("jar".equals(resourceUrl.getProtocol())) {
+        String[] splits = resourceUrl.getFile().split("!");
+        File tempUnpackDir = new File(_tempDir.getAbsolutePath() + 
File.separator + splits[1]);
+        TestUtils.ensureDirectoriesExistAndEmpty(tempUnpackDir);
+        JarUtils.copyResourcesToDirectory(splits[0], splits[1].substring(1), 
tempUnpackDir.getAbsolutePath());
+        resourceFile = tempUnpackDir;
+      } else {
+        resourceFile = new File(resourceUrl.getFile());
+      }
+      File dataFile =
+          new File(Objects.requireNonNull(
+                  
getClass().getClassLoader().getResource(Constants.getTableAvroFilePath(tableName,
 USE_MULTI_VALUE)))
+              .getFile());
+      Assert.assertTrue(dataFile.exists(), "Unable to load resource file from 
URL: " + dataFile);
+      File schemaFile = new File(resourceFile.getPath(), tableName + 
"_schema.json");
+      File tableFile = new File(resourceFile.getPath(), tableName + 
"_offline_table_config.json");
+      // Pinot
+      TestUtils.ensureDirectoriesExistAndEmpty(tableSegmentDir, tarDir);
+      Schema schema = createSchema(schemaFile);
+      addSchema(schema);
+      TableConfig tableConfig = createTableConfig(tableFile);
+      addTableConfig(tableConfig);
+      
ClusterIntegrationTestUtils.buildSegmentsFromAvro(Collections.singletonList(dataFile),
 tableConfig, schema, 0,
+          tableSegmentDir, tarDir);
+      uploadSegments(tableName, tarDir);
+      // H2
+      
ClusterIntegrationTestUtils.setUpH2TableWithAvro(Collections.singletonList(dataFile),
 tableName, _h2Connection);
+    }
+
+    SampleColumnDataProvider sampleColumnDataProvider =
+        new PinotQueryBasedColumnDataProvider(new 
PinotQueryBasedColumnDataProvider.PinotConnectionProvider() {
+          @Override
+          public Connection getConnection() {
+            return getPinotConnection();
+          }
+        });
+    _tpchQueryGeneratorV2 = new TPCHQueryGeneratorV2(sampleColumnDataProvider);
+    _tpchQueryGeneratorV2.init();
+  }
+
+  @Test(dataProvider = "QueryDataProvider", enabled = false)
+  public void testTPCHQueries(String[] pinotAndH2Queries)
+      throws Exception {
+    testQueriesSucceed(pinotAndH2Queries[1], pinotAndH2Queries[0]);
+  }
+
+  protected long testQueriesSucceed(String pinotQuery, String h2Query)
+      throws Exception {
+    ResultSetGroup pinotResultSetGroup = 
getPinotConnection().execute(pinotQuery);
+    org.apache.pinot.client.ResultSet resultTableResultSet = 
pinotResultSetGroup.getResultSet(0);
+    if (CollectionUtils.isNotEmpty(pinotResultSetGroup.getExceptions())) {
+      Assert.fail(pinotResultSetGroup.getExceptions().get(0).toString());
+    }
+
+    int numRows = resultTableResultSet.getRowCount();
+    int numColumns = resultTableResultSet.getColumnCount();
+
+    // h2 response
+    Assert.assertNotNull(_h2Connection);
+    Statement h2statement = 
_h2Connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, 
ResultSet.CONCUR_READ_ONLY);
+    h2statement.setQueryTimeout(10);
+
+    try {
+      h2statement.execute(h2Query);
+    } catch (SQLTimeoutException e) {
+      Assert.fail("H2 query timed out!");
+    }
+    ResultSet h2ResultSet = h2statement.getResultSet();
+
+    // compare results.
+    Assert.assertEquals(numColumns, 
h2ResultSet.getMetaData().getColumnCount());
+    if (h2ResultSet.first()) {
+      for (int i = 0; i < numRows; i++) {
+        for (int c = 0; c < numColumns; c++) {
+          String h2Value = h2ResultSet.getString(c + 1);
+          String pinotValue = resultTableResultSet.getString(i, c);
+          boolean error = ClusterIntegrationTestUtils.fuzzyCompare(h2Value, 
pinotValue, pinotValue);
+          if (error) {
+            Assert.fail(
+                String.format("Value: %d does not match at (%d, %d), expected 
h2 value: %s actual Pinot value: %s", c,
+                    i, c, h2Value, pinotValue));
+          }
+        }
+        if (!h2ResultSet.next() && i != numRows - 1) {
+          Assert.fail(String.format("H2 result set is smaller than Pinot 
result set after: %d rows", i));
+        }
+      }
+    }
+    Assert.assertFalse(h2ResultSet.next(),
+        String.format("Pinot result set is smaller than H2 result set after: 
%d rows!", numRows));
+
+    return numRows;
+  }
+
+  @Override
+  protected long getCurrentCountStarResult() {
+    return getPinotConnection().execute("SELECT COUNT(*) FROM 
orders").getResultSet(0).getLong(0);
+  }
+
+  @Override
+  protected long getCountStarResult() {
+    return 9999L;
+  }
+
+  @Override
+  protected boolean useMultiStageQueryEngine() {
+    return true;
+  }
+
+  @AfterClass
+  public void tearDown()
+      throws Exception {
+    // unload all TPCH tables.
+    for (String table : Constants.TPCH_TABLE_NAMES) {
+      dropOfflineTable(table);
+    }
+
+    // stop components and clean up
+    stopServer();
+    stopBroker();
+    stopController();
+    stopZk();
+
+    FileUtils.deleteDirectory(_tempDir);
+  }
+
+  @DataProvider(name = "QueryDataProvider")
+  public static Object[][] queryDataProvider()
+      throws IOException {
+    Object[][] queries = new Object[NUM_TPCH_QUERIES][];
+    for (int i = 0; i < NUM_TPCH_QUERIES; i++) {
+      queries[i] = new Object[2];
+      String query = _tpchQueryGeneratorV2.generateRandomQuery();
+      queries[i][0] = query;
+      queries[i][1] = query;
+    }
+
+    return queries;
+  }
+}
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TblToAvroMultiValue.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TblToAvroMultiValue.java
new file mode 100644
index 0000000000..014e285bd4
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/TblToAvroMultiValue.java
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch;
+
+import com.google.common.collect.ImmutableMap;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Map;
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+
+
+public class TblToAvroMultiValue {
+  private static final String LONG_TYPE = "long";
+  private static final String DOUBLE_TYPE = "double";
+  private static final String STRING_TYPE = "string";
+  private static final String FOLDER_PATH = 
"/Users/saurabh.dubey/Documents/workspace/stuff/tpch/data/s001/";
+  private static final String TBL_FILE_SUFFIX = ".tbl";
+  private static final Map<String, String[]> TABLE_NAME_FIELDS_MAP = 
ImmutableMap.of(
+      // One example record:
+      // {"c_custkey":{"long":1},"c_name":{"string":"Customer#000000001"},
+      // "c_address":{"string":"IVhzIApeRb ot,c,E"},"c_nationkey":{"long":15},
+      // 
"c_phone":{"string":"25-989-741-2988"},"c_acctbal":{"double":711.56},"c_mktsegment":{"string":"BUILDING"},
+      // "c_comment":{"string":"to the even, regular platelets. regular, 
ironic epitaphs nag e"}}
+      "customer", new String[]{
+          "c_custkey", LONG_TYPE, "false", "c_name", STRING_TYPE, "true", 
"c_address", STRING_TYPE, "true",
+          "c_nationkey", LONG_TYPE, "false", "c_phone", STRING_TYPE, "false", 
"c_acctbal", DOUBLE_TYPE, "false",
+          "c_mktsegment", STRING_TYPE, "true", "c_comment", STRING_TYPE, 
"false"
+      },
+      // One example record:
+      // 
{"l_orderkey":{"long":600000},"l_partkey":{"long":12916},"l_suppkey":{"long":917},
+      // 
"l_linenumber":{"long":2},"l_quantity":{"long":1},"l_extendedprice":{"double":1828.91},
+      // 
"l_discount":{"double":0.03},"l_tax":{"double":0.0},"l_returnflag":{"string":"N"},
+      // 
"l_linestatus":{"string":"O"},"l_shipdate":{"string":"1998-04-13"},"l_commitdate":{"string":"1998-05-24"},
+      // 
"l_receiptdate":{"string":"1998-04-30"},"l_shipinstruct":{"string":"DELIVER IN 
PERSON"},
+      // "l_shipmode":{"string":"RAIL"},"l_comment":{"string":" wake braids. 
"}}
+      "lineitem", new String[]{
+          "l_orderkey", LONG_TYPE, "false", "l_partkey", LONG_TYPE, "false", 
"l_suppkey", LONG_TYPE, "false",
+          "l_linenumber", LONG_TYPE, "true", "l_quantity", LONG_TYPE, "true", 
"l_extendedprice", DOUBLE_TYPE, "true",
+          "l_discount", DOUBLE_TYPE, "true", "l_tax", DOUBLE_TYPE, "false", 
"l_returnflag", STRING_TYPE, "false",
+          "l_linestatus", STRING_TYPE, "false", "l_shipdate", STRING_TYPE, 
"false", "l_commitdate", STRING_TYPE,
+          "false", "l_receiptdate", STRING_TYPE, "false", "l_shipinstruct", 
STRING_TYPE, "false", "l_shipmode",
+          STRING_TYPE, "false", "l_comment", STRING_TYPE, "false"
+      },
+      // One example record:
+      // 
{"n_nationkey":{"long":0},"n_name":{"string":"ALGERIA"},"n_regionkey":{"long":0},
+      // "n_comment":{"string":" haggle. carefully final deposits detect slyly 
agai"}}
+      "nation", new String[]{
+          "n_nationkey", LONG_TYPE, "false", "n_name", STRING_TYPE, "true", 
"n_regionkey", LONG_TYPE, "false",
+          "n_comment", STRING_TYPE, "false"
+      },
+      // One example record:
+      // 
{"o_orderkey":{"long":1},"o_custkey":{"long":370},"o_orderstatus":{"string":"O"},
+      // 
"o_totalprice":{"double":173665.47},"o_orderdate":{"string":"1996-01-02"},
+      // 
"o_orderpriority":{"string":"5-LOW"},"o_clerk":{"string":"Clerk#000000951"},"o_shippriority":{"long":0},
+      // "o_comment":{"string":"ly final dependencies: slyly bold "}}
+      "orders", new String[]{
+          "o_orderkey", LONG_TYPE, "false", "o_custkey", LONG_TYPE, "false", 
"o_orderstatus", STRING_TYPE, "true",
+          "o_totalprice", DOUBLE_TYPE, "true", "o_orderdate", STRING_TYPE, 
"false", "o_orderpriority", STRING_TYPE,
+          "false", "o_clerk", STRING_TYPE, "true", "o_shippriority", 
LONG_TYPE, "true", "o_comment", STRING_TYPE, "true"
+      },
+      // One example record:
+      // {"p_partkey":{"long":1},"p_name":{"string":"goldenrod lavender spring 
chocolate lace"},
+      // "p_mfgr":{"string":"Manufacturer#1"},"p_brand":{"string":"Brand#13"},
+      // "p_type":{"string":"PROMO BRUSHED 
STEEL"},"p_size":{"long":7},"p_container":{"string":"JUMBO PKG"},
+      // "p_retailprice":{"double":901.0},"p_comment":{"string":"ly final 
dependencies: slyly bold "}}
+      "part", new String[]{
+          "p_partkey", LONG_TYPE, "false", "p_name", STRING_TYPE, "true", 
"p_mfgr", STRING_TYPE, "true", "p_brand",
+          STRING_TYPE, "true", "p_type", STRING_TYPE, "true", "p_size", 
LONG_TYPE, "true", "p_container", STRING_TYPE,
+          "true", "p_retailprice", DOUBLE_TYPE, "true", "p_comment", 
STRING_TYPE, "true"
+      },
+      // One example record:
+      // 
{"ps_partkey":{"long":1},"ps_suppkey":{"long":2},"ps_availqty":{"long":997},
+      // "ps_supplycost":{"double":6.02},"ps_comment":{"string":"ly final 
dependencies: slyly bold "}}
+      "partsupp", new String[]{
+          "ps_partkey", LONG_TYPE, "false", "ps_suppkey", LONG_TYPE, "false", 
"ps_availqty", LONG_TYPE, "true",
+          "ps_supplycost", DOUBLE_TYPE, "true", "ps_comment", STRING_TYPE, 
"true"
+      },
+      // One example record:
+      // 
{"r_regionkey":{"long":0},"r_name":{"string":"AFRICA"},"r_comment":{"string":"lar
 deposits. blithe"}}
+      "region", new String[]{
+          "r_regionkey", LONG_TYPE, "false", "r_name", STRING_TYPE, "true", 
"r_comment", STRING_TYPE, "false"
+      },
+      // One example record:
+      // {"s_suppkey":{"long":1},"s_name":{"string":"Supplier#000000001"},
+      // "s_address":{"string":" N kD4on9OM 
Ipw3,gf0JBoQDd7tgrzrddZ"},"s_nationkey":{"long":15},
+      // "s_phone":{"string":"25-989-741-2988"},"s_acctbal":{"double":711.56},
+      // "s_comment":{"string":" deposits eat slyly ironic, even instructions. 
express foxes detect slyly.
+      // blithely even accounts abov"}}
+      "supplier", new String[]{
+          "s_suppkey", LONG_TYPE, "false", "s_name", STRING_TYPE, "true", 
"s_address", STRING_TYPE, "true",
+          "s_nationkey", LONG_TYPE, "false", "s_phone", STRING_TYPE, "true", 
"s_acctbal", DOUBLE_TYPE, "false",
+          "s_comment", STRING_TYPE, "false"
+      });
+
+  private TblToAvroMultiValue() {
+  }
+
+  public static void main(String[] args)
+      throws IOException {
+//    String tpchTblFileFolder = args[0];
+    String tpchTblFileFolder = FOLDER_PATH;
+    for (Map.Entry<String, String[]> entry : TABLE_NAME_FIELDS_MAP.entrySet()) 
{
+      String tableName = entry.getKey();
+      String[] fieldsAndTypes = entry.getValue();
+      SchemaBuilder.FieldAssembler<Schema> schemaFields = 
SchemaBuilder.record(tableName).fields();
+      for (int i = 0; i < fieldsAndTypes.length; i += 3) {
+        String fieldName = fieldsAndTypes[i];
+        String fieldType = fieldsAndTypes[i + 1];
+        Boolean isMultiValue = Boolean.parseBoolean(fieldsAndTypes[i + 2]);
+        switch (fieldType) {
+          case LONG_TYPE:
+            if (isMultiValue) {
+              
schemaFields.name(fieldName).type().array().items().longType().noDefault();
+            } else {
+              
schemaFields.name(fieldName).type().unionOf().longType().and().nullType().endUnion().noDefault();
+            }
+            break;
+          case DOUBLE_TYPE:
+            if (isMultiValue) {
+              
schemaFields.name(fieldName).type().array().items().doubleType().noDefault();
+            } else {
+              
schemaFields.name(fieldName).type().unionOf().doubleType().and().nullType().endUnion().noDefault();
+            }
+            break;
+          case STRING_TYPE:
+            if (isMultiValue) {
+              
schemaFields.name(fieldName).type().array().items().stringType().noDefault();
+            } else {
+              
schemaFields.name(fieldName).type().unionOf().stringType().and().nullType().endUnion().noDefault();
+            }
+
+            break;
+          default:
+            throw new IllegalStateException("Unsupported field type: " + 
fieldType);
+        }
+      }
+      Schema schema = schemaFields.endRecord();
+
+      // Open Avro data file for writing
+      DatumWriter<GenericRecord> datumWriter = new 
GenericDatumWriter<>(schema);
+      DataFileWriter<GenericRecord> dataFileWriter = new 
DataFileWriter<>(datumWriter);
+      Path avroFilePath = Paths.get(tpchTblFileFolder + tableName + 
"_multivalue" + Constants.AVRO_FILE_SUFFIX);
+      Files.deleteIfExists(avroFilePath);
+      OutputStream outputStream = Files.newOutputStream(avroFilePath);
+      dataFileWriter.create(schema, outputStream);
+
+      // Read TPC-H .tbl files and convert to Avro format
+      try (
+          BufferedReader reader = new BufferedReader(new 
FileReader(tpchTblFileFolder + tableName + TBL_FILE_SUFFIX))) {
+        String line;
+        while ((line = reader.readLine()) != null) {
+          String[] fields = line.split("\\|");
+          GenericRecord record = new GenericData.Record(schema);
+          for (int i = 0; i < fields.length; i++) {
+            String fieldName = fieldsAndTypes[3 * i];
+            String fieldType = fieldsAndTypes[3 * i + 1];
+            Boolean isMultiValue = Boolean.parseBoolean(fieldsAndTypes[3 * i + 
2]);
+            switch (fieldType) {
+              case LONG_TYPE:
+                if (isMultiValue) {
+                  record.put(fieldName, 
Arrays.asList(Long.parseLong(fields[i])));
+                } else {
+                  record.put(fieldName, Long.parseLong(fields[i]));
+                }
+                break;
+              case DOUBLE_TYPE:
+                if (isMultiValue) {
+                  record.put(fieldName, 
Arrays.asList(Double.parseDouble(fields[i])));
+                } else {
+                  record.put(fieldName, Double.parseDouble(fields[i]));
+                }
+                break;
+              case STRING_TYPE:
+                if (isMultiValue) {
+                  record.put(fieldName, Arrays.asList(fields[i]));
+                } else {
+                  record.put(fieldName, fields[i]);
+                }
+                break;
+              default:
+                throw new IllegalStateException("Unsupported field type: " + 
fieldType);
+            }
+          }
+          dataFileWriter.append(record);
+        }
+      }
+
+      // Close Avro data file
+      dataFileWriter.close();
+    }
+  }
+}
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Column.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Column.java
new file mode 100644
index 0000000000..549136ef50
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Column.java
@@ -0,0 +1,97 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+public class Column {
+  private static final Random RANDOM = new Random();
+  private final String _columnName;
+  private final ColumnType _columnType;
+  private List<String> _sampleValues;
+  private boolean _isMultiValue;
+
+  public Column(String columnName, ColumnType columnType) {
+    _columnName = columnName;
+    _columnType = columnType;
+    _sampleValues = new ArrayList<>();
+    generateSampleValues();
+  }
+
+  private void generateSampleValues() {
+    switch (_columnType) {
+      case STRING:
+        for (int i = 0; i < 10; i++) {
+          _sampleValues.add(UUID.randomUUID().toString());
+        }
+        break;
+      case NUMERIC:
+        for (int i = 0; i < 10; i++) {
+          _sampleValues.add(String.valueOf(RANDOM.nextInt(1000)));
+        }
+        break;
+      default:
+        throw new IllegalStateException("Unsupported column type: " + 
_columnType);
+    }
+  }
+
+  public boolean isMultiValue() {
+    return _isMultiValue;
+  }
+
+  public void setMultiValue(boolean isMultiValue) {
+    _isMultiValue = isMultiValue;
+  }
+
+  public void setSampleValues(List<String> sampleValues) {
+    _sampleValues = sampleValues;
+  }
+
+  public String getColumnName() {
+    return _columnName;
+  }
+
+  public String getColumnNameForPredicate(String prefix) {
+    return prefix + _columnName;
+  }
+
+  public String getColumnNameForPinotPredicate(String prefix) {
+    if (isMultiValue()) {
+      return "arrayToMv(" + prefix + _columnName + ")";
+    } else {
+      return prefix + _columnName;
+    }
+  }
+
+  public ColumnType getColumnType() {
+    return _columnType;
+  }
+
+  public String getRandomStringValue() {
+    return _sampleValues.get(RANDOM.nextInt(_sampleValues.size()));
+  }
+
+  public long getRandomNumericValue() {
+    return (long) Double.parseDouble(getRandomStringValue());
+  }
+}
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/ColumnType.java
similarity index 51%
copy from 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/ColumnType.java
index 7fbae29c08..9783446959 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/ColumnType.java
@@ -1,3 +1,8 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.List;
+
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -16,29 +21,15 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.pinot.integration.tests.tpch;
-
-import java.io.File;
-
-
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
-  static final String[] TPCH_TABLE_NAMES = {
-      "customer", "lineitem", "nation", "orders", "part", "partsupp", 
"region", "supplier"
-  };
-  static final String AVRO_FILE_SUFFIX = ".avro";
-  private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX = 
"examples/batch/tpch/";
-  private Constants() {
-  }
+public enum ColumnType {
+  NUMERIC(List.of("=", "!=", "<", ">", "<=", ">=", "<>"), List.of("max", 
"min", "avg", "sum", "count")),
+  STRING(List.of("=", "!=", "LIKE"), List.of());
 
-  static String getTableResourceFolder(String tableName) {
-    return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
-  }
+  List<String> _operators;
+  List<String> _aggregations;
 
-  static String getTableAvroFilePath(String tableName) {
-    return getTableResourceFolder(tableName) + File.separator
-        + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+  ColumnType(List<String> operators, List<String> aggregateFunctions) {
+    _operators = operators;
+    _aggregations = aggregateFunctions;
   }
 }
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/PinotQueryBasedColumnDataProvider.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/PinotQueryBasedColumnDataProvider.java
new file mode 100644
index 0000000000..15f66557d2
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/PinotQueryBasedColumnDataProvider.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.client.ResultSet;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONException;
+
+
+public class PinotQueryBasedColumnDataProvider implements 
SampleColumnDataProvider {
+  public interface PinotConnectionProvider {
+    org.apache.pinot.client.Connection getConnection();
+  }
+
+  private static final String COUNT_START_QUERY_FORMAT = "SELECT COUNT(*) FROM 
%s";
+  private static final String QUERY_FORMAT = "SELECT %s FROM %s WHERE $docId 
IN (%s)";
+  private final PinotConnectionProvider _pinotConnectionProvider;
+
+  public PinotQueryBasedColumnDataProvider(PinotConnectionProvider 
connectionProvider) {
+    _pinotConnectionProvider = connectionProvider;
+  }
+
+  @Override
+  public Pair<Boolean, List<String>> getSampleValues(String tableName, String 
columnName)
+      throws JSONException {
+    String countStarQuery = String.format(COUNT_START_QUERY_FORMAT, tableName);
+    boolean isMultiValue = false;
+    int count = 
_pinotConnectionProvider.getConnection().execute(countStarQuery).getResultSet(0).getInt(0);
+
+    StringBuilder randomDocIds = new StringBuilder();
+
+    for (int i = 0; i < 10; i++) {
+      randomDocIds.append((int) (Math.random() * count));
+      if (i != 9) {
+        randomDocIds.append(", ");
+      }
+    }
+
+    String query = String.format(QUERY_FORMAT, columnName, tableName, 
randomDocIds);
+
+    List<String> columnValues = new ArrayList<>();
+    ResultSet resultSet = 
_pinotConnectionProvider.getConnection().execute(query).getResultSet(0);
+
+    for (int i = 0; i < resultSet.getRowCount(); i++) {
+      if (resultSet.getColumnDataType(0).contains("ARRAY")) {
+        String array = resultSet.getString(i, 0);
+        JSONArray jsnobject = new JSONArray(array);
+        columnValues.add(jsnobject.get(0).toString());
+        isMultiValue = true;
+      } else {
+        columnValues.add(resultSet.getString(i, 0));
+      }
+    }
+
+    return Pair.of(isMultiValue, columnValues);
+  }
+}
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/QuerySkeleton.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/QuerySkeleton.java
new file mode 100644
index 0000000000..067505aa97
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/QuerySkeleton.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+public class QuerySkeleton {
+  private final List<String> _projections;
+  private final List<String> _predicates;
+  private final List<String> _groupByColumns;
+  private final List<String> _orderByColumns;
+  private final List<String> _tables;
+
+  public QuerySkeleton() {
+    _projections = new ArrayList<>();
+    _predicates = new ArrayList<>();
+    _groupByColumns = new ArrayList<>();
+    _orderByColumns = new ArrayList<>();
+    _tables = new ArrayList<>();
+  }
+
+  public void addTable(String table) {
+    _tables.add(table);
+  }
+
+  public void addProjection(String projection) {
+    _projections.add(projection);
+  }
+
+  public QuerySkeleton addPredicate(String predicate) {
+    _predicates.add(predicate);
+    return this;
+  }
+
+  public QuerySkeleton addGroupByColumn(String groupByColumn) {
+    _groupByColumns.add(groupByColumn);
+    return this;
+  }
+
+  public QuerySkeleton addOrderByColumn(String orderByColumn) {
+    _orderByColumns.add(orderByColumn);
+    return this;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder query = new StringBuilder();
+    query.append("SELECT ");
+    _projections.forEach(predicate -> query.append(predicate).append(", "));
+    query.delete(query.length() - 2, query.length());
+
+    query.append(" FROM ");
+    _tables.forEach(table -> query.append(table).append(", "));
+    query.delete(query.length() - 2, query.length());
+
+    if (_predicates.size() > 0) {
+      query.append(" WHERE ");
+      _predicates.forEach(predicate -> query.append(predicate).append(" AND 
"));
+      query.delete(query.length() - 5, query.length());
+    }
+
+    if (_groupByColumns.size() > 0) {
+      query.append(" GROUP BY ");
+      _groupByColumns.forEach(groupByColumn -> 
query.append(groupByColumn).append(", "));
+      query.delete(query.length() - 2, query.length());
+    }
+
+    if (_orderByColumns.size() > 0) {
+      query.append(" ORDER BY ");
+      _orderByColumns.forEach(orderByColumn -> 
query.append(orderByColumn).append(", "));
+      query.delete(query.length() - 2, query.length());
+    }
+
+    return query.toString();
+  }
+}
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/RelatedTable.java
similarity index 52%
copy from 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/RelatedTable.java
index 7fbae29c08..6796f3a283 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/RelatedTable.java
@@ -1,3 +1,5 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -16,29 +18,26 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.pinot.integration.tests.tpch;
-
-import java.io.File;
+class RelatedTable {
+  private final String _foreignTableName;
+  private final String _foreignTableKey;
+  private final String _localTableKey;
 
+  public RelatedTable(String foreignTableName, String foreignTableKey, String 
localTableKey) {
+    _foreignTableName = foreignTableName;
+    _foreignTableKey = foreignTableKey;
+    _localTableKey = localTableKey;
+  }
 
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
-  static final String[] TPCH_TABLE_NAMES = {
-      "customer", "lineitem", "nation", "orders", "part", "partsupp", 
"region", "supplier"
-  };
-  static final String AVRO_FILE_SUFFIX = ".avro";
-  private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX = 
"examples/batch/tpch/";
-  private Constants() {
+  public String getForeignTableName() {
+    return _foreignTableName;
   }
 
-  static String getTableResourceFolder(String tableName) {
-    return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
+  public String getForeignTableKey() {
+    return _foreignTableKey;
   }
 
-  static String getTableAvroFilePath(String tableName) {
-    return getTableResourceFolder(tableName) + File.separator
-        + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+  public String getLocalTableKey() {
+    return _localTableKey;
   }
 }
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/SampleColumnDataProvider.java
similarity index 51%
copy from 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/SampleColumnDataProvider.java
index 7fbae29c08..a322bc6680 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/SampleColumnDataProvider.java
@@ -16,29 +16,14 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.pinot.integration.tests.tpch;
+package org.apache.pinot.integration.tests.tpch.generator;
 
-import java.io.File;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.codehaus.jettison.json.JSONException;
 
 
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
-  static final String[] TPCH_TABLE_NAMES = {
-      "customer", "lineitem", "nation", "orders", "part", "partsupp", 
"region", "supplier"
-  };
-  static final String AVRO_FILE_SUFFIX = ".avro";
-  private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX = 
"examples/batch/tpch/";
-  private Constants() {
-  }
-
-  static String getTableResourceFolder(String tableName) {
-    return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
-  }
-
-  static String getTableAvroFilePath(String tableName) {
-    return getTableResourceFolder(tableName) + File.separator
-        + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
-  }
+public interface SampleColumnDataProvider {
+  Pair<Boolean, List<String>> getSampleValues(String tableName, String 
columnName)
+      throws JSONException;
 }
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/TPCHQueryGeneratorV2.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/TPCHQueryGeneratorV2.java
new file mode 100644
index 0000000000..678dee8a3e
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/TPCHQueryGeneratorV2.java
@@ -0,0 +1,553 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import org.apache.commons.lang3.tuple.Pair;
+import org.codehaus.jettison.json.JSONException;
+
+
+public class TPCHQueryGeneratorV2 {
+  private static final Map<String, Table> TABLES_MAP = new HashMap<>();
+  private static final List<String> TABLE_NAMES =
+      List.of("nation", "region", "supplier", "customer", "part", "partsupp", 
"orders", "lineitem");
+  private static final String[] JOIN_TYPES = {
+      "INNER JOIN", "LEFT JOIN", "RIGHT JOIN"
+  };
+  private final SampleColumnDataProvider _sampleColumnDataProvider;
+  private final Random _random = new Random();
+
+  public TPCHQueryGeneratorV2() {
+    _sampleColumnDataProvider = null;
+  }
+
+  public TPCHQueryGeneratorV2(SampleColumnDataProvider 
sampleColumnDataProvider) {
+    _sampleColumnDataProvider = sampleColumnDataProvider;
+  }
+
+  private static Table getRandomTable() {
+    Random random = new Random();
+    int index = random.nextInt(TABLES_MAP.size());
+    return TABLES_MAP.get(TABLE_NAMES.get(index));
+  }
+
+  private void addRelation(String t1, String t2, String t1Key, String t2Key) {
+    TABLES_MAP.get(t1).addRelation(t2, t2Key, t1Key);
+    TABLES_MAP.get(t2).addRelation(t1, t1Key, t2Key);
+  }
+
+  public void init() {
+    TABLES_MAP.put("nation", new Table("nation",
+        List.of(new Column("n_nationkey", ColumnType.NUMERIC), new 
Column("n_name", ColumnType.STRING),
+            new Column("n_regionkey", ColumnType.NUMERIC), new 
Column("n_comment", ColumnType.STRING))));
+
+    TABLES_MAP.put("region", new Table("region",
+        List.of(new Column("r_regionkey", ColumnType.NUMERIC), new 
Column("r_name", ColumnType.STRING),
+            new Column("r_comment", ColumnType.STRING))));
+
+    TABLES_MAP.put("supplier", new Table("supplier",
+        List.of(new Column("s_suppkey", ColumnType.NUMERIC), new 
Column("s_name", ColumnType.STRING),
+            new Column("s_address", ColumnType.STRING), new 
Column("s_nationkey", ColumnType.NUMERIC),
+            new Column("s_phone", ColumnType.STRING), new Column("s_acctbal", 
ColumnType.NUMERIC),
+            new Column("s_comment", ColumnType.STRING))));
+
+    TABLES_MAP.put("customer", new Table("customer",
+        List.of(new Column("c_custkey", ColumnType.NUMERIC), new 
Column("c_name", ColumnType.STRING),
+            new Column("c_address", ColumnType.STRING), new 
Column("c_nationkey", ColumnType.NUMERIC),
+            new Column("c_phone", ColumnType.STRING), new Column("c_acctbal", 
ColumnType.NUMERIC),
+            new Column("c_mktsegment", ColumnType.STRING), new 
Column("c_comment", ColumnType.STRING))));
+
+    TABLES_MAP.put("part", new Table("part",
+        List.of(new Column("p_partkey", ColumnType.NUMERIC), new 
Column("p_name", ColumnType.STRING),
+            new Column("p_mfgr", ColumnType.STRING), new Column("p_brand", 
ColumnType.STRING),
+            new Column("p_type", ColumnType.STRING), new Column("p_size", 
ColumnType.NUMERIC),
+            new Column("p_container", ColumnType.STRING), new 
Column("p_retailprice", ColumnType.NUMERIC),
+            new Column("p_comment", ColumnType.STRING))));
+
+    TABLES_MAP.put("partsupp", new Table("partsupp",
+        List.of(new Column("ps_partkey", ColumnType.NUMERIC), new 
Column("ps_suppkey", ColumnType.NUMERIC),
+            new Column("ps_availqty", ColumnType.NUMERIC), new 
Column("ps_supplycost", ColumnType.NUMERIC),
+            new Column("ps_comment", ColumnType.STRING))));
+
+    TABLES_MAP.put("orders", new Table("orders",
+        List.of(new Column("o_orderkey", ColumnType.NUMERIC), new 
Column("o_custkey", ColumnType.NUMERIC),
+            new Column("o_orderstatus", ColumnType.STRING), new 
Column("o_totalprice", ColumnType.NUMERIC),
+            new Column("o_orderdate", ColumnType.STRING), new 
Column("o_orderpriority", ColumnType.STRING),
+            new Column("o_clerk", ColumnType.STRING), new 
Column("o_shippriority", ColumnType.STRING),
+            new Column("o_comment", ColumnType.STRING))));
+
+    TABLES_MAP.put("lineitem", new Table("lineitem",
+        List.of(new Column("l_orderkey", ColumnType.NUMERIC), new 
Column("l_partkey", ColumnType.NUMERIC),
+            new Column("l_suppkey", ColumnType.NUMERIC), new 
Column("l_linenumber", ColumnType.NUMERIC),
+            new Column("l_quantity", ColumnType.NUMERIC), new 
Column("l_extendedprice", ColumnType.NUMERIC),
+            new Column("l_discount", ColumnType.NUMERIC), new Column("l_tax", 
ColumnType.NUMERIC),
+            new Column("l_returnflag", ColumnType.STRING), new 
Column("l_linestatus", ColumnType.STRING),
+            new Column("l_shipdate", ColumnType.STRING), new 
Column("l_commitdate", ColumnType.STRING),
+            new Column("l_receiptdate", ColumnType.STRING), new 
Column("l_shipinstruct", ColumnType.STRING),
+            new Column("l_shipmode", ColumnType.STRING), new 
Column("l_comment", ColumnType.STRING))));
+
+    addRelation("nation", "region", "n_regionkey", "r_regionkey");
+    addRelation("supplier", "nation", "s_nationkey", "n_nationkey");
+    addRelation("supplier", "customer", "s_nationkey", "c_nationkey");
+    addRelation("supplier", "partsupp", "s_suppkey", "ps_suppkey");
+    addRelation("customer", "nation", "c_nationkey", "n_nationkey");
+    addRelation("orders", "customer", "o_custkey", "c_custkey");
+    addRelation("lineitem", "orders", "l_orderkey", "o_orderkey");
+    addRelation("lineitem", "part", "l_partkey", "p_partkey");
+    addRelation("lineitem", "supplier", "l_suppkey", "s_suppkey");
+    addRelation("lineitem", "partsupp", "l_partkey", "ps_partkey");
+    addRelation("lineitem", "partsupp", "l_suppkey", "ps_partkey");
+    addRelation("part", "partsupp", "p_partkey", "ps_partkey");
+
+    if (_sampleColumnDataProvider != null) {
+      TABLES_MAP.forEach((tableName, table) -> {
+        table.getColumns().forEach(column -> {
+          Pair<Boolean, List<String>> sampleValues = null;
+          try {
+            sampleValues = 
_sampleColumnDataProvider.getSampleValues(tableName, column.getColumnName());
+          } catch (JSONException e) {
+            throw new RuntimeException(e);
+          }
+          column.setSampleValues(sampleValues.getRight());
+          column.setMultiValue(sampleValues.getLeft());
+        });
+      });
+    }
+  }
+
+  private List<String> getRandomProjections(Table t1) {
+    Random random = new Random();
+    int numColumns = random.nextInt(t1.getColumns().size()) + 1;
+    List<String> selectedColumns = new ArrayList<>();
+
+    while (selectedColumns.size() < numColumns) {
+      String columnName = 
t1.getColumns().get(random.nextInt(t1.getColumns().size())).getColumnName();
+      if (!selectedColumns.contains(columnName)) {
+        selectedColumns.add(columnName);
+      }
+    }
+
+    return selectedColumns;
+  }
+
+  private String generateInnerQueryForPredicate(Table t1, Column c) {
+    QuerySkeleton innerQuery = new QuerySkeleton();
+
+    Random random = new Random();
+    List<String> predicates = new ArrayList<>();
+    innerQuery.addTable(t1.getTableName());
+    // Limit to maximum of 1 join
+    if (random.nextBoolean()) {
+      RelatedTable relatedTable = 
t1.getRelatedTables().get(random.nextInt(t1.getRelatedTables().size()));
+      if (relatedTable != null) {
+        innerQuery.addTable(relatedTable.getForeignTableName());
+        predicates.add(relatedTable.getLocalTableKey() + "=" + 
relatedTable.getForeignTableKey());
+
+        List<String> inp = 
getRandomPredicates(TABLES_MAP.get(relatedTable.getForeignTableName()), false);
+        predicates.addAll(inp);
+      }
+    }
+    String aggregation = 
c.getColumnType()._aggregations.get(random.nextInt(c.getColumnType()._aggregations.size()));
+    innerQuery.addProjection(aggregation + "(" + c.getColumnName() + ")");
+
+    List<String> inp = getRandomPredicates(t1, false);
+
+    predicates.addAll(inp);
+
+    predicates.forEach(innerQuery::addPredicate);
+    return innerQuery.toString();
+  }
+
+  private String getRandomValueForPredicate(Table t1, Column c, boolean 
useNextedQueries) {
+    Random random = new Random();
+    if (random.nextBoolean() && useNextedQueries && 
c.getColumnType()._aggregations.size() > 0) {
+      // Use nested query for predicate
+      String nestedQueries = generateInnerQueryForPredicate(t1, c);
+      return "(" + nestedQueries + ")";
+    } else {
+      if (c.getColumnType() == ColumnType.STRING) {
+        return "'" + c.getRandomStringValue() + "'";
+      } else {
+        return String.valueOf(c.getRandomNumericValue());
+      }
+    }
+  }
+
+  private List<String> getRandomPredicates(Table t1, boolean useNestedQueries) 
{
+    Random random = new Random();
+    int predicateCount = random.nextInt(5) + 1;
+    List<String> predicates = new ArrayList<>();
+    List<String> results = new ArrayList<>();
+    while (predicates.size() < predicateCount) {
+      Column column = 
t1.getColumns().get(random.nextInt(t1.getColumns().size()));
+      predicates.add(column.getColumnName());
+      ColumnType columnType = column.getColumnType();
+      String operator = 
columnType._operators.get(random.nextInt(columnType._operators.size()));
+      String value = getRandomValueForPredicate(t1, column, useNestedQueries);
+      String predicateBuilder = column.getColumnName() + " " + operator + " " 
+ value + " ";
+      results.add(predicateBuilder);
+    }
+
+    return results;
+  }
+
+  private List<String> getRandomPredicates(Table t1) {
+    return getRandomPredicates(t1, true);
+  }
+
+  private List<String> getRandomOrderBys(Table t1) {
+    Random random = new Random();
+    int orderByCount = random.nextInt(2) + 1;
+    List<String> orderBys = new ArrayList<>();
+    List<String> results = new ArrayList<>();
+    while (orderBys.size() < orderByCount) {
+      Column column = 
t1.getColumns().get(random.nextInt(t1.getColumns().size()));
+      orderBys.add(column.getColumnName());
+      String name = column.getColumnName();
+      StringBuilder orderByBuilder = new StringBuilder();
+      orderByBuilder.append(name).append(" ");
+      if (random.nextBoolean()) {
+        orderByBuilder.append(" DESC ");
+      }
+      results.add(orderByBuilder.toString());
+    }
+
+    return results;
+  }
+
+  public String generateSelectionOnlyQuery(boolean includePredicates, boolean 
includeOrderBy) {
+    QuerySkeleton querySkeleton = new QuerySkeleton();
+    Table t1 = getRandomTable();
+    querySkeleton.addTable(t1.getTableName());
+
+    getRandomProjections(t1).forEach(querySkeleton::addProjection);
+
+    if (includePredicates) {
+      List<String> predicates = getRandomPredicates(t1);
+      predicates.forEach(querySkeleton::addPredicate);
+    }
+
+    if (includeOrderBy) {
+      getRandomOrderBys(t1).forEach(querySkeleton::addOrderByColumn);
+    }
+
+    return querySkeleton.toString();
+  }
+
+  private List<String> getRandomOrderBys(Table t1, List<String> groupByCols) {
+    List<String> result = new ArrayList<>();
+    if (groupByCols.size() == 0) {
+      return result;
+    }
+    Random random = new Random();
+    List<String> orderBys = new ArrayList<>();
+    int orderByCount = random.nextInt(groupByCols.size()) + 1;
+    while (orderBys.size() < orderByCount) {
+      String column = groupByCols.get(random.nextInt(groupByCols.size()));
+
+      if (groupByCols.contains(column)) {
+        orderBys.add(column);
+        if (random.nextBoolean()) {
+          result.add(column + " DESC");
+        } else {
+          result.add(column);
+        }
+      }
+    }
+
+    return result;
+  }
+
+  public String selectionOnlyWithJoins(boolean includePredicates, boolean 
includeOrderBy) {
+    QuerySkeleton querySkeleton = new QuerySkeleton();
+
+    Table t1;
+    while (true) {
+      t1 = getRandomTable();
+      if (t1.getRelatedTables().size() > 0) {
+        break;
+      }
+    }
+
+    Random random = new Random();
+    RelatedTable rt = 
t1.getRelatedTables().get(random.nextInt(t1.getRelatedTables().size()));
+    Table t2 = TABLES_MAP.get(rt.getForeignTableName());
+    getRandomProjections(t1).forEach(querySkeleton::addProjection);
+    getRandomProjections(t2).forEach(querySkeleton::addProjection);
+
+    String t2NameWithJoin =
+        t1.getTableName() + " " + 
JOIN_TYPES[random.nextInt(JOIN_TYPES.length)] + " " + t2.getTableName() + " ON "
+            + rt.getLocalTableKey() + " = " + rt.getForeignTableKey() + " ";
+    querySkeleton.addTable(t2NameWithJoin);
+
+    if (includePredicates) {
+      List<String> predicates = getRandomPredicates(t1);
+      predicates.forEach(querySkeleton::addPredicate);
+
+      predicates = getRandomPredicates(t2);
+      predicates.forEach(querySkeleton::addPredicate);
+    }
+
+    if (includeOrderBy) {
+      getRandomOrderBys(t1).forEach(querySkeleton::addOrderByColumn);
+    }
+
+    return querySkeleton.toString();
+  }
+
+  private Pair<List<String>, List<String>> getGroupByAndAggregates(Table t1) {
+    Random random = new Random();
+    int numColumns = random.nextInt(t1.getColumns().size()) + 1;
+    List<String> selectedColumns = new ArrayList<>();
+    List<String> groupByColumns = new ArrayList<>();
+    List<String> resultProjections = new ArrayList<>();
+
+    while (selectedColumns.size() < numColumns) {
+      Column column = 
t1.getColumns().get(random.nextInt(t1.getColumns().size()));
+      String columnName = column.getColumnName();
+      if (!selectedColumns.contains(columnName)) {
+        if (random.nextBoolean() && 
column.getColumnType()._aggregations.size() > 0) {
+          // Use as aggregation
+          String aggregation =
+              
column.getColumnType()._aggregations.get(random.nextInt(column.getColumnType()._aggregations.size()));
+          resultProjections.add(aggregation + "(" + columnName + ")");
+        } else {
+          // Use as group by
+          groupByColumns.add(columnName);
+          resultProjections.add(columnName);
+        }
+        selectedColumns.add(columnName);
+      }
+    }
+
+    return Pair.of(resultProjections, groupByColumns);
+  }
+
+  public String selectionOnlyWithGroupBy(boolean includePredicates, boolean 
includeOrderBy) {
+    QuerySkeleton querySkeleton = new QuerySkeleton();
+
+    Table t1 = getRandomTable();
+    Pair<List<String>, List<String>> cols = getGroupByAndAggregates(t1);
+    cols.getLeft().forEach(querySkeleton::addProjection);
+
+    querySkeleton.addTable(t1.getTableName());
+
+    cols.getRight().forEach(querySkeleton::addGroupByColumn);
+    if (includePredicates) {
+      List<String> preds = getRandomPredicates(t1);
+      preds.forEach(querySkeleton::addPredicate);
+    }
+
+    if (includeOrderBy && cols.getRight().size() > 0) {
+      getRandomOrderBys(t1, 
cols.getRight()).forEach(querySkeleton::addOrderByColumn);
+    }
+
+    return querySkeleton.toString();
+  }
+
+  public String selectionOnlyGroupByWithJoins(boolean includePredicates, 
boolean includeOrderBy) {
+    QuerySkeleton querySkeleton = new QuerySkeleton();
+    Table t1;
+    while (true) {
+      t1 = getRandomTable();
+      if (t1.getRelatedTables().size() > 0) {
+        break;
+      }
+    }
+
+    Random random = new Random();
+    RelatedTable rt = 
t1.getRelatedTables().get(random.nextInt(t1.getRelatedTables().size()));
+    Table t2 = TABLES_MAP.get(rt.getForeignTableName());
+    Pair<List<String>, List<String>> groupByColumns = 
getGroupByAndAggregates(t1);
+    groupByColumns.getLeft().forEach(querySkeleton::addProjection);
+
+    Pair<List<String>, List<String>> groupByColumnsT2 = 
getGroupByAndAggregates(t2);
+    groupByColumnsT2.getLeft().forEach(querySkeleton::addProjection);
+
+    String tName =
+        t1.getTableName() + "  " + 
JOIN_TYPES[random.nextInt(JOIN_TYPES.length)] + " " + t2.getTableName() + " ON "
+            + " " + rt.getLocalTableKey() + " = " + " " + 
rt.getForeignTableKey() + " ";
+
+    querySkeleton.addTable(tName);
+
+    groupByColumns.getRight().forEach(querySkeleton::addGroupByColumn);
+    groupByColumnsT2.getRight().forEach(querySkeleton::addGroupByColumn);
+
+    if (includePredicates) {
+      List<String> predicates = getRandomPredicates(t1);
+      predicates.forEach(querySkeleton::addPredicate);
+    }
+
+    if (includeOrderBy) {
+      getRandomOrderBys(t1, 
groupByColumns.getRight()).forEach(querySkeleton::addOrderByColumn);
+      getRandomOrderBys(t2, 
groupByColumnsT2.getRight()).forEach(querySkeleton::addOrderByColumn);
+    }
+
+    return querySkeleton.toString();
+  }
+
+  public String selectionOnlyMultiJoin(boolean includePredicates, boolean 
includeOrderBy) {
+    QuerySkeleton querySkeleton = new QuerySkeleton();
+
+    List<String> predicates = new ArrayList<>();
+    List<Table> tables = new ArrayList<>();
+    Set<String> tableNames = new HashSet<>();
+
+    Random random = new Random();
+
+    // Start off with a random table with related tables
+    while (true) {
+      Table t1 = getRandomTable();
+      if (t1.getRelatedTables().size() > 0) {
+        tables.add(t1);
+        tableNames.add(t1.getTableName());
+        break;
+      }
+    }
+
+    // Add more tables
+    while (random.nextInt() % 8 != 0) {
+      int tableToAddIdx = random.nextInt(tables.size());
+      RelatedTable relatedTable = tables.get(tableToAddIdx).getRelatedTables()
+          
.get(random.nextInt(tables.get(tableToAddIdx).getRelatedTables().size()));
+      if (!tableNames.contains(relatedTable.getForeignTableName())) {
+        tableNames.add(relatedTable.getForeignTableName());
+        
tables.add(TPCHQueryGeneratorV2.TABLES_MAP.get(relatedTable.getForeignTableName()));
+        predicates.add(relatedTable.getLocalTableKey() + "=" + 
relatedTable.getForeignTableKey());
+      }
+    }
+
+    for (Table item : tables) {
+      getRandomProjections(item).forEach(querySkeleton::addProjection);
+    }
+    for (Table value : tables) {
+      querySkeleton.addTable(value.getTableName());
+    }
+
+    if (predicates.size() > 0) {
+      for (String predicate : predicates) {
+        querySkeleton.addPredicate(predicate);
+      }
+    }
+
+    if (includePredicates) {
+      for (Table table : tables) {
+        List<String> preds = getRandomPredicates(table);
+        preds.forEach(querySkeleton::addPredicate);
+      }
+    }
+
+    if (includeOrderBy) {
+      for (Table table : tables) {
+        getRandomOrderBys(table).forEach(querySkeleton::addOrderByColumn);
+      }
+    }
+
+    return querySkeleton.toString();
+  }
+
+  public String selectionGroupByMultiJoin(boolean includePredicates, boolean 
includeOrderBy) {
+    QuerySkeleton querySkeleton = new QuerySkeleton();
+
+    List<String> predicates = new ArrayList<>();
+    List<Table> tables = new ArrayList<>();
+    Set<String> tableNames = new HashSet<>();
+
+    Random random = new Random();
+
+    // Start off with a random table with related tables
+    while (true) {
+      Table t1 = getRandomTable();
+      if (t1.getRelatedTables().size() > 0) {
+        tables.add(t1);
+        tableNames.add(t1.getTableName());
+        break;
+      }
+    }
+
+    // Add more tables
+    while (random.nextInt() % 8 != 0) {
+      int tableToAddIdx = random.nextInt(tables.size());
+      RelatedTable relatedTable = tables.get(tableToAddIdx).getRelatedTables()
+          
.get(random.nextInt(tables.get(tableToAddIdx).getRelatedTables().size()));
+      if (!tableNames.contains(relatedTable.getForeignTableName())) {
+        tableNames.add(relatedTable.getForeignTableName());
+        
tables.add(TPCHQueryGeneratorV2.TABLES_MAP.get(relatedTable.getForeignTableName()));
+        predicates.add(relatedTable.getLocalTableKey() + "=" + 
relatedTable.getForeignTableKey());
+      }
+    }
+
+    Map<String, List<String>> tableWiseGroupByCols = new HashMap<>();
+    for (Table value : tables) {
+      Pair<List<String>, List<String>> groupByAndAggregates = 
getGroupByAndAggregates(value);
+      groupByAndAggregates.getLeft().forEach(querySkeleton::addProjection);
+      groupByAndAggregates.getRight().forEach(querySkeleton::addGroupByColumn);
+      tableWiseGroupByCols.put(value.getTableName(), 
groupByAndAggregates.getRight());
+    }
+    for (Table table : tables) {
+      querySkeleton.addTable(table.getTableName());
+    }
+    predicates.forEach(querySkeleton::addPredicate);
+
+    if (includePredicates) {
+      for (Table table : tables) {
+        List<String> preds = getRandomPredicates(table);
+        preds.forEach(querySkeleton::addPredicate);
+      }
+    }
+
+    if (includeOrderBy) {
+      for (Table table : tables) {
+        getRandomOrderBys(table, 
tableWiseGroupByCols.get(table.getTableName())).forEach(
+            querySkeleton::addOrderByColumn);
+      }
+    }
+
+    return querySkeleton.toString();
+  }
+
+  public String generateRandomQuery() {
+    Random random = new Random();
+    int queryType = random.nextInt(6);
+    boolean includePredicates = random.nextBoolean();
+    boolean includeOrderBy = true;
+    switch (queryType) {
+      case 0:
+        return generateSelectionOnlyQuery(includePredicates, includeOrderBy);
+      case 1:
+        return selectionOnlyWithJoins(includePredicates, includeOrderBy);
+      case 2:
+        return selectionOnlyWithGroupBy(includePredicates, includeOrderBy);
+      case 3:
+        return selectionOnlyGroupByWithJoins(includePredicates, 
includeOrderBy);
+      case 4:
+        return selectionOnlyMultiJoin(includePredicates, includeOrderBy);
+      case 5:
+        return selectionGroupByMultiJoin(includePredicates, includeOrderBy);
+      default:
+        return generateSelectionOnlyQuery(includePredicates, includeOrderBy);
+    }
+  }
+}
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Table.java
similarity index 51%
copy from 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
copy to 
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Table.java
index 7fbae29c08..90ff494b4e 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/Constants.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/tpch/generator/Table.java
@@ -1,3 +1,9 @@
+package org.apache.pinot.integration.tests.tpch.generator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -16,29 +22,30 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.pinot.integration.tests.tpch;
+class Table {
+  private final String _tableName;
+  private final List<Column> _columns;
+  private final List<RelatedTable> _relatedTables;
 
-import java.io.File;
+  public Table(String tableName, List<Column> columns) {
+    _tableName = tableName;
+    _columns = columns;
+    _relatedTables = new ArrayList<>();
+  }
 
+  public String getTableName() {
+    return _tableName;
+  }
 
-/**
- * Constants used in TPCH integration tests.
- */
-public final class Constants {
-  static final String[] TPCH_TABLE_NAMES = {
-      "customer", "lineitem", "nation", "orders", "part", "partsupp", 
"region", "supplier"
-  };
-  static final String AVRO_FILE_SUFFIX = ".avro";
-  private static final String TPCH_TABLE_RESOURCE_FOLDER_PREFIX = 
"examples/batch/tpch/";
-  private Constants() {
+  public List<Column> getColumns() {
+    return _columns;
   }
 
-  static String getTableResourceFolder(String tableName) {
-    return TPCH_TABLE_RESOURCE_FOLDER_PREFIX + tableName;
+  public List<RelatedTable> getRelatedTables() {
+    return _relatedTables;
   }
 
-  static String getTableAvroFilePath(String tableName) {
-    return getTableResourceFolder(tableName) + File.separator
-        + "rawdata" + File.separator + tableName + AVRO_FILE_SUFFIX;
+  public void addRelation(String foreignTableName, String foreignTableKey, 
String localTableKey) {
+    _relatedTables.add(new RelatedTable(foreignTableName, foreignTableKey, 
localTableKey));
   }
 }
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
index 59e752a8b8..92991e1947 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/DocIdDictionary.java
@@ -45,6 +45,11 @@ public class DocIdDictionary extends BaseImmutableDictionary 
{
     }
   }
 
+  @Override
+  public int indexOf(int value) {
+    return insertionIndexOf(Integer.toString(value));
+  }
+
   @Override
   public DataType getValueType() {
     return DataType.INT;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to