This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 505f7a9ec982e3c9f7e2a01988c7b3f2d66e7f75 Author: Hussain Towaileb <[email protected]> AuthorDate: Thu Oct 3 18:39:48 2019 +0300 [ASTERIXDB-2649][FUN] TPC-DS datasource function, generate proper data types - user model changes: no - storage format changes: no - interface changes: no Details: - Changed the TPC-DS datasource function to generate the TPC-DS data with proper data types according to the TPC-DS schema. - Updated the TPC-DS test cases. Change-Id: I20f6b8d043906ad62652d098e09ab70eb1d78b1b Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/3604 Tested-by: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> --- .../app/function/TPCDSDataGeneratorReader.java | 148 +++++++++++++-------- .../datagen_sf_1_all_tables_2.1.query.sqlpp | 6 +- .../datagen_sf_1_all_tables_2.1.adm | 48 +++---- 3 files changed, 118 insertions(+), 84 deletions(-) diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java index 3d08c01..72bf46f 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java @@ -47,6 +47,14 @@ public class TPCDSDataGeneratorReader extends FunctionReader { private final FunctionIdentifier functionIdentifier; + // Table name will be added to each generated record + private final static String TABLE_NAME_FIELD_NAME = "table_name"; + + // When generating the values, a list is created, at index 0, all the values for the parent record exist, if a + // child record is created, it is at index 1 in the list + private static final int PARENT_VALUES_INDEX = 0; + private static final int CHILD_VALUES_INDEX = 1; + // Table members private final List<Table> selectedTables; private final StringBuilder builder = new StringBuilder(); @@ -90,6 +98,44 @@ public class TPCDSDataGeneratorReader extends FunctionReader { } } + /** + * Gets the table matching the provided string table name, throws an exception if no table is returned. + * + * @param tableName String table name to search for. + * @return Table if found, throws an exception otherwise. + */ + private List<Table> getTableFromStringTableName(String tableName) throws HyracksDataException { + + // Get all the tables + if (generateAllTables) { + // Remove the DBGEN_VERSION table and all children tables, parent tables will generate them + return Table.getBaseTables().stream() + .filter(table -> !table.equals(Table.DBGEN_VERSION) && !table.isChild()) + .collect(Collectors.toList()); + } + + // Search for the table + List<Table> matchedTables = Table.getBaseTables().stream() + .filter(table -> tableName.equalsIgnoreCase(table.getName())).collect(Collectors.toList()); + + // Ensure the table was found + if (matchedTables.isEmpty()) { + throw new RuntimeDataException(ErrorCode.TPCDS_INVALID_TABLE_NAME, getFunctionIdentifier().getName(), + tableName); + } + + return matchedTables; + } + + /** + * Gets the function identifier + * + * @return function identifier + */ + private FunctionIdentifier getFunctionIdentifier() { + return functionIdentifier; + } + @Override public boolean hasNext() { @@ -144,21 +190,8 @@ public class TPCDSDataGeneratorReader extends FunctionReader { // Clear the builder (This is faster than re-creating the builder each iteration) builder.setLength(0); - builder.append("{\"tableName\":\""); - builder.append(currentTable.toString()); - builder.append("\""); - - // Build the record data - for (int counter = 0; counter < values.get(0).size(); counter++) { - builder.append(",\""); - builder.append(currentTable.getColumns()[counter].getName()); - builder.append("\":\""); - builder.append(values.get(0).get(counter)); - builder.append("\""); - } - - // Close the record - builder.append("}"); + // Construct the record + constructRecord(values.get(PARENT_VALUES_INDEX), currentTable); // Reference to the parent row to be returned, before resetting the builder again String parentRow = builder.toString(); @@ -168,21 +201,9 @@ public class TPCDSDataGeneratorReader extends FunctionReader { // are done if (generateAllTables && values.size() > 1) { builder.setLength(0); - builder.append("{\"tableName\":\""); - builder.append(currentTable.getChild().toString()); - builder.append("\""); - - // Build the record data - for (int counter = 0; counter < values.get(1).size(); counter++) { - builder.append(",\""); - builder.append(currentTable.getChild().getColumns()[counter].getName()); - builder.append("\":\""); - builder.append(values.get(0).get(counter)); - builder.append("\""); - } - // Close the record - builder.append("}"); + // Construct the record + constructRecord(values.get(CHILD_VALUES_INDEX), currentTable.getChild()); // Add it to the children rows list childRow = builder.toString(); @@ -193,40 +214,53 @@ public class TPCDSDataGeneratorReader extends FunctionReader { } /** - * Gets the table matching the provided string table name, throws an exception if no table is returned. + * Constructs the record with the appropriate data types. * - * @param tableName String table name to search for. - * @return Table if found, throws an exception otherwise. + * @param values list containing all the generated values for all columns in a string format. + * @param table Table the record is being constructed for */ - private List<Table> getTableFromStringTableName(String tableName) throws HyracksDataException { + private void constructRecord(List<String> values, Table table) { + // Add the table name to the record + builder.append("{\"").append(TABLE_NAME_FIELD_NAME).append("\":\"").append(table.getName()).append("\""); - // Get all the tables - if (generateAllTables) { - // Remove the DBGEN_VERSION table and all children tables, parent tables will generate them - return Table.getBaseTables().stream() - .filter(table -> !table.equals(Table.DBGEN_VERSION) && !table.isChild()) - .collect(Collectors.toList()); - } + // Build the record data + for (int counter = 0; counter < values.size(); counter++) { - // Search for the table - List<Table> matchedTables = Table.getBaseTables().stream() - .filter(table -> tableName.equalsIgnoreCase(table.getName())).collect(Collectors.toList()); + // If the value is null, no need to check for the column type + if (values.get(counter) == null) { + builder.append(",\""); + builder.append(table.getColumns()[counter].getName()); + builder.append("\":"); + builder.append(values.get(counter)); + continue; + } - // Ensure the table was found - if (matchedTables.size() != 1) { - throw new RuntimeDataException(ErrorCode.TPCDS_INVALID_TABLE_NAME, getFunctionIdentifier().getName(), - tableName); + String fieldName = table.getColumns()[counter].getName(); + String stringValue = values.get(counter); + + // Convert the value to the appropriate type based on the column type + switch (table.getColumns()[counter].getType().getBase()) { + case INTEGER: + builder.append(",\"").append(fieldName).append("\":").append(Integer.valueOf(stringValue)); + break; + case DECIMAL: + builder.append(",\"").append(fieldName).append("\":").append(Double.valueOf(stringValue)); + break; + // IDENTIFIER type could be any value, so we're taking it as a string + // DATE and TIME are not supported, they are stored as strings and can be modified with date functions + // CHAR and VARCHAR are handled as strings + // any other type (default case) is handled as a string value + case IDENTIFIER: + case DATE: + case TIME: + case CHAR: + case VARCHAR: + default: + builder.append(",\"").append(fieldName).append("\":\"").append(stringValue).append("\""); + break; + } } - return matchedTables; - } - - /** - * Gets the function identifier - * - * @return function identifier - */ - private FunctionIdentifier getFunctionIdentifier() { - return functionIdentifier; + builder.append("}"); } } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp index 303c1c7..99151b8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp @@ -19,7 +19,7 @@ set `import-private-functions` `true`; -select d.tableName, count(*) as count +select d.table_name, count(*) as count from tpcds_datagen(1) as d -group by d.tableName -order by d.tableName; \ No newline at end of file +group by d.table_name +order by d.table_name; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm index 4040b6b..b01e2cc 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm @@ -1,24 +1,24 @@ -{ "count": 6, "tableName": "CALL_CENTER" } -{ "count": 11718, "tableName": "CATALOG_PAGE" } -{ "count": 144067, "tableName": "CATALOG_RETURNS" } -{ "count": 1441548, "tableName": "CATALOG_SALES" } -{ "count": 100000, "tableName": "CUSTOMER" } -{ "count": 50000, "tableName": "CUSTOMER_ADDRESS" } -{ "count": 1920800, "tableName": "CUSTOMER_DEMOGRAPHICS" } -{ "count": 73049, "tableName": "DATE_DIM" } -{ "count": 7200, "tableName": "HOUSEHOLD_DEMOGRAPHICS" } -{ "count": 20, "tableName": "INCOME_BAND" } -{ "count": 11745000, "tableName": "INVENTORY" } -{ "count": 18000, "tableName": "ITEM" } -{ "count": 300, "tableName": "PROMOTION" } -{ "count": 35, "tableName": "REASON" } -{ "count": 20, "tableName": "SHIP_MODE" } -{ "count": 12, "tableName": "STORE" } -{ "count": 287514, "tableName": "STORE_RETURNS" } -{ "count": 2880404, "tableName": "STORE_SALES" } -{ "count": 86400, "tableName": "TIME_DIM" } -{ "count": 5, "tableName": "WAREHOUSE" } -{ "count": 60, "tableName": "WEB_PAGE" } -{ "count": 71763, "tableName": "WEB_RETURNS" } -{ "count": 719384, "tableName": "WEB_SALES" } -{ "count": 30, "tableName": "WEB_SITE" } \ No newline at end of file +{ "count": 6, "table_name": "call_center" } +{ "count": 11718, "table_name": "catalog_page" } +{ "count": 144067, "table_name": "catalog_returns" } +{ "count": 1441548, "table_name": "catalog_sales" } +{ "count": 100000, "table_name": "customer" } +{ "count": 50000, "table_name": "customer_address" } +{ "count": 1920800, "table_name": "customer_demographics" } +{ "count": 73049, "table_name": "date_dim" } +{ "count": 7200, "table_name": "household_demographics" } +{ "count": 20, "table_name": "income_band" } +{ "count": 11745000, "table_name": "inventory" } +{ "count": 18000, "table_name": "item" } +{ "count": 300, "table_name": "promotion" } +{ "count": 35, "table_name": "reason" } +{ "count": 20, "table_name": "ship_mode" } +{ "count": 12, "table_name": "store" } +{ "count": 287514, "table_name": "store_returns" } +{ "count": 2880404, "table_name": "store_sales" } +{ "count": 86400, "table_name": "time_dim" } +{ "count": 5, "table_name": "warehouse" } +{ "count": 60, "table_name": "web_page" } +{ "count": 71763, "table_name": "web_returns" } +{ "count": 719384, "table_name": "web_sales" } +{ "count": 30, "table_name": "web_site" } \ No newline at end of file
