This is an automated email from the ASF dual-hosted git repository. apucher pushed a commit to branch mock-data-generator-range-extension in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit 8cde6b9738284b6d19e111322dd9cf0a99f22735 Author: Alexander Pucher <[email protected]> AuthorDate: Mon Jun 1 11:22:05 2020 -0700 bring pinot mock data generator.sh script in line with TE mock data generator --- docker/images/pinot/bin/generator.sh | 2 +- pinot-tools/src/main/resources/generator/README.md | 8 ++- .../generator/complexWebsite_generator.json | 2 +- .../src/main/resources/generator/generator.sh | 76 ---------------------- .../generator/simpleWebsite_generator.json | 2 +- 5 files changed, 9 insertions(+), 81 deletions(-) diff --git a/docker/images/pinot/bin/generator.sh b/docker/images/pinot/bin/generator.sh index dc84967..a803f30 100755 --- a/docker/images/pinot/bin/generator.sh +++ b/docker/images/pinot/bin/generator.sh @@ -42,7 +42,7 @@ echo "Extracting template files" echo "Generating data for ${TEMPLATE_NAME} in ${DATA_DIR}" ${ADMIN_PATH} GenerateData \ --numFiles 1 -numRecords 354780 -format csv \ +-numFiles 1 -numRecords 946080 -format csv \ -schemaFile "${TEMPLATE_BASEDIR}/${TEMPLATE_NAME}_schema.json" \ -schemaAnnotationFile "${TEMPLATE_BASEDIR}/${TEMPLATE_NAME}_generator.json" \ -outDir "$DATA_DIR" diff --git a/pinot-tools/src/main/resources/generator/README.md b/pinot-tools/src/main/resources/generator/README.md index f2a3b89..0123cc7 100644 --- a/pinot-tools/src/main/resources/generator/README.md +++ b/pinot-tools/src/main/resources/generator/README.md @@ -31,6 +31,9 @@ data with these patterns if you so desire. The command line examples below are meant to be executed from the **pinot repository root**. (This was tested with pinot-quickstart in batch mode. Requires DefaultTenant and broker) +NOTE: there's a `generator.sh` script under `incubator-pinot/docker/images/pinot/bin` that wraps this +functionality for Apache Pinot docker containers. + ## Generate data via pattern This first step generates the raw data from a given generator file. By default, we generate the data as CSV, and you can have a look manually with your favorite spreadsheet tool. @@ -39,7 +42,7 @@ have a look manually with your favorite spreadsheet tool. ``` ./pinot-tools/target/pinot-tools-pkg/bin/pinot-admin.sh GenerateData \ --numFiles 1 -numRecords 354780 -format csv \ +-numFiles 1 -numRecords 946080 -format csv \ -schemaFile ./pinot-tools/src/main/resources/generator/complexWebsite_schema.json \ -schemaAnnotationFile ./pinot-tools/src/main/resources/generator/complexWebsite_generator.json \ -outDir ./myTestData @@ -50,7 +53,8 @@ Now we turn the verbose CSV data into an efficiently packed segment ready for up ``` ./pinot-tools/target/pinot-tools-pkg/bin/pinot-admin.sh CreateSegment \ --tableName complexWebsite -segmentName complexWebsite -format CSV -overwrite \ +-format CSV \ +-tableConfigFile ./pinot-tools/src/main/resources/generator/complexWebsite_config.json \ -schemaFile ./pinot-tools/src/main/resources/generator/complexWebsite_schema.json \ -dataDir ./myTestData \ -outDir ./myTestSegment diff --git a/pinot-tools/src/main/resources/generator/complexWebsite_generator.json b/pinot-tools/src/main/resources/generator/complexWebsite_generator.json index 41f08c9..a2ee301 100644 --- a/pinot-tools/src/main/resources/generator/complexWebsite_generator.json +++ b/pinot-tools/src/main/resources/generator/complexWebsite_generator.json @@ -2,7 +2,7 @@ { "column": "hoursSinceEpoch", "pattern": { - "type": "SEQUENCE", "start": 420768, "stepsize": 1, "repetitions": 18 + "type": "SEQUENCE", "start": 429528, "stepsize": 1, "repetitions": 18 } }, { diff --git a/pinot-tools/src/main/resources/generator/generator.sh b/pinot-tools/src/main/resources/generator/generator.sh deleted file mode 100755 index ac1e746..0000000 --- a/pinot-tools/src/main/resources/generator/generator.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - - -#ADMIN_PATH="/opt/pinot/bin/pinot-admin.sh" -ADMIN_PATH="./pinot-tools/target/pinot-tools-pkg/bin/pinot-admin.sh" -#PATTERN_BASEDIR="/opt/pinot/examples/docker/generators" -PATTERN_BASEDIR="./pinot-tools/src/main/resources/generator" -TEMP_DIR="/tmp/pinotGenerator" - -if [ -z "$1" ]; then - echo "No PATTERN name specified. Aborting." - exit 1 -fi - -PATTERN_NAME="$1" -DATA_DIR="${TEMP_DIR:?}/${PATTERN_NAME}" -SEGMENT_DIR="${TEMP_DIR:?}/${PATTERN_NAME}Segment" - -echo "Preparing temp directory for ${PATTERN_NAME}" -rm -rf "${DATA_DIR}" -rm -rf "${SEGMENT_DIR}" -mkdir -p "${TEMP_DIR}" - -echo "Generating data for ${PATTERN_NAME} in ${DATA_DIR}" -${ADMIN_PATH} GenerateData \ --numFiles 1 -numRecords 354780 -format csv \ --schemaFile "${PATTERN_BASEDIR}/${PATTERN_NAME}_schema.json" \ --schemaAnnotationFile "${PATTERN_BASEDIR}/${PATTERN_NAME}_generator.json" \ --outDir "$DATA_DIR" - -if [ ! -d "${DATA_DIR}" ]; then - echo "Data generation failed. Aborting." - exit 1 -fi - -echo "Creating segment for ${PATTERN_NAME} in ${SEGMENT_DIR}" -${ADMIN_PATH} CreateSegment \ --tableName "${PATTERN_NAME}" -segmentName "${PATTERN_NAME}" -format CSV -overwrite \ --schemaFile "${PATTERN_BASEDIR}/${PATTERN_NAME}_schema.json" \ --dataDir "${DATA_DIR}" \ --outDir "${SEGMENT_DIR}" || exit 1 - -if [ ! -d "${SEGMENT_DIR}" ]; then - echo "Data generation failed. Aborting." - exit 1 -fi - -echo "Adding table ${PATTERN_NAME}" -${ADMIN_PATH} AddTable -exec \ --tableConfigFile "${PATTERN_BASEDIR}/${PATTERN_NAME}_config.json" \ --schemaFile "${PATTERN_BASEDIR}/${PATTERN_NAME}_schema.json" || exit 1 - -echo "Uploading segment for ${PATTERN_NAME}" -${ADMIN_PATH} UploadSegment \ --tableName "${PATTERN_NAME}" \ --segmentDir "${SEGMENT_DIR}" || exit 1 - -echo "Succesfully applied PATTERN ${PATTERN_NAME}" diff --git a/pinot-tools/src/main/resources/generator/simpleWebsite_generator.json b/pinot-tools/src/main/resources/generator/simpleWebsite_generator.json index 0f17e0d..4f00e0a 100644 --- a/pinot-tools/src/main/resources/generator/simpleWebsite_generator.json +++ b/pinot-tools/src/main/resources/generator/simpleWebsite_generator.json @@ -3,7 +3,7 @@ "column": "hoursSinceEpoch", "pattern": { "type": "SEQUENCE", - "start": 420768, + "start": 429528, "stepsize": 1 } }, --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
