Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1670#discussion_r157974643
--- Diff: examples/spark2/src/main/scala/org/apache/carbondata/examples/ConcurrencyTest.scala ---
@@ -0,0 +1,355 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.examples
+
+import java.io.File
+import java.util
+import java.util.concurrent.{Callable, Executors, Future, TimeUnit}
+
+import scala.util.Random
+
+import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}
+import org.apache.spark.sql.types._
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+// scalastyle:off println
+object ConcurrencyTest {
+
+  // Total number of generated rows in the comparison data set (100 million).
+  var totalNum = 100 * 1000 * 1000
+  // Size of the concurrent query thread pool.
+  // NOTE(review): ThreadNum/TaskNum/ResultIsEmpty use UpperCamelCase; Scala
+  // convention for vars is lowerCamelCase — consider renaming if callers permit.
+  var ThreadNum = 16
+  // Number of query tasks submitted to the pool.
+  var TaskNum = 100
+  // Presumably flags whether query results are expected to be empty —
+  // TODO confirm against the (not visible here) query-execution code.
+  var ResultIsEmpty = true
+  // Cardinality (distinct-value count) of the id column: 10^8.
+  val cardinalityId = 10000 * 10000
+  // Cardinality of the city column.
+  val cardinalityCity = 6
+
+ def parquetTableName: String = "comparetest_parquet"
+
+ def orcTableName: String = "comparetest_orc"
+
+ def carbonTableName(version: String): String =
s"comparetest_carbonV$version"
+
+  // Table schema:
+  // +-------------+-----------+-------------+-------------+------------+
+  // | Column name | Data type | Cardinality | Column type | Dictionary |
+  // +-------------+-----------+-------------+-------------+------------+
+  // | id          | string    | 100,000,000 | dimension   | no         |
+  // +-------------+-----------+-------------+-------------+------------+
+  // | city        | string    | 6           | dimension   | yes        |
+  // +-------------+-----------+-------------+-------------+------------+
+ // | country | string | 6 | dimension | yes |
+ // +-------------+-----------+-------------+-------------+------------+
+ // | planet | string | 100,007 | dimension | yes |
+ // +-------------+-----------+-------------+-------------+------------+
+ // | m1 | short | NA | measure | no |
+ // +-------------+-----------+-------------+-------------+------------+
+ // | m2 | int | NA | measure | no |
+ // +-------------+-----------+-------------+-------------+------------+
+ // | m3 | big int | NA | measure | no |
+ // +-------------+-----------+-------------+-------------+------------+
+ // | m4 | double | NA | measure | no |
+ // +-------------+-----------+-------------+-------------+------------+
+ // | m5 | decimal | NA | measure | no |
+ // +-------------+-----------+-------------+-------------+------------+
+
+ private def generateDataFrame(spark: SparkSession): DataFrame = {
+ val rdd = spark.sparkContext
+ .parallelize(1 to totalNum, 4)
+ .map { x =>
+ ((x % 100000000).toString, "city" + x % 6, "country" + x % 6,
"planet" + x % 10007,
+ (x % 16).toShort, x / 2, (x << 1).toLong, x.toDouble / 13,
+ BigDecimal.valueOf(x.toDouble / 11))
+ }.map { x =>
+ Row(x._1, x._2, x._3, x._4, x._5, x._6, x._7, x._8, x._9)
+ }
+
+ val schema = StructType(
+ Seq(
+ StructField("id", StringType, nullable = false),
+ StructField("city", StringType, nullable = false),
+ StructField("country", StringType, nullable = false),
+ StructField("planet", StringType, nullable = false),
+ StructField("m1", ShortType, nullable = false),
+ StructField("m2", IntegerType, nullable = false),
+ StructField("m3", LongType, nullable = false),
+ StructField("m4", DoubleType, nullable = false),
+ StructField("m5", DecimalType(30, 10), nullable = false)
+ )
+ )
+
+ spark.createDataFrame(rdd, schema)
+ }
+
+  // Performance test queries; they are designed to exercise various data access types.
+  val r = new Random()
+  // NOTE(review): with the current defaults cardinalityId == totalNum, so the
+  // "% totalNum" is a no-op; it only takes effect if totalNum is lowered at runtime.
+  val tmpId = r.nextInt(cardinalityId) % totalNum
+  // Random city literal used as a filter value in the queries below.
+  val tmpCity = "city" + (r.nextInt(cardinalityCity) % totalNum)
+ val queries: Array[Query] = Array(
--- End diff --
OK, I will enhance the framework.
---