andygrove commented on code in PR #2902:
URL: https://github.com/apache/datafusion-comet/pull/2902#discussion_r2624176652
##########
spark/src/main/scala/org/apache/comet/testing/FuzzDataGenerator.scala:
##########
@@ -88,6 +89,68 @@ object FuzzDataGenerator {
StructType(fields.toSeq)
}
+ def generateNestedSchema(
+ r: Random,
+ numCols: Int,
+ minDepth: Int,
+ maxDepth: Int,
+ options: SchemaGenOptions): StructType = {
+ assert(numCols > 0)
+ assert(minDepth >= 0)
+ assert(maxDepth >= 0)
+ assert(minDepth <= maxDepth)
+ assert(
+ options.generateArray || options.generateStruct || options.generateMap,
+ "cannot generate nested schema if options do not include generating complex types")
+
+ val counter = new AtomicLong
+
+ def generateFieldName() = {
+ s"c_${counter.incrementAndGet()}"
+ }
+
+ def generateArray(depth: Int, name: String) = {
+ val element = genField(r, depth + 1)
+ StructField(name, DataTypes.createArrayType(element.dataType, true))
+ }
+
+ def generateStruct(depth: Int, name: String) = {
+ val fields =
+ Range(1, 2 + r.nextInt(10)).map(_ => genField(r, depth + 1)).toArray
+ StructField(name, DataTypes.createStructType(fields))
+ }
+
+ def generateMap(depth: Int, name: String) = {
+ val keyField = genField(r, depth + 1)
+ val valueField = genField(r, depth + 1)
+ StructField(name, DataTypes.createMapType(keyField.dataType, valueField.dataType))
+ }
+
+ def generatePrimitive(name: String) = {
+ StructField(name, randomChoice(options.primitiveTypes, r))
+ }
+
+ def genField(r: Random, depth: Int): StructField = {
+ val name = generateFieldName()
+ val generators = new ListBuffer[() => StructField]()
+ if (options.generateArray && depth < maxDepth) {
+ generators += (() => generateArray(depth + 1, name))
+ }
+ if (options.generateStruct && depth < maxDepth) {
+ generators += (() => generateStruct(depth + 1, name))
+ }
+ if (options.generateMap && depth < maxDepth) {
+ generators += (() => generateMap(depth, name))
Review Comment:
Thanks, that's incorrect ... I will fix
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]