vinishjail97 commented on code in PR #17694:
URL: https://github.com/apache/hudi/pull/17694#discussion_r2714019621
##########
hudi-spark-datasource/hudi-spark-common/src/test/scala/org/apache/hudi/schema/TestHoodieSparkSchemaUtils.scala:
##########
@@ -20,45 +20,177 @@
package org.apache.hudi.schema
import org.apache.hudi.HoodieSchemaUtils
+import org.apache.hudi.exception.HoodieException
-import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType, StructField, StructType}
-import org.junit.jupiter.api.Assertions.assertEquals
+import org.apache.spark.sql.types._
+import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows}
import org.junit.jupiter.api.Test
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.CsvSource
/**
- * Tests {@link HoodieSparkSchemaUtils}
+ * Tests for {@link HoodieSchemaUtils#getSchemaForField}
*/
class TestHoodieSparkSchemaUtils {
+ // Shared test schemas
+ private val simpleSchema = StructType(
+ StructField("id", StringType) ::
+ StructField("name", StringType) ::
+ StructField("count", IntegerType) :: Nil)
+
+ private val nestedSchema = StructType(
+ StructField("id", StringType) ::
+ StructField("nested", StructType(
+ StructField("inner_string", StringType) ::
+ StructField("inner_int", IntegerType) :: Nil)) :: Nil)
+
+ private val arraySchema = StructType(
+ StructField("id", StringType) ::
+ StructField("items", ArrayType(StringType, containsNull = true)) :: Nil)
+
+ private val arrayOfStructSchema = StructType(
+ StructField("id", StringType) ::
+ StructField("items", ArrayType(StructType(
+ StructField("nested_int", IntegerType) ::
+ StructField("nested_string", StringType) :: Nil), containsNull = true)) :: Nil)
+
+ private val mapSchema = StructType(
+ StructField("id", StringType) ::
+ StructField("metadata", MapType(StringType, IntegerType, valueContainsNull = true)) :: Nil)
+
+ private val mapOfStructSchema = StructType(
+ StructField("id", StringType) ::
+ StructField("nested_map", MapType(StringType, StructType(
+ StructField("nested_int", IntegerType) ::
+ StructField("nested_string", StringType) :: Nil), valueContainsNull = true)) :: Nil)
+
+ private val complexSchema = StructType(
+ StructField("id", StringType) ::
+ StructField("top", StructType(
+ StructField("nested_array", ArrayType(StructType(
+ StructField("inner_field", StringType) :: Nil), containsNull = true)) ::
+ StructField("nested_map", MapType(StringType, IntegerType, valueContainsNull = true)) :: Nil)) :: Nil)
+
+ // ===========================================
+ // Simple and Nested Field Tests
+ // ===========================================
+
+ @ParameterizedTest
+ @CsvSource(Array(
+ "id, id, string",
+ "name, name, string",
+ "count, count, int"
+ ))
+ def testSimpleFields(inputPath: String, expectedKey: String, expectedType: String): Unit = {
+ val result = HoodieSchemaUtils.getSchemaForField(simpleSchema, inputPath.trim)
+ assertEquals(expectedKey.trim, result.getKey)
+ assertEquals(expectedType.trim, result.getValue.dataType.simpleString)
+ }
+
+ @ParameterizedTest
+ @CsvSource(Array(
+ "nested.inner_string, nested.inner_string, string",
+ "nested.inner_int, nested.inner_int, int"
+ ))
+ def testNestedFields(inputPath: String, expectedKey: String, expectedType: String): Unit = {
+ val result = HoodieSchemaUtils.getSchemaForField(nestedSchema, inputPath.trim)
+ assertEquals(expectedKey.trim, result.getKey)
+ assertEquals(expectedType.trim, result.getValue.dataType.simpleString)
+ }
+
+ // ===========================================
+ // Array Path Tests
+ // ===========================================
+
+ @ParameterizedTest
+ @CsvSource(Array(
+ "items.list.element, items.list.element, string"
+ ))
+ def testArrayPath(inputPath: String, expectedKey: String, expectedType: String): Unit = {
+ val result = HoodieSchemaUtils.getSchemaForField(arraySchema, inputPath.trim)
+ assertEquals(expectedKey.trim, result.getKey)
+ assertEquals(expectedType.trim, result.getValue.dataType.simpleString)
+ }
+
+ @ParameterizedTest
+ @CsvSource(Array(
+ "items.list.element.nested_int, items.list.element.nested_int, int",
+ "items.list.element.nested_string, items.list.element.nested_string, string"
+ ))
+ def testNestedArrayPath(inputPath: String, expectedKey: String, expectedType: String): Unit = {
+ val result = HoodieSchemaUtils.getSchemaForField(arrayOfStructSchema, inputPath.trim)
+ assertEquals(expectedKey.trim, result.getKey)
+ assertEquals(expectedType.trim, result.getValue.dataType.simpleString)
+ }
+
+ // ===========================================
+ // Map Navigation Tests
+ // ===========================================
+
+ @ParameterizedTest
+ @CsvSource(Array(
+ "metadata.key_value.key, metadata.key_value.key, string",
+ "metadata.key_value.value, metadata.key_value.value, int"
+ ))
+ def testMapNavigation(inputPath: String, expectedKey: String, expectedType: String): Unit = {
+ val result = HoodieSchemaUtils.getSchemaForField(mapSchema, inputPath.trim)
+ assertEquals(expectedKey.trim, result.getKey)
+ assertEquals(expectedType.trim, result.getValue.dataType.simpleString)
+ }
Review Comment:
Addressed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]