vinishjail97 commented on code in PR #17694:
URL: https://github.com/apache/hudi/pull/17694#discussion_r2710842131
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala:
##########
@@ -46,24 +46,226 @@ import scala.collection.JavaConverters._
object HoodieSchemaUtils {
private val log = LoggerFactory.getLogger(getClass)
- def getSchemaForField(schema: StructType, fieldName: String): org.apache.hudi.common.util.collection.Pair[String, StructField] = {
- getSchemaForField(schema, fieldName, StringUtils.EMPTY_STRING)
+ /**
+ * Constants for Parquet-style accessor patterns used in nested MAP and ARRAY navigation.
+ * These patterns are specifically used for column stats generation and differ from
+ * InternalSchema constants which are used in schema evolution contexts.
+ */
+ private final val ARRAY_LIST = "list"
+ private final val ARRAY_ELEMENT = "element"
+ private final val ARRAY_SPARK = "array" // Spark writer uses this
+ private final val MAP_KEY_VALUE = "key_value"
+ private final val MAP_KEY = "key"
+ private final val MAP_VALUE = "value"
+
+ private final val ARRAY_LIST_ELEMENT = ARRAY_LIST + "." + ARRAY_ELEMENT
+ private final val MAP_KEY_VALUE_KEY = MAP_KEY_VALUE + "." + MAP_KEY
+ private final val MAP_KEY_VALUE_VALUE = MAP_KEY_VALUE + "." + MAP_VALUE
+
+ /**
+ * Advances offset past a component name in the path, handling end-of-path and dot separator.
+ *
+ * @param path the full path string
+ * @param offset current position in path
+ * @param component the component name to match (e.g., "element", "key", "value")
+ * @return new offset after component and dot, or path.length() if at end, or -1 if no match
+ */
+ private def getNextOffset(path: String, offset: Int, component: String): Int = {
Review Comment:
I have removed the duplicate code references and moved everything to
HoodieSchema.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]