bvaradar commented on code in PR #8303:
URL: https://github.com/apache/hudi/pull/8303#discussion_r1162253186
##########
docker/demo/sparksql-batch2.commands:
##########
@@ -26,7 +26,8 @@ spark.sql("select `_hoodie_commit_time`, symbol, ts, volume,
open, close from s
spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol
HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from
stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
- // Copy-On-Write Bootstrapped table
+// Copy-On-Write Bootstrapped table
+spark.sql("set hoodie.bootstrap.data.queries.only=false")
Review Comment:
Are there any integration tests for bootstrap where we test with this feature
on?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -100,7 +101,7 @@ class DefaultSource extends RelationProvider
)
} else {
Map()
- }) ++ DataSourceOptionsHelper.parametersWithReadDefaults(optParams)
+ }) ++
DataSourceOptionsHelper.parametersWithReadDefaults(sqlContext.getAllConfs.filter(k
=> k._1.startsWith("hoodie.")) ++ optParams)
Review Comment:
Why is this needed?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -270,6 +271,21 @@ object DefaultSource {
}
}
+ private def resolveHoodieBootstrapRelation(sqlContext: SQLContext,
+ globPaths: Seq[Path],
+ userSchema: Option[StructType],
+ metaClient: HoodieTableMetaClient,
+ parameters: Map[String, String]):
BaseRelation = {
+ val enableFileIndex = HoodieSparkConfUtils.getConfigValue(parameters,
sqlContext.sparkSession.sessionState.conf,
+ ENABLE_HOODIE_FILE_INDEX.key,
ENABLE_HOODIE_FILE_INDEX.defaultValue.toString).toBoolean
+ if (!enableFileIndex || globPaths.nonEmpty ||
parameters.getOrElse(HoodieBootstrapConfig.DATA_QUERIES_ONLY.key(), "true") !=
"true") {
Review Comment:
@jonvex: Wouldn't this change cause user queries that include hoodie
metadata columns to fail? Can't we just use the userSchema being passed here to
check whether any hoodie metadata columns are being queried, and then determine
the appropriate next steps?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]