[GitHub] [hudi] yihua commented on a diff in pull request #7528: [HUDI-5443] Fixing exception trying to read MOR table after `NestedSchemaPruning` rule has been applied
yihua commented on code in PR #7528: URL: https://github.com/apache/hudi/pull/7528#discussion_r1082866912 ## hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala: ## @@ -78,14 +80,52 @@ object HoodieCatalystExpressionUtils { * NOTE: Projection of the row from [[StructType]] A to [[StructType]] B is only possible, if * B is a subset of A */ - def generateUnsafeProjection(from: StructType, to: StructType): UnsafeProjection = { -val attrs = from.toAttributes -val attrsMap = attrs.map(attr => (attr.name, attr)).toMap -val targetExprs = to.fields.map(f => attrsMap(f.name)) + def generateUnsafeProjection(sourceStructType: StructType, targetStructType: StructType): UnsafeProjection = { +val resolver = SQLConf.get.resolver +val attrs = sourceStructType.toAttributes +val targetExprs = targetStructType.fields.map { targetField => + val attrRef = attrs.find(attr => resolver(attr.name, targetField.name)) +.getOrElse(throw new AnalysisException(s"Wasn't able to match target field `${targetField.name}` to any of the source attributes ($attrs)")) + + genProjectingExpression(attrRef, targetField.dataType) +} GenerateUnsafeProjection.generate(targetExprs, attrs) } + private def genProjectingExpression(sourceExpr: Expression, + targetDataType: DataType): Expression = { +checkState(sourceExpr.resolved) + +// TODO support array, map +(sourceExpr.dataType, targetDataType) match { + case (sdt, tdt) if sdt == tdt => +sourceExpr + + case (sourceType: StructType, targetType: StructType) => +val fieldValueExprs = targetType.fields.map { tf => + val ord = sourceType.fieldIndex(tf.name) + val fieldValExpr = genProjectingExpression(GetStructField(sourceExpr, ord, Some(tf.name)), tf.dataType) + Alias(fieldValExpr, tf.name)() +} + +CreateStruct(fieldValueExprs) + + case _ => throw new UnsupportedOperationException(s"(${sourceExpr.dataType}, $targetDataType)") +} + } + + // TODO scala-docs Review Comment: @alexeykudinkin could you address this? 
## hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala: ## @@ -427,6 +428,10 @@ class TestMORDataSource extends HoodieClientTestBase with SparkDatasetMixin { @ParameterizedTest @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) def testPrunedFiltered(recordType: HoodieRecordType) { + +spark.sessionState.conf.setConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED, false) Review Comment: Is this still needed? ## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala: ## @@ -106,18 +112,16 @@ class BaseFileOnlyRelation(sqlContext: SQLContext, } protected def collectFileSplits(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[HoodieBaseFileSplit] = { -val partitions = listLatestBaseFiles(globPaths, partitionFilters, dataFilters) -val fileSplits = partitions.values.toSeq - .flatMap { files => -files.flatMap { file => - // TODO fix, currently assuming parquet as underlying format - HoodieDataSourceHelper.splitFiles( -sparkSession = sparkSession, -file = file, -partitionValues = getPartitionColumnsAsInternalRow(file) - ) -} - } +val fileSlices = listLatestFileSlices(globPaths, partitionFilters, dataFilters) Review Comment: Got it. Then it should be fine. Could you confirm we have test coverage for MOR read-optimized query on a table with log files (which set of tests covers this)? 
## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala: ## @@ -138,10 +137,16 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, override protected def getPartitions: Array[Partition] = fileSplits.zipWithIndex.map(file => HoodieMergeOnReadPartition(file._2, file._1)).toArray - private def getConfig: Configuration = { -val conf = confBroadcast.value.value -CONFIG_INSTANTIATION_LOCK.synchronized { - new Configuration(conf) -} + private def getHadoopConf: Configuration = { +val conf = hadoopConfBroadcast.value.value +new Configuration(conf) Review Comment: Given that this might introduce side effects and there's little time before the code freeze of the release to verify the removal of the lock, could you keep this part the same as before? It is not essential to the PR. ## hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala: ## @@ -78,14 +80,52 @@ object HoodieCatalystExpressionUtils { * NOTE: Projection of the row from [[StructType]] A to [[StructType]] B is only poss
[GitHub] [hudi] hudi-bot commented on pull request #7719: [HUDI-5584] When the table to be synchronized already exists in hive,…
hudi-bot commented on PR #7719: URL: https://github.com/apache/hudi/pull/7719#issuecomment-1398739070 ## CI report: * b29c08828fe9a7ab027c4de3c5055ac88413b97d UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7712: [DNM] Check CI timeout
hudi-bot commented on PR #7712: URL: https://github.com/apache/hudi/pull/7712#issuecomment-1398738962 ## CI report: * 860b46bcff9d06d71c4cd453700bb4160ae6a61a Azure: [SUCCESS](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14499) * 7020a165b334db4ea547bef27d60815e6182122b UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398738792 ## CI report: * 1b075e25aa5811f36e83e12bfba11a08bc929bf1 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14495) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14491) * 8218fde3afec8a069de63eac7dda1abf45523f12 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14506) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398738476 ## CI report: * 48c0f695a3b9aade6fc3439a8d53433019b95e89 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14494) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14490) * de6ce712cc12145040d3fc4f9772e786c29fa7bd Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14505) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7607: [HUDI-5499] Fixing Spark SQL configs not being properly propagated for CTAS and other commands
hudi-bot commented on PR #7607: URL: https://github.com/apache/hudi/pull/7607#issuecomment-1398738311 ## CI report: * 32033e4a4ed91005a237aa88afa2c6adcb51169f UNKNOWN * 6e67e79228d1e4d165af0faf5905e216153a80e3 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14439) * dc9095021d40149588a5e161cfcee684d2eb6076 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14502) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398737116 ## CI report: * 031dc62b21fc55546243a8fea450138ef94f3405 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14485) * 26ecbdd92b72041ee52285753d01a3ee9040253f Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14504) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] LinMingQiang opened a new pull request, #7719: [HUDI-5584] When the table to be synchronized already exists in hive,…
LinMingQiang opened a new pull request, #7719: URL: https://github.com/apache/hudi/pull/7719 … need to update serde/table properties ### Change Logs HiveSyncTool#syncSchema _Describe context and summary for this change. Highlight if any code was copied._ ### Impact Hoodie sync to hive metastore. _Describe any public API or user-facing feature change or any performance impact._ ### Risk level (write none, low medium or high below) none _If medium or high, explain what verification was done to mitigate the risks._ ### Documentation Update _Describe any necessary documentation update if there is any new feature, config, or user-facing change_ - _The config description must be updated if new configs are added or the default value of the configs are changed_ - _Any new feature or user-facing change requires updating the Hudi website. Please create a Jira ticket, attach the ticket number here and follow the [instruction](https://hudi.apache.org/contribute/developer-setup#website) to make changes to the website._ ### Contributor's checklist - [ ] Read through [contributor's guide](https://hudi.apache.org/contribute/how-to-contribute) - [ ] Change Logs and Impact were stated clearly - [ ] Adequate tests were added if applicable - [ ] CI passed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7709: [HUDI-5582] Do not let users override internal metadata configs
hudi-bot commented on PR #7709: URL: https://github.com/apache/hudi/pull/7709#issuecomment-1398731340 ## CI report: * 8048762c6d565c09289564d812c4bba77cc90d61 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14462) * 7c8167d0dddea5f4fc002f8f5d65334cdd45d1e8 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14503) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398730409 ## CI report: * 48c0f695a3b9aade6fc3439a8d53433019b95e89 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14494) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14490) * de6ce712cc12145040d3fc4f9772e786c29fa7bd UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398731123 ## CI report: * 1b075e25aa5811f36e83e12bfba11a08bc929bf1 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14495) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14491) * 8218fde3afec8a069de63eac7dda1abf45523f12 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7607: [HUDI-5499] Fixing Spark SQL configs not being properly propagated for CTAS and other commands
hudi-bot commented on PR #7607: URL: https://github.com/apache/hudi/pull/7607#issuecomment-1398730183 ## CI report: * 32033e4a4ed91005a237aa88afa2c6adcb51169f UNKNOWN * 6e67e79228d1e4d165af0faf5905e216153a80e3 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14439) * dc9095021d40149588a5e161cfcee684d2eb6076 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398728655 ## CI report: * 031dc62b21fc55546243a8fea450138ef94f3405 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14485) * 26ecbdd92b72041ee52285753d01a3ee9040253f UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7712: [DNM] Check CI timeout
hudi-bot commented on PR #7712: URL: https://github.com/apache/hudi/pull/7712#issuecomment-1398722329 ## CI report: * 860b46bcff9d06d71c4cd453700bb4160ae6a61a Azure: [SUCCESS](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14499) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7709: [HUDI-5582] Do not let users override internal metadata configs
hudi-bot commented on PR #7709: URL: https://github.com/apache/hudi/pull/7709#issuecomment-1398722262 ## CI report: * 8048762c6d565c09289564d812c4bba77cc90d61 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14462) * 7c8167d0dddea5f4fc002f8f5d65334cdd45d1e8 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] alexeykudinkin merged pull request #7423: [HUDI-5384] Adding optimization rule to appropriately push down filters into the `HoodieFileIndex`
alexeykudinkin merged PR #7423: URL: https://github.com/apache/hudi/pull/7423 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] alexeykudinkin commented on pull request #7423: [HUDI-5384] Adding optimization rule to appropriately push down filters into the `HoodieFileIndex`
alexeykudinkin commented on PR #7423: URL: https://github.com/apache/hudi/pull/7423#issuecomment-1398631718 CI is green (screenshot: https://user-images.githubusercontent.com/428277/213751799-9d943b81-62a6-482c-94eb-c558a2fbb736.png); build results: https://dev.azure.com/apache-hudi-ci-org/apache-hudi-ci/_build/results?buildId=14464&view=results -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] soumilshah1995 commented on issue #2544: [SUPPORT]failed to read timestamp column in version 0.7.0 even when HIVE_SUPPORT_TIMESTAMP is enabled
soumilshah1995 commented on issue #2544: URL: https://github.com/apache/hudi/issues/2544#issuecomment-1398626568 > find the commit for this fix? Are you using the latest version of Hudi? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7712: [DNM] Check CI timeout
hudi-bot commented on PR #7712: URL: https://github.com/apache/hudi/pull/7712#issuecomment-1398619566 ## CI report: * 45e4b8e0ed22052683a7982c48444f7fc43b767d Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14497) * 860b46bcff9d06d71c4cd453700bb4160ae6a61a Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14499) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7718: [HUDI-5591] HoodieSparkSqlWriter#getHiveTableNames needs to consider …
hudi-bot commented on PR #7718: URL: https://github.com/apache/hudi/pull/7718#issuecomment-1398609592 ## CI report: * 0da7f5ff6c70c773c035aae60d84ec460252aae6 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14496) Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14500) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7712: [DNM] Check CI timeout
hudi-bot commented on PR #7712: URL: https://github.com/apache/hudi/pull/7712#issuecomment-1398609328 ## CI report: * 45e4b8e0ed22052683a7982c48444f7fc43b767d Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14497) * 860b46bcff9d06d71c4cd453700bb4160ae6a61a UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] LinMingQiang commented on pull request #7718: [HUDI-5591] HoodieSparkSqlWriter#getHiveTableNames needs to consider …
LinMingQiang commented on PR #7718: URL: https://github.com/apache/hudi/pull/7718#issuecomment-1398604505 @hudi-bot run azure -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7716: [HUDI-5558] Serializable interface implementation don't explicitly declare serialVersionUID
hudi-bot commented on PR #7716: URL: https://github.com/apache/hudi/pull/7716#issuecomment-1398597214 ## CI report: * 860cf0ff1a05c3e79ccc57c48efa44894a916c4f Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14492) Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14498) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7712: [DNM] Check CI timeout
hudi-bot commented on PR #7712: URL: https://github.com/apache/hudi/pull/7712#issuecomment-1398596964 ## CI report: * 45e4b8e0ed22052683a7982c48444f7fc43b767d Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14497) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] SteNicholas commented on pull request #7716: [HUDI-5558] Serializable interface implementation don't explicitly declare serialVersionUID
SteNicholas commented on PR #7716: URL: https://github.com/apache/hudi/pull/7716#issuecomment-1398542077 @hudi-bot run azure -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7712: [DNM] Check CI timeout
hudi-bot commented on PR #7712: URL: https://github.com/apache/hudi/pull/7712#issuecomment-1398497794 ## CI report: * 45e4b8e0ed22052683a7982c48444f7fc43b767d Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14497) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7712: [DNM] Check CI timeout
hudi-bot commented on PR #7712: URL: https://github.com/apache/hudi/pull/7712#issuecomment-1398487649 ## CI report: * 45e4b8e0ed22052683a7982c48444f7fc43b767d UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7718: [HUDI-5591] HoodieSparkSqlWriter#getHiveTableNames needs to consider …
hudi-bot commented on PR #7718: URL: https://github.com/apache/hudi/pull/7718#issuecomment-1398346998 ## CI report: * 0da7f5ff6c70c773c035aae60d84ec460252aae6 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14496) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398346271 ## CI report: * 48c0f695a3b9aade6fc3439a8d53433019b95e89 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14494) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14490) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7716: [HUDI-5558] Serializable interface implementation don't explicitly declare serialVersionUID
hudi-bot commented on PR #7716: URL: https://github.com/apache/hudi/pull/7716#issuecomment-1398267238 ## CI report: * 860cf0ff1a05c3e79ccc57c48efa44894a916c4f Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14492) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7607: [HUDI-5499] Fixing Spark SQL configs not being properly propagated for CTAS and other commands
hudi-bot commented on PR #7607: URL: https://github.com/apache/hudi/pull/7607#issuecomment-1398266742 ## CI report: * 32033e4a4ed91005a237aa88afa2c6adcb51169f UNKNOWN * 6e67e79228d1e4d165af0faf5905e216153a80e3 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14439) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398259582 ## CI report: * 1b075e25aa5811f36e83e12bfba11a08bc929bf1 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14495) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14491) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7718: [HUDI-5591] HoodieSparkSqlWriter#getHiveTableNames needs to consider …
hudi-bot commented on PR #7718: URL: https://github.com/apache/hudi/pull/7718#issuecomment-1398187113 ## CI report: * 0da7f5ff6c70c773c035aae60d84ec460252aae6 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14496) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398184942 ## CI report: * 031dc62b21fc55546243a8fea450138ef94f3405 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14485) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7718: [HUDI-5591] HoodieSparkSqlWriter#getHiveTableNames needs to consider …
hudi-bot commented on PR #7718: URL: https://github.com/apache/hudi/pull/7718#issuecomment-1398178136 ## CI report: * 0da7f5ff6c70c773c035aae60d84ec460252aae6 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7703: [HUDI-1575][DO NOT MERGE] Testing early conflict detection with feature flag enabled by default
hudi-bot commented on PR #7703: URL: https://github.com/apache/hudi/pull/7703#issuecomment-1398177925 ## CI report: * 0fe1eddd4034e3861ff2519dc21d7a008b10d74d UNKNOWN * a1fdd1603b1fb1f59ce04990601c2e99f00cc9af Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14488) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7512: [HUDI-5417] support to read avro from non-legacy map/list in parquet log
hudi-bot commented on PR #7512: URL: https://github.com/apache/hudi/pull/7512#issuecomment-1398177022 ## CI report: * 49fab36027c88f5235ce360a374e98a3b8f1a1d2 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14483) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398175391 ## CI report: * 97af2458373c47dff52bc8e2a8cd63099461ff67 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14493) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14489) * 031dc62b21fc55546243a8fea450138ef94f3405 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6133: [HUDI-1575] Early Conflict Detection For Multi-writer
hudi-bot commented on PR #6133: URL: https://github.com/apache/hudi/pull/6133#issuecomment-1398174461 ## CI report: * dbe3db845908d261baa5a1aa71d19e0db55816de UNKNOWN * 678cce4a9748cb54a90a559384a0cb0443082535 UNKNOWN * 6fc5bf1ce7921bf25acc3659565457264d8b9dc2 UNKNOWN * 0b74647767677a4cc1193295b493dc0537dd4c96 UNKNOWN * 3369e5e8770cf9eb4c4d272f7c3af54933c992aa UNKNOWN * 1ccecb4fa727cc254cf4780012c28bab24e6afde UNKNOWN * 6fdf901df1086d6ecc07c7987b6a3212b08eaefb UNKNOWN * 7344fabacee437adb4b55ca922df0f5fb14ae372 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14484) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7713: [HUDI-5589] Fix Hudi config inference
hudi-bot commented on PR #7713: URL: https://github.com/apache/hudi/pull/7713#issuecomment-1398164784 ## CI report: * 4ef51f2e03159eda252b15f90993069d257923f6 UNKNOWN * a55175d7d5bf775ed16ccac6d859ce6619c6 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14482) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] LinMingQiang opened a new pull request, #7718: [HUDI-5591] HoodieSparkSqlWriter#getHiveTableNames needs to consider …
LinMingQiang opened a new pull request, #7718: URL: https://github.com/apache/hudi/pull/7718 …parameter HIVE_SYNC_TABLE_STRATEGY ### Change Logs HoodieSparkSqlWriter#getHiveTableNames _Describe context and summary for this change. Highlight if any code was copied._ ### Impact HoodieSparkSqlWriter#getHiveTableNames _Describe any public API or user-facing feature change or any performance impact._ ### Risk level (write none, low medium or high below) none _If medium or high, explain what verification was done to mitigate the risks._ ### Documentation Update _Describe any necessary documentation update if there is any new feature, config, or user-facing change_ - _The config description must be updated if new configs are added or the default value of the configs are changed_ - _Any new feature or user-facing change requires updating the Hudi website. Please create a Jira ticket, attach the ticket number here and follow the [instruction](https://hudi.apache.org/contribute/developer-setup#website) to make changes to the website._ ### Contributor's checklist - [ ] Read through [contributor's guide](https://hudi.apache.org/contribute/how-to-contribute) - [ ] Change Logs and Impact were stated clearly - [ ] Adequate tests were added if applicable - [ ] CI passed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] wzx140 commented on a diff in pull request #7714: [HUDI-5549] SparkRecordManager support avro data block
wzx140 commented on code in PR #7714: URL: https://github.com/apache/hudi/pull/7714#discussion_r1082263118 ## hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java: ## @@ -192,16 +198,25 @@ public boolean hasNext() { } @Override -public IndexedRecord next() { +public HoodieRecord next() { try { int recordLength = this.dis.readInt(); BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(this.content, this.dis.getNumberOfBytesRead(), recordLength, this.decoderCache.get()); this.decoderCache.set(decoder); -IndexedRecord record = this.reader.read(null, decoder); +IndexedRecord indexedRecord = this.reader.read(null, decoder); this.dis.skipBytes(recordLength); this.readRecords++; -return record; +if (recordType == HoodieRecordType.SPARK) { Review Comment: Do you mean that we should move the logic of reading avro data into HoodieAvroReader and HoodieSparkAvroReader, and create these readers from HoodieFileReaderFactory, just like the usage of FileReader in HoodieParquetDataBlock? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7714: [HUDI-5549] SparkRecordManager support avro data block
hudi-bot commented on PR #7714: URL: https://github.com/apache/hudi/pull/7714#issuecomment-1398082322 ## CI report: * 462a4041585cfeb6c16354bbc8b964bdd08ed301 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14479) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398082191 ## CI report: * 1b075e25aa5811f36e83e12bfba11a08bc929bf1 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14495) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14491) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398081852 ## CI report: * 48c0f695a3b9aade6fc3439a8d53433019b95e89 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14494) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14490) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398080581 ## CI report: * 97af2458373c47dff52bc8e2a8cd63099461ff67 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14493) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14489) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398072303 ## CI report: * 1b075e25aa5811f36e83e12bfba11a08bc929bf1 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14491) Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14495) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398071952 ## CI report: * 48c0f695a3b9aade6fc3439a8d53433019b95e89 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14490) Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14494) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7423: [HUDI-5384] Adding optimization rule to appropriately push down filters into the `HoodieFileIndex`
hudi-bot commented on PR #7423: URL: https://github.com/apache/hudi/pull/7423#issuecomment-1398071383 ## CI report: * 78a6da0b0d5d65f8e7f4c59b495a2820e1f9877f UNKNOWN * 296dadf9e961375e4a81d35f87fef55ce8a1d860 UNKNOWN * 7c0c8a22940ae822b51b0848d97dea6fafd5216f Azure: [SUCCESS](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14464) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7232: [HUDI-5235] clustering target size should larger than small file limit
hudi-bot commented on PR #7232: URL: https://github.com/apache/hudi/pull/7232#issuecomment-1398071029 ## CI report: * 08239e5b8d4d49da4b5b3d814233251f81b3d0b0 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14397) Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14451) Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14474) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398070394 ## CI report: * 97af2458373c47dff52bc8e2a8cd63099461ff67 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14489) Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14493) * Unknown: [CANCELED](TBD) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] SteNicholas commented on a diff in pull request #7669: [HUDI-5553] Prevent partition(s) from being dropped if there are pending…
SteNicholas commented on code in PR #7669: URL: https://github.com/apache/hudi/pull/7669#discussion_r1082189809 ## hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeletePartitionCommitActionExecutor.java: ## @@ -98,4 +103,42 @@ private List getAllExistingFileIds(String partitionPath) { // because new commit is not complete. it is safe to mark all existing file Ids as old files return table.getSliceView().getLatestFileSlices(partitionPath).map(FileSlice::getFileId).distinct().collect(Collectors.toList()); } + + /** + * Check if there are any pending table service actions (requested + inflight) on a table affecting the partitions to + * be dropped. + * + * This check is to prevent a drop-partition from proceeding should a partition have a table service action in + * the pending stage. If this is allowed to happen, the filegroup that is an input for a table service action, might + * also be a candidate for being replaced. As such, when the table service action and drop-partition commits are + * committed, there will be two commits replacing a single filegroup. + * + * For example, a timeline might have an execution order as such: + * 000.replacecommit.requested (clustering filegroup_1 + filegroup_2 -> filegroup_3) + * 001.replacecommit.requested, 001.replacecommit.inflight, 0001.replacecommit (drop_partition to replace filegroup_1) + * 000.replacecommit.inflight (clustering is executed now) + * 000.replacecommit (clustering completed) + * For an execution order as shown above, 000.replacecommit and 001.replacecommit will both flag filegroup_1 to be replaced. + * This will cause downstream duplicate key errors when a map is being constructed. 
+ */ + private void checkPreconditions() { +List instantsOfOffendingPendingTableServiceAction = new ArrayList<>(); +// ensure that there are no pending inflight clustering/compaction operations involving this partition +SyncableFileSystemView fileSystemView = (SyncableFileSystemView) table.getSliceView(); + +Stream.concat(fileSystemView.getPendingCompactionOperations(), fileSystemView.getPendingLogCompactionOperations()) +.filter(op -> partitions.contains(op.getRight().getPartitionPath())) +.forEach(op -> instantsOfOffendingPendingTableServiceAction.add(op.getLeft())); + Review Comment: +1, for example, there are pending clustering instants for the partition to be deleted. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] SteNicholas commented on pull request #7716: [HUDI-5558] Serializable interface implementation don't explicitly declare serialVersionUID
SteNicholas commented on PR #7716: URL: https://github.com/apache/hudi/pull/7716#issuecomment-1398017208 @danny0405, this pull request explicitly declares the `serialVersionUID` for the hudi-flink module. PTAL. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7716: [HUDI-5558] Serializable interface implementation don't explicitly declare serialVersionUID
hudi-bot commented on PR #7716: URL: https://github.com/apache/hudi/pull/7716#issuecomment-1398012661 ## CI report: * 860cf0ff1a05c3e79ccc57c48efa44894a916c4f Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14492) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398012526 ## CI report: * 1b075e25aa5811f36e83e12bfba11a08bc929bf1 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398012310 ## CI report: * 48c0f695a3b9aade6fc3439a8d53433019b95e89 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398011318 ## CI report: * 97af2458373c47dff52bc8e2a8cd63099461ff67 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] abhishekshenoy opened a new issue, #7717: [SUPPORT] org.apache.avro.SchemaParseException: Can't redefine: array When there are Top level variables , Struct and Array[struct] (no compl
abhishekshenoy opened a new issue, #7717: URL: https://github.com/apache/hudi/issues/7717 ### Describe the problem you faced When storing a data structure with the following layout into a copy-on-write table: ``` root |-- personDetails: struct (nullable = true) |    |-- id: integer (nullable = false) |-- idInfo: struct (nullable = true) |    |-- adhaarId: integer (nullable = false) |-- addressInfo: array (nullable = true) |    |-- element: struct (containsNull = true) |    |    |-- addressId: integer (nullable = false) |-- employmentInfo: array (nullable = true) |    |-- element: struct (containsNull = true) |    |    |-- employmenCd: integer (nullable = false) |-- src_load_ts: timestamp (nullable = false) |-- load_ts: timestamp (nullable = false) |-- load_dt: date (nullable = false) ``` the first write will succeed, but then subsequent writes will fail with the error included in the stacktrace. ### To Reproduce Steps to reproduce the behavior: ``` case class Person(personDetails: PersonDetails, idInfo: IdInfo, addressInfo: Array[AddressInfo] = Array.empty[AddressInfo], employmentInfo: Array[EmploymentInfo] = Array.empty[EmploymentInfo]) case class PersonDetails(id: Int) case class IdInfo(adhaarId: Int) case class AddressInfo(addressId: Int) case class EmploymentInfo(employmenCd: Int) def maskedParquetBugTest(spark: SparkSession): Unit = { import spark.implicits._ val personDetails1 = PersonDetails(1) val idInfo1 = IdInfo(1) val addressInfo1 = AddressInfo(1) val employmentInfo1 = EmploymentInfo(1) val item1 = Person(personDetails1, idInfo1, Array(addressInfo1), Array(employmentInfo1)) val parquetBugDs = Seq(item1).toDF() .withColumn("src_load_ts", current_timestamp()) .withColumn("load_ts", timestampInCst).withColumn("load_dt", to_date(col("load_ts"))) parquetBugDs.printSchema() writeHudi(parquetBugDs, "parquet_bug_ds", "load_dt", "personDetails.id", "src_load_ts") } def writeHudi(ds: DataFrame, tableName: String, partitionPath: String, recordKey: String, precombineKey: String): Unit = { val 
hoodieConfigs: util.Map[String, String] = new java.util.HashMap[String, String] hoodieConfigs.put("hoodie.table.name", tableName) hoodieConfigs.put("hoodie.datasource.write.keygenerator.class", classOf[SimpleKeyGenerator].getName) hoodieConfigs.put("hoodie.datasource.write.partitionpath.field", partitionPath) hoodieConfigs.put("hoodie.datasource.write.recordkey.field", recordKey) hoodieConfigs.put("hoodie.datasource.write.precombine.field", precombineKey) hoodieConfigs.put("hoodie.payload.ordering.field", precombineKey) hoodieConfigs.put("hoodie.index.type", "GLOBAL_SIMPLE") hoodieConfigs.put("hoodie.insert.shuffle.parallelism", "1") hoodieConfigs.put("hoodie.upsert.shuffle.parallelism", "1") hoodieConfigs.put("hoodie.bulkinsert.shuffle.parallelism", "1") hoodieConfigs.put("hoodie.delete.shuffle.parallelism", "1") hoodieConfigs.put("hoodie.simple.index.update.partition.path", "false") hoodieConfigs.put("hoodie.datasource.write.payload.class", classOf[DefaultHoodieRecordPayload].getName) hoodieConfigs.put("hoodie.datasource.write.hive_style_partitioning", "false") hoodieConfigs.put("hoodie.datasource.write.table.type", COW_TABLE_TYPE_OPT_VAL) hoodieConfigs.put("hoodie.datasource.write.row.writer.enable", "true") hoodieConfigs.put("hoodie.combine.before.upsert", "true") hoodieConfigs.put("hoodie.datasource.write.keygenerator.consistent.logical.timestamp.enabled", "true") hoodieConfigs.put("hoodie.schema.on.read.enable", "true") hoodieConfigs.put("hoodie.datasource.write.reconcile.schema", "true") hoodieConfigs.put("hoodie.datasource.write.operation", "upsert") ds.toDF().write.format("hudi"). options(hoodieConfigs). mode("append"). save(s"/tmp/data/hudi/$tableName") } maskedParquetBugTest(spark) maskedParquetBugTest(spark) ``` ### Expected behavior The second write succeeds. ### Environment Description Hudi version (hudi-spark3.1-bundle_2.12) : 0.12.2 , 0.12.1, 0.12.0 Spark version : 3.1.3 Hive version : - Hadoop version : - Storage (HDFS/S3/GCS..) 
: Local storage Running on Docker? (yes/no) : No ### Additional context Fix mentioned in [#7145](https://github.com/apache/hudi/issues/7145) does not work as we do not have any Array[Struct] within another Array[Struct] ### Stack Trace ``` Driver stacktrace: at org.apache.spark.scheduler.DAGSchedu
[GitHub] [hudi] alexeykudinkin commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
alexeykudinkin commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398007962 @hudi-bot run azure -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] alexeykudinkin commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
alexeykudinkin commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398007837 @hudi-bot run azure -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] alexeykudinkin commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
alexeykudinkin commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398007610 @hudi-bot run azure -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7716: [HUDI-5558] Serializable interface implementation don't explicitly declare serialVersionUID
hudi-bot commented on PR #7716: URL: https://github.com/apache/hudi/pull/7716#issuecomment-1398007450 ## CI report: * 860cf0ff1a05c3e79ccc57c48efa44894a916c4f UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398007308 ## CI report: * 384a9774018272e13b967817b0e48b1596a23dcc UNKNOWN * ae1bcf3c42da3945c843864cdeac7f8cb89ef088 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14487) * 1b075e25aa5811f36e83e12bfba11a08bc929bf1 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398007081 ## CI report: * b11fa6b2246e4f02f1da12487093a9b5bfaf2149 UNKNOWN * a37aa62fd9ea8d3b70a06e181237df23097d90a4 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14486) * 48c0f695a3b9aade6fc3439a8d53433019b95e89 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7159: [HUDI-5173]Skip if there is only one file in clusteringGroup
hudi-bot commented on PR #7159: URL: https://github.com/apache/hudi/pull/7159#issuecomment-1398006500 ## CI report: * 15ecd91180d32c7fa1905c11408f4bc23347e682 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1398006155 ## CI report: * 13fb78850890b96b86b66d7df060feb11950ec0c UNKNOWN * 031dc62b21fc55546243a8fea450138ef94f3405 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14485) * 97af2458373c47dff52bc8e2a8cd63099461ff67 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7711: [HUDI-5569] Files written by first commit/delta commit if it failed are detected as valid data files
hudi-bot commented on PR #7711: URL: https://github.com/apache/hudi/pull/7711#issuecomment-1398001739 ## CI report: * fb1f2609baf5b3f12a4ca5243f5205f2ba8f6367 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14469) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1398001638 ## CI report: * 384a9774018272e13b967817b0e48b1596a23dcc UNKNOWN * ae1bcf3c42da3945c843864cdeac7f8cb89ef088 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14487) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1398001285 ## CI report: * b11fa6b2246e4f02f1da12487093a9b5bfaf2149 UNKNOWN * a37aa62fd9ea8d3b70a06e181237df23097d90a4 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14486) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-139731 ## CI report: * 13fb78850890b96b86b66d7df060feb11950ec0c UNKNOWN * 031dc62b21fc55546243a8fea450138ef94f3405 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14485) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6384: [HUDI-4613] Avoid the use of regex expressions when call hoodieFileGroup#addLogFile function
hudi-bot commented on PR #6384: URL: https://github.com/apache/hudi/pull/6384#issuecomment-1397999485 ## CI report: * 5c72287193c51530504e19b69e81f877bd03c675 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14475) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] TengHuo commented on pull request #7626: [HUDI-5516] Reduce memory footprint on workload with thousand active partitions
TengHuo commented on PR #7626: URL: https://github.com/apache/hudi/pull/7626#issuecomment-1397989682 > @TengHuo I tried the following workload with MOR table, 2000 partitions and compaction (checkpoint here triggers compaction) Got it, thanks so much @trushev -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] nsivabalan commented on issue #7628: [SUPPORT] Hudi Metadata Column Stats Fail
nsivabalan commented on issue #7628: URL: https://github.com/apache/hudi/issues/7628#issuecomment-1397984406 Yes, integers might cause problems if you use them as record keys. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] fengjian428 commented on issue #7654: [SUPPORT] Starvation on Hudi Java Client in OCC mode
fengjian428 commented on issue #7654: URL: https://github.com/apache/hudi/issues/7654#issuecomment-1397981689 > @fengjian428 , sure.. Please feel free to include the test code. Thanks for the quick fix.. I tried it out a snapshot built from your branch clean_deadlock, did not come across deadlocks anymore with FS based lock provider. > > However, I bumped up the writers to 100 and ran into the below. **I have not used FileSystemLockExpire.** > > ``` > 2023-01-19 02:04:40,411 [INFO ] HoodieMergeHandle - Merging new data into oldPath /Users//IdeaProjects/ApacheHudiOccTest/occ/tmp/hudiTest/44/test/5dd8fea5-cf44-4432-b775-01cb67d1250d-0_0-0-0_20230119020209727.parquet, as newPath /Users//IdeaProjects/ApacheHudiOccTest/occ/tmp/hudiTest/44/test/5dd8fea5-cf44-4432-b775-01cb67d1250d-0_0-0-0_20230119020440202.parquet > 2023-01-19 02:04:40,412 [INFO ] DirectWriteMarkers - Creating Marker Path=/Users//IdeaProjects/ApacheHudiOccTest/occ/tmp/hudiTest/.hoodie/.temp/20230119020440202/44/test/5dd8fea5-cf44-4432-b775-01cb67d1250d-0_0-0-0_20230119020440202.parquet.marker.MERGE > > org.apache.hudi.exception.HoodieUpsertException: Failed upsert schema compatibility check > >at org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:820) >at org.apache.hudi.client.HoodieJavaWriteClient.upsert(HoodieJavaWriteClient.java:109) >at org.example.HudiOccTest.lambda$HudiTest$2(HudiOccTest.java:213) >at java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183) >at java.base/java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948) >at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484) >at java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290) >at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:746) >at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:290) >at 
java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1020) >at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1656) >at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1594) >at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:183) > Caused by: org.apache.hudi.exception.HoodieException: Failed to read schema/check compatibility for base path /Users//IdeaProjects/ApacheHudiOccTest/occ/tmp/hudiTest >at org.apache.hudi.table.HoodieTable.validateSchema(HoodieTable.java:807) >at org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:818) >... 12 more > Caused by: org.apache.hudi.exception.HoodieIOException: Could not read commit details from /Users//IdeaProjects/ApacheHudiOccTest/occ/tmp/hudiTest/.hoodie/20230119020417275.commit >at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:824) >at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.getInstantDetails(HoodieActiveTimeline.java:310) >at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.lambda$getCommitMetadataStream$2(HoodieActiveTimeline.java:349) >at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195) >at java.base/java.util.stream.SortedOps$SizedRefSortingSink.end(SortedOps.java:361) >at java.base/java.util.stream.AbstractPipeline.copyIntoWithCancel(AbstractPipeline.java:503) >at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:488) >at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474) >at java.base/java.util.stream.FindOps$FindOp.evaluateSequential(FindOps.java:150) >at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) >at java.base/java.util.stream.ReferencePipeline.findFirst(ReferencePipeline.java:543) >at 
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.getLastCommitMetadataWithValidSchema(HoodieActiveTimeline.java:321) >at org.apache.hudi.common.table.TableSchemaResolver.getLatestCommitMetadataWithValidSchema(TableSchemaResolver.java:491) >at org.apache.hudi.common.table.TableSchemaResolver.getTableSchemaFromLatestCommitMetadata(TableSchemaResolver.java:225) >at org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchemaInternal(TableSchemaResolver.java:199) >at org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchema(TableSchemaResolver.java:139) >at org.apache.hudi.common.table.TableSchemaResolver.getTableAvroSchemaWithoutMetadataFields(TableSchemaResolver.java:192) >at org.apache.hudi.table.HoodieTable.validateSchema(HoodieTable.java:804) >... 13 more > Caused by: j
[GitHub] [hudi] SteNicholas opened a new pull request, #7716: [HUDI-5558] Serializable interface implementation don't explicitly declare serialVersionUID
SteNicholas opened a new pull request, #7716: URL: https://github.com/apache/hudi/pull/7716 ### Change Logs Implementations of the `Serializable` interface don't explicitly declare `serialVersionUID`, which causes an `InvalidClassException` during deserialization. Every `Serializable` implementation, including subclass implementations, should explicitly declare `serialVersionUID`. ### Impact Operators and functions of the hudi-flink module explicitly declare `serialVersionUID`. ### Risk level (write none, low medium or high below) _If medium or high, explain what verification was done to mitigate the risks._ ### Documentation Update _Describe any necessary documentation update if there is any new feature, config, or user-facing change_ - _The config description must be updated if new configs are added or the default value of the configs are changed_ - _Any new feature or user-facing change requires updating the Hudi website. Please create a Jira ticket, attach the ticket number here and follow the [instruction](https://hudi.apache.org/contribute/developer-setup#website) to make changes to the website._ ### Contributor's checklist - [x] Read through [contributor's guide](https://hudi.apache.org/contribute/how-to-contribute) - [x] Change Logs and Impact were stated clearly - [x] Adequate tests were added if applicable - [x] CI passed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] danny0405 commented on a diff in pull request #7633: Fix Deletes issued without any prior commits
danny0405 commented on code in PR #7633: URL: https://github.com/apache/hudi/pull/7633#discussion_r1082144102 ## hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java: ## @@ -1637,8 +1637,6 @@ protected void setWriteSchemaForDeletes(HoodieTableMetaClient metaClient) { } else { throw new HoodieIOException("Latest commit does not have any schema in commit metadata"); } - } else { -throw new HoodieIOException("Deletes issued without any prior commits"); Review Comment: Yeah, let's leave some logging here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] koochiswathiTR commented on issue #7708: Parquet files are in small size
koochiswathiTR commented on issue #7708: URL: https://github.com/apache/hudi/issues/7708#issuecomment-1397968358 If we use clustering, will it slow down ingestion? @danny0405 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] koochiswathiTR commented on issue #7708: Parquet files are in small size
koochiswathiTR commented on issue #7708: URL: https://github.com/apache/hudi/issues/7708#issuecomment-1397968093 We use inline, we don't use clustering @danny0405 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] danny0405 commented on issue #7708: Parquet files are in small size
danny0405 commented on issue #7708: URL: https://github.com/apache/hudi/issues/7708#issuecomment-1397967114 Did you try inline or async clustering? The clustering service constantly merges small files into large ones. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] danny0405 commented on a diff in pull request #7710: [Doc] [minor] remove duplicated match clause in MergeInto syntax
danny0405 commented on code in PR #7710: URL: https://github.com/apache/hudi/pull/7710#discussion_r1082141639 ## website/versioned_docs/version-0.12.2/quick-start-guide.md: ## @@ -761,7 +761,6 @@ MERGE INTO tableIdentifier AS target_alias USING (sub_query | tableIdentifier) AS source_alias ON [ WHEN MATCHED [ AND ] THEN ] -[ WHEN MATCHED [ AND ] THEN ] [ WHEN NOT MATCHED [ AND ] THEN ] Review Comment: Can we fix all the versions that support MERGE INTO? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] danny0405 commented on pull request #7706: [HUDI-5585][flink]Fix flink creates and writes the table, the spark alter table reports an error
danny0405 commented on PR #7706: URL: https://github.com/apache/hudi/pull/7706#issuecomment-1397964369 Thanks for the fix @waywtdcc, can we describe at a high level what we are fixing here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7703: [HUDI-1575][DO NOT MERGE] Testing early conflict detection with feature flag enabled by default
hudi-bot commented on PR #7703: URL: https://github.com/apache/hudi/pull/7703#issuecomment-1397954902 ## CI report: * 0fe1eddd4034e3861ff2519dc21d7a008b10d74d UNKNOWN * e7ea55af65e8b0af7024268b652db37561eb501f Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14481) * a1fdd1603b1fb1f59ce04990601c2e99f00cc9af Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14488) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1397954859 ## CI report: * 384a9774018272e13b967817b0e48b1596a23dcc UNKNOWN * 45b15748e01531fb144e37f5b04b34b811ab1474 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14466) * ae1bcf3c42da3945c843864cdeac7f8cb89ef088 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14487) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1397954692 ## CI report: * b11fa6b2246e4f02f1da12487093a9b5bfaf2149 UNKNOWN * a4afea9412a655aee083524e56b6d75e56720bc4 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14467) * a37aa62fd9ea8d3b70a06e181237df23097d90a4 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14486) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1397953949 ## CI report: * 13fb78850890b96b86b66d7df060feb11950ec0c UNKNOWN * 29de073c80985fa18576e7a01ca47b61d32ac944 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14465) * 031dc62b21fc55546243a8fea450138ef94f3405 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14485) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6133: [HUDI-1575] Early Conflict Detection For Multi-writer
hudi-bot commented on PR #6133: URL: https://github.com/apache/hudi/pull/6133#issuecomment-1397953606 ## CI report: * dbe3db845908d261baa5a1aa71d19e0db55816de UNKNOWN * 678cce4a9748cb54a90a559384a0cb0443082535 UNKNOWN * 6fc5bf1ce7921bf25acc3659565457264d8b9dc2 UNKNOWN * 0b74647767677a4cc1193295b493dc0537dd4c96 UNKNOWN * 3369e5e8770cf9eb4c4d272f7c3af54933c992aa UNKNOWN * 1ccecb4fa727cc254cf4780012c28bab24e6afde UNKNOWN * 6fdf901df1086d6ecc07c7987b6a3212b08eaefb UNKNOWN * a2980b73d4ad32976360804059fcd6df969b9f89 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14480) * 7344fabacee437adb4b55ca922df0f5fb14ae372 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14484) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] danny0405 commented on pull request #7687: Update to handle deletes in postgres debezium
danny0405 commented on PR #7687: URL: https://github.com/apache/hudi/pull/7687#issuecomment-1397953163 > > Thanks @BalaMahesh can we fire a JIRA issue and change the commit title to: [HUDI-${JIRA_ID}] ${you commit title} > > @danny0405 - How do I get access to jira to create the issue for this. You can fire a JIRA issue here: https://issues.apache.org/jira/projects/HUDI/issues/HUDI-1575?filter=allopenissues -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] danny0405 commented on issue #7715: [SUPPORT] HoodieDeltaStreamer gives an errror when reading from Redpanda Avro topics
danny0405 commented on issue #7715: URL: https://github.com/apache/hudi/issues/7715#issuecomment-1397951254 It seems that Redpanda returns null for the earliest offset of each partition: https://github.com/apache/hudi/blob/622d27a099f5dec96f992fd423b666083da4b24a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java#L333 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] nfarah86 commented on pull request #7687: Update to handle deletes in postgres debezium
nfarah86 commented on PR #7687: URL: https://github.com/apache/hudi/pull/7687#issuecomment-1397951198 helping with jira -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7703: [HUDI-1575][DO NOT MERGE] Testing early conflict detection with feature flag enabled by default
hudi-bot commented on PR #7703: URL: https://github.com/apache/hudi/pull/7703#issuecomment-1397949951 ## CI report: * d5a19b738146a7003c7957b3a1cd63c5cfbd348d Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14477) * 0fe1eddd4034e3861ff2519dc21d7a008b10d74d UNKNOWN * e7ea55af65e8b0af7024268b652db37561eb501f Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14481) * a1fdd1603b1fb1f59ce04990601c2e99f00cc9af UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7702: [HUDI-5579] Fixing Kryo registration to be properly wired into Spark sessions
hudi-bot commented on PR #7702: URL: https://github.com/apache/hudi/pull/7702#issuecomment-1397949920 ## CI report: * 384a9774018272e13b967817b0e48b1596a23dcc UNKNOWN * 45b15748e01531fb144e37f5b04b34b811ab1474 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14466) * ae1bcf3c42da3945c843864cdeac7f8cb89ef088 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7694: [HUDI-5572] Flink write need to skip check the compatibility of Schem…
hudi-bot commented on PR #7694: URL: https://github.com/apache/hudi/pull/7694#issuecomment-1397949871 ## CI report: * 97fdc558722b8d5152f9e21112045adb73eca9fe Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14430) Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14470) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7642: [HUDI-5534][Stacked on 6815] Optimizing Bloom Index lookup when using Bloom Filters from Metadata Table
hudi-bot commented on PR #7642: URL: https://github.com/apache/hudi/pull/7642#issuecomment-1397949726 ## CI report: * dd82fb612d88d6c9ba4f06be2989ec5061052047 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14446) * b11fa6b2246e4f02f1da12487093a9b5bfaf2149 UNKNOWN * a4afea9412a655aee083524e56b6d75e56720bc4 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14467) * a37aa62fd9ea8d3b70a06e181237df23097d90a4 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6815: [HUDI-4937][Stacked on 7702] Fix `HoodieTable` injecting non-reusable `HoodieBackedTableMetadata` aggressively flushing MT readers
hudi-bot commented on PR #6815: URL: https://github.com/apache/hudi/pull/6815#issuecomment-1397949027 ## CI report: * 13fb78850890b96b86b66d7df060feb11950ec0c UNKNOWN * 29de073c80985fa18576e7a01ca47b61d32ac944 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14465) * 031dc62b21fc55546243a8fea450138ef94f3405 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #6133: [HUDI-1575] Early Conflict Detection For Multi-writer
hudi-bot commented on PR #6133: URL: https://github.com/apache/hudi/pull/6133#issuecomment-1397948658 ## CI report: * dbe3db845908d261baa5a1aa71d19e0db55816de UNKNOWN * 678cce4a9748cb54a90a559384a0cb0443082535 UNKNOWN * 6fc5bf1ce7921bf25acc3659565457264d8b9dc2 UNKNOWN * 0b74647767677a4cc1193295b493dc0537dd4c96 UNKNOWN * 3369e5e8770cf9eb4c4d272f7c3af54933c992aa UNKNOWN * 1ccecb4fa727cc254cf4780012c28bab24e6afde UNKNOWN * 6fdf901df1086d6ecc07c7987b6a3212b08eaefb UNKNOWN * 67b3892f4bafe64ecf203a53bb86db22e61ae587 Azure: [SUCCESS](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14422) * a2980b73d4ad32976360804059fcd6df969b9f89 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14480) * 7344fabacee437adb4b55ca922df0f5fb14ae372 UNKNOWN Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7661: [DO NOT MERGE] Release testing record merger
hudi-bot commented on PR #7661: URL: https://github.com/apache/hudi/pull/7661#issuecomment-1397945732 ## CI report: * f698f26db2314cbbbee30d37df0d6fd343317796 UNKNOWN * 4a2dbb50cff97211589a22059ac7fb1ffcf605a8 UNKNOWN * 9dc9aed49c0797ba20dd716bab973ed6cfc803a4 Azure: [FAILURE](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14468) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] afuyo opened a new issue, #7715: [SUPPORT] HoodieDeltaStreamer gives an error when reading from Redpanda Avro topics
afuyo opened a new issue, #7715: URL: https://github.com/apache/hudi/issues/7715 **_Tips before filing an issue_** - Have you gone through our [FAQs](https://hudi.apache.org/learn/faq/)? Yes **Describe the problem you faced** I have replaced Apache Kafka with Redpanda. All existing programs like Spring Kafka Avro Producer and Consumer work just fine. Hudi Spark jobs that work fine on Kafka give an error when running on Redpanda. **To Reproduce** ``` spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer /opt/spark/hudi-utilities-bundle_2.12-0.11.1.jar \ --table-type COPY_ON_WRITE --source-class org.apache.hudi.utilities.sources.AvroKafkaSource \ --source-ordering-field tradeTime \ --target-base-path /opt/spark/stock_ticks_avro_cow \ --target-table stock_ticks_avro_cow \ --props /opt/spark/kafka-source.properties \ --schemaprovider-class org.apache.hudi.utilities.schema.SchemaRegistryProvider \ --op UPSERT \ --continuous \ ``` **Environment Description** * Hudi version : 0.11 * Spark version : 3.1 * Hive version : * Hadoop version : * Storage (HDFS/S3/GCS..) : * Running on Docker? 
(yes/no) : yes **Stacktrace** ``` ERROR HoodieAsyncService: Service shutdown with error java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieException at java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:395) at java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1999) at org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:103) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$1(HoodieDeltaStreamer.java:189) at org.apache.hudi.common.util.Option.ifPresent(Option.java:97) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:186) at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:553) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:566) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: org.apache.hudi.exception.HoodieException at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:713) at 
java.base/java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1700) at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) at java.base/java.lang.Thread.run(Thread.java:829) Caused by: java.lang.NullPointerException at org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.lambda$fetchValidOffsets$1(KafkaOffsetGen.java:333) at java.base/java.util.stream.MatchOps$1MatchSink.accept(MatchOps.java:90) at java.base/java.util.HashMap$EntrySpliterator.tryAdvance(HashMap.java:1785) at java.base/java.util.stream.ReferencePipeline.forEachWithCancel(ReferencePipeline.java:127) at java.base/java.util.stream.AbstractPipeline.copyIntoWithCancel(AbstractPipeline.java:502) at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:488) at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474) at java.base/java.util.stream.MatchOps$MatchOp.evaluateSequential(MatchOps.java:230) at java.base/java.util.stream.MatchOps$MatchOp.evaluateSequential(MatchOps.java:196) at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) at java.base/java.util.stream.ReferencePipeline.anyMatch(ReferencePipeline.java:528)
[GitHub] [hudi] hudi-bot commented on pull request #7713: [HUDI-5589] Fix Hudi config inference
hudi-bot commented on PR #7713: URL: https://github.com/apache/hudi/pull/7713#issuecomment-1397911609 ## CI report: * 198a828b76b654e4b8f3ef8ac133f672a682cdf8 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14478) * 4ef51f2e03159eda252b15f90993069d257923f6 UNKNOWN * a55175d7d5bf775ed16ccac6d859ce6619c6 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14482) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7703: [HUDI-1575][DO NOT MERGE] Testing early conflict detection with feature flag enabled by default
hudi-bot commented on PR #7703: URL: https://github.com/apache/hudi/pull/7703#issuecomment-1397911575 ## CI report: * d5a19b738146a7003c7957b3a1cd63c5cfbd348d Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14477) * 0fe1eddd4034e3861ff2519dc21d7a008b10d74d UNKNOWN * e7ea55af65e8b0af7024268b652db37561eb501f Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14481) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] hudi-bot commented on pull request #7512: [HUDI-5417] support to read avro from non-legacy map/list in parquet log
hudi-bot commented on PR #7512: URL: https://github.com/apache/hudi/pull/7512#issuecomment-1397911293 ## CI report: * b248f60720e34316217dadc67882ff2417cf6781 Azure: [CANCELED](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14476) * 49fab36027c88f5235ce360a374e98a3b8f1a1d2 Azure: [PENDING](https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_build/results?buildId=14483) Bot commands @hudi-bot supports the following commands: - `@hudi-bot run azure` re-run the last Azure build -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org