[ 
https://issues.apache.org/jira/browse/HUDI-6201?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17724316#comment-17724316
 ] 

Jonathan Vexler commented on HUDI-6201:
---------------------------------------

Here is a test that can be run in the Spark shell; it fails in roughly 1/4 of runs. 
Data: [^base.tar.gz][^base001.tar.gz]
{code:java}
import org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider
import 
org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector
import org.apache.hudi.{DataSourceWriteOptions, HoodieDataSourceHelpers}
import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig}
import org.apache.hudi.keygen.SimpleKeyGenerator
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.types._
import java.nio.file.{Paths, Files}

// Reproduction driver: for each run, bootstrap a Hudi table from `srcPath`
// using the metadata-only bootstrap mode selector, append `updateDf` to it,
// then read the table back and look for rows whose `_hoodie_is_deleted`
// column is NULL. A run with at least one such row is counted as a failure.
val iter = 3

val srcPath = "/Users/jon/Documents/timelineFail/base"
// One target directory per value of `iter`; each run gets its own subfolder.
val targetPath = s"/Users/jon/Documents/timelineFail/target/test$iter/"
val updateDf = spark.read
  .format("parquet")
  .load("/Users/jon/Documents/timelineFail/base001")
// NOTE(review): `m` is not referenced anywhere else in this snippet.
val m = collection.mutable.Map[String, Int]().withDefaultValue(0)

// Writer options that the bootstrap write and the follow-up append share.
val sharedWriteOpts = Map(
  DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "_row_key",
  DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "partition_path",
  DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "timestamp",
  DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key -> "true",
  HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key ->
    classOf[SimpleKeyGenerator].getName
)

// Extra options that only the bootstrap write sets.
val bootstrapOnlyOpts = Map(
  HoodieWriteConfig.TABLE_NAME -> "hoodie_test",
  DataSourceWriteOptions.OPERATION_OPT_KEY ->
    DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL,
  DataSourceWriteOptions.TABLE_TYPE.key ->
    DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL,
  HoodieBootstrapConfig.BOOTSTRAP_BASE_PATH_PROP -> srcPath,
  HoodieBootstrapConfig.BOOTSTRAP_MODE_SELECTOR ->
    classOf[MetadataOnlyBootstrapModeSelector].getName
)

var timesBad = 0
var total = 0
(0 to 100).foreach { run =>
  val basePath = targetPath + run

  // Step 1: bootstrap write. The DataFrame is empty; the data location is
  // passed through the bootstrap base path option instead.
  spark.emptyDataFrame.write
    .format("hudi")
    .options(sharedWriteOpts ++ bootstrapOnlyOpts)
    .mode(SaveMode.Overwrite)
    .save(basePath)

  // Step 2: append the update batch to the table that was just bootstrapped.
  updateDf.write
    .format("hudi")
    .options(sharedWriteOpts)
    .mode(SaveMode.Append)
    .save(basePath)

  // Step 3: read back and keep only the rows with a NULL `_hoodie_is_deleted`.
  val nullFlagRows = spark.read
    .format("hudi")
    .load(basePath)
    .filter("_hoodie_is_deleted is NULL")

  total += 1
  if (nullFlagRows.count() > 0) {
    println(s"Failure in iter $iter run $run")
    timesBad += 1
  }
}
println(s"Failure rate is: ${timesBad.toDouble / total.toDouble}")
 {code}

> Timeline server sometimes does not send bootstrap base path for a skeleton 
> file
> -------------------------------------------------------------------------------
>
>                 Key: HUDI-6201
>                 URL: https://issues.apache.org/jira/browse/HUDI-6201
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: bootstrap, timeline-server
>            Reporter: Jonathan Vexler
>            Priority: Major
>         Attachments: TestBootstrapRead.java, base.tar.gz, base001.tar.gz
>
>
> [^TestBootstrapRead.java] In the attached file, enable the timeline server 
> ('hoodie.embed.timeline.server'). It will occasionally fail in metadata or 
> mixed mode because some records will be null in every column except the 
> metadata columns:
> {code:java}
> +-------------------+---------------------+------------------------------------+-------------------------+------------------------------------------------------------------------+------------------+------------------------------------+------------------+-------------------+-------------+------------+----------+------------------+----------+-------------------+-------------------+------------------------+-------------------+--------+----------+--------------+---------+--------------------+---------+--------------------------+----------+|_hoodie_commit_time|_hoodie_commit_seqno
>  |_hoodie_record_key                  |_hoodie_partition_path   
> |_hoodie_file_name                                                       
> |_hoodie_is_deleted|_row_key                            |begin_lat         
> |begin_lon          
> |city_to_state|current_date|current_ts|distance_in_meters|driver    |end_lat  
>           |end_lon            |fare                    |height             
> |nation  |partition |partition_path|rider    |seconds_since_epoch 
> |timestamp|tip_history               |weight    
> |+-------------------+---------------------+------------------------------------+-------------------------+------------------------------------------------------------------------+------------------+------------------------------------+------------------+-------------------+-------------+------------+----------+------------------+----------+-------------------+-------------------+------------------------+-------------------+--------+----------+--------------+---------+--------------------+---------+--------------------------+----------+|00000000000001
>      |00000000000001_4_0   
> |876743b0-f5e7-4289-b13b-1a0404d94380|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      ||00000000000001     
> |00000000000001_4_1   
> |00923d1a-58fc-4d42-8953-4a47b47d738f|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      ||20230510125841762  
> |20230510125841762_1_2|b318c482-8e43-4614-bdab-80946d5a9f53|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|false
>              
> |b318c482-8e43-4614-bdab-80946d5a9f53|0.5285807377766387|0.12835359814395741|[CA]
>          |12          |1047178778|521899450         
> |driver-001|0.41394620067559684|0.08532822423986208|[42.25978252084417, 
> USD]|[0, 0, 7, -91, -36]|[Canada]|2015-03-16|2015-03-16    
> |rider-001|-2845295541651788027|0        |[[32.10533813167099, 
> USD]]|0.59076524||00000000000001     |00000000000001_4_3   
> |4dcc72d7-0878-41c2-a85d-3e6374b88bb8|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      ||00000000000001     
> |00000000000001_4_4   
> |cfa79530-fc9f-42de-a181-34c06e79d9c5|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      ||00000000000001     
> |00000000000001_4_5   
> |4d2a4755-83a7-4201-9b65-0148752d55b7|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      ||00000000000001     
> |00000000000001_4_6   
> |76429084-f78e-4c6d-a70d-088cb5d955aa|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      ||00000000000001     
> |00000000000001_4_7   
> |8436456b-9858-45cb-8a07-bdae536a2d17|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      ||00000000000001     
> |00000000000001_4_8   
> |b6ca00ed-a4f4-4db5-a269-6be250b64caa|partition_path=2015-03-16|7e1dea56-e88c-4072-be61-f4ae01feaaa3_1-138-381_20230510125841762.parquet|null
>               |null                                |null              |null   
>             |null         |null        |null      |null              |null    
>   |null               |null               |null                    |null      
>          |null    |null      |null          |null     |null                
> |null     |null                      |null      
> |+-------------------+---------------------+------------------------------------+-------------------------+------------------------------------------------------------------------+------------------+------------------------------------+------------------+-------------------+-------------+------------+----------+------------------+----------+-------------------+-------------------+------------------------+-------------------+--------+----------+--------------+---------+--------------------+---------+--------------------------+----------++-------------------+--------------------+------------------------------------+-------------------------+-------------------------------------------------------------------+
>  {code}
> This is because the bootstrap base file is not set, so HoodieMergeHelper 
> does not process the file as a bootstrap skeleton file.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to