[ https://issues.apache.org/jira/browse/HUDI-8553?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Y Ethan Guo updated HUDI-8553: ------------------------------ Sprint: Hudi 1.0 Blockers+Bugs Sprint > Spark SQL UPDATE and DELETE should write record positions > --------------------------------------------------------- > > Key: HUDI-8553 > URL: https://issues.apache.org/jira/browse/HUDI-8553 > Project: Apache Hudi > Issue Type: Bug > Reporter: Y Ethan Guo > Assignee: Jonathan Vexler > Priority: Blocker > Fix For: 1.0.0 > > > Although there are no read or write errors, Spark SQL UPDATE and DELETE do not > write record positions to the log files. > {code:java} > spark-sql (default)> CREATE TABLE testing_positions.table2 ( > > ts BIGINT, > > uuid STRING, > > rider STRING, > > driver STRING, > > fare DOUBLE, > > city STRING > > ) USING HUDI > > LOCATION > 'file:///Users/ethan/Work/tmp/hudi-1.0.0-testing/positional/table2' > > TBLPROPERTIES ( > > type = 'mor', > > primaryKey = 'uuid', > > preCombineField = 'ts' > > ) > > PARTITIONED BY (city); > 24/11/16 12:03:26 WARN TableSchemaResolver: Could not find any data file > written for commit, so could not get schema for table > file:/Users/ethan/Work/tmp/hudi-1.0.0-testing/positional/table2 > Time taken: 0.4 seconds > spark-sql (default)> INSERT INTO testing_positions.table2 > > VALUES > > > (1695159649087,'334e26e9-8355-45cc-97c6-c31daf0df330','rider-A','driver-K',19.10,'san_francisco'), > > > (1695091554788,'e96c4396-3fad-413a-a942-4cb36106d721','rider-C','driver-M',27.70 > ,'san_francisco'), > > > (1695046462179,'9909a8b1-2d15-4d3d-8ec9-efc48c536a00','rider-D','driver-L',33.90 > ,'san_francisco'), > > > (1695332066204,'1dced545-862b-4ceb-8b43-d2a568f6616b','rider-E','driver-O',93.50,'san_francisco'), > > > (1695516137016,'e3cf430c-889d-4015-bc98-59bdce1e530c','rider-F','driver-P',34.15,'sao_paulo' > ), > > > (1695376420876,'7a84095f-737f-40bc-b62f-6b69664712d2','rider-G','driver-Q',43.40 > ,'sao_paulo' ), > > > (1695173887231,'3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04','rider-I','driver-S',41.06 > ,'chennai' ), > > > 
(1695115999911,'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa','rider-J','driver-T',17.85,'chennai'); > 24/11/16 12:03:26 WARN TableSchemaResolver: Could not find any data file > written for commit, so could not get schema for table > file:/Users/ethan/Work/tmp/hudi-1.0.0-testing/positional/table2 > 24/11/16 12:03:26 WARN TableSchemaResolver: Could not find any data file > written for commit, so could not get schema for table > file:/Users/ethan/Work/tmp/hudi-1.0.0-testing/positional/table2 > 24/11/16 12:03:29 WARN log: Updating partition stats fast for: table2_ro > 24/11/16 12:03:29 WARN log: Updated size to 436166 > 24/11/16 12:03:29 WARN log: Updating partition stats fast for: table2_ro > 24/11/16 12:03:29 WARN log: Updating partition stats fast for: table2_ro > 24/11/16 12:03:29 WARN log: Updated size to 436185 > 24/11/16 12:03:29 WARN log: Updated size to 436386 > 24/11/16 12:03:30 WARN log: Updating partition stats fast for: table2_rt > 24/11/16 12:03:30 WARN log: Updating partition stats fast for: table2_rt > 24/11/16 12:03:30 WARN log: Updated size to 436166 > 24/11/16 12:03:30 WARN log: Updated size to 436386 > 24/11/16 12:03:30 WARN log: Updating partition stats fast for: table2_rt > 24/11/16 12:03:30 WARN log: Updated size to 436185 > 24/11/16 12:03:30 WARN log: Updating partition stats fast for: table2 > 24/11/16 12:03:30 WARN log: Updated size to 436166 > 24/11/16 12:03:30 WARN log: Updating partition stats fast for: table2 > 24/11/16 12:03:30 WARN log: Updated size to 436386 > 24/11/16 12:03:30 WARN log: Updating partition stats fast for: table2 > 24/11/16 12:03:30 WARN log: Updated size to 436185 > 24/11/16 12:03:30 WARN HiveConf: HiveConf of name > hive.internal.ss.authz.settings.applied.marker does not exist > 24/11/16 12:03:30 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout > does not exist > 24/11/16 12:03:30 WARN HiveConf: HiveConf of name hive.stats.retries.wait > does not exist > Time taken: 4.843 seconds > spark-sql (default)> > > SET 
hoodie.merge.small.file.group.candidates.limit = 0; > hoodie.merge.small.file.group.candidates.limit 0 > Time taken: 0.018 seconds, Fetched 1 row(s) > spark-sql (default)> > > UPDATE testing_positions.table2 SET fare = 20.0 WHERE > rider = 'rider-A'; > 24/11/16 12:03:31 WARN SparkStringUtils: Truncated the string representation > of a plan since it was too large. This behavior can be adjusted by setting > 'spark.sql.debug.maxToStringFields'. > 24/11/16 12:03:32 WARN HoodieFileIndex: Data skipping requires both Metadata > Table and at least one of Column Stats Index, Record Level Index, or > Functional Index to be enabled as well! (isMetadataTableEnabled = false, > isColumnStatsIndexEnabled = false, isRecordIndexApplicable = false, > isFunctionalIndexEnabled = false, isBucketIndexEnable = false, > isPartitionStatsIndexEnabled = false), isBloomFiltersIndexEnabled = false) > 24/11/16 12:03:32 WARN HoodieDataBlock: There are records without valid > positions. Skip writing record positions to the data block header. > 24/11/16 12:03:34 WARN HiveConf: HiveConf of name > hive.internal.ss.authz.settings.applied.marker does not exist > 24/11/16 12:03:34 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout > does not exist > 24/11/16 12:03:34 WARN HiveConf: HiveConf of name hive.stats.retries.wait > does not exist > Time taken: 5.545 seconds > spark-sql (default)> > > DELETE FROM testing_positions.table2 WHERE uuid = > 'e3cf430c-889d-4015-bc98-59bdce1e530c'; > 24/11/16 12:03:37 WARN HoodieFileIndex: Data skipping requires both Metadata > Table and at least one of Column Stats Index, Record Level Index, or > Functional Index to be enabled as well! 
(isMetadataTableEnabled = false, > isColumnStatsIndexEnabled = false, isRecordIndexApplicable = false, > isFunctionalIndexEnabled = false, isBucketIndexEnable = false, > isPartitionStatsIndexEnabled = false), isBloomFiltersIndexEnabled = false) > 24/11/16 12:03:37 WARN HoodiePositionBasedFileGroupRecordBuffer: No record > position info is found when attempt to do position based merge. > 24/11/16 12:03:37 WARN HoodiePositionBasedFileGroupRecordBuffer: Falling back > to key based merge for Read > 24/11/16 12:03:38 WARN HoodieDeleteBlock: There are delete records without > valid positions. Skip writing record positions to the delete block header. > 24/11/16 12:03:39 WARN HiveConf: HiveConf of name > hive.internal.ss.authz.settings.applied.marker does not exist > 24/11/16 12:03:39 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout > does not exist > 24/11/16 12:03:39 WARN HiveConf: HiveConf of name hive.stats.retries.wait > does not exist > Time taken: 2.992 seconds > spark-sql (default)> > > select * from testing_positions.table2; > 24/11/16 12:03:41 WARN HoodiePositionBasedFileGroupRecordBuffer: No record > position info is found when attempt to do position based merge. > 24/11/16 12:03:41 WARN HoodiePositionBasedFileGroupRecordBuffer: No record > position info is found when attempt to do position based merge. 
> 24/11/16 12:03:41 WARN HoodiePositionBasedFileGroupRecordBuffer: Falling back > to key based merge for Read > 24/11/16 12:03:41 WARN HoodiePositionBasedFileGroupRecordBuffer: Falling back > to key based merge for Read > 20241116120326527 20241116120326527_0_0 > 1dced545-862b-4ceb-8b43-d2a568f6616b city=san_francisco > 1ba64ef0-bba2-469e-8ef5-696f8cdbe141-0_0-186-338_20241116120326527.parquet > 16953320662041dced545-862b-4ceb-8b43-d2a568f6616b rider-E driver-O > 93.5 san_francisco > 20241116120326527 20241116120326527_0_1 > e96c4396-3fad-413a-a942-4cb36106d721 city=san_francisco > 1ba64ef0-bba2-469e-8ef5-696f8cdbe141-0_0-186-338_20241116120326527.parquet > 1695091554788e96c4396-3fad-413a-a942-4cb36106d721 rider-C driver-M > 27.7 san_francisco > 20241116120326527 20241116120326527_0_2 > 9909a8b1-2d15-4d3d-8ec9-efc48c536a00 city=san_francisco > 1ba64ef0-bba2-469e-8ef5-696f8cdbe141-0_0-186-338_20241116120326527.parquet > 16950464621799909a8b1-2d15-4d3d-8ec9-efc48c536a00 rider-D driver-L > 33.9 san_francisco > 20241116120331896 20241116120331896_0_9 > 334e26e9-8355-45cc-97c6-c31daf0df330 city=san_francisco > 1ba64ef0-bba2-469e-8ef5-696f8cdbe141-0 1695159649087 > 334e26e9-8355-45cc-97c6-c31daf0df330 rider-A driver-K 20.0 > san_francisco > 20241116120326527 20241116120326527_1_1 > 7a84095f-737f-40bc-b62f-6b69664712d2 city=sao_paulo > ba555452-0c3c-47dc-acc0-f90823e12408-0_1-186-339_20241116120326527.parquet > 1695376420876 7a84095f-737f-40bc-b62f-6b69664712d2 rider-G driver-Q > 43.4 sao_paulo > 20241116120326527 20241116120326527_2_0 > 3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04 city=chennai > 8dacb2f9-6901-4ab3-8139-697b51125f16-0_2-186-340_20241116120326527.parquet > 1695173887231 3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04 rider-I driver-S > 41.06 chennai > 20241116120326527 20241116120326527_2_1 > c8abbe79-8d89-47ea-b4ce-4d224bae5bfa city=chennai > 8dacb2f9-6901-4ab3-8139-697b51125f16-0_2-186-340_20241116120326527.parquet > 1695115999911 c8abbe79-8d89-47ea-b4ce-4d224bae5bfa 
rider-J driver-T > 17.85 chennai > Time taken: 1.719 seconds, Fetched 7 row(s) {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)