[ 
https://issues.apache.org/jira/browse/HUDI-4156?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17545663#comment-17545663
 ] 

sivabalan narayanan commented on HUDI-4156:
-------------------------------------------

Local fix to unblock myself for now:
{code:java}
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index f5a96fb676..1e67020810 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -955,7 +955,6 @@ public abstract class HoodieBackedTableMetadataWriter 
implements HoodieTableMeta
     HoodieTableFileSystemView fsView = 
HoodieTableMetadataUtil.getFileSystemView(metadataMetaClient);
     for (Map.Entry<MetadataPartitionType, HoodieData<HoodieRecord>> entry : 
partitionRecordsMap.entrySet()) {
       final String partitionName = entry.getKey().getPartitionPath();
-      final int fileGroupCount = entry.getKey().getFileGroupCount();
       HoodieData<HoodieRecord> records = entry.getValue();
 
       List<FileSlice> fileSlices =
@@ -965,9 +964,10 @@ public abstract class HoodieBackedTableMetadataWriter 
implements HoodieTableMeta
         // so if there are no committed file slices, look for inflight slices
         fileSlices = 
HoodieTableMetadataUtil.getPartitionLatestFileSlicesIncludingInflight(metadataMetaClient,
 Option.ofNullable(fsView), partitionName);
       }
-      ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount,
+      final int fileGroupCount = fileSlices.size();
+      /*ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount,
           String.format("Invalid number of file groups for partition:%s, 
found=%d, required=%d",
-              partitionName, fileSlices.size(), fileGroupCount));
+              partitionName, fileSlices.size(), fileGroupCount));*/
 
       List<FileSlice> finalFileSlices = fileSlices;
       HoodieData<HoodieRecord> rddSinglePartitionRecords = records.map(r -> { 
{code}

> AsyncIndexer fails for column stats partition 
> ----------------------------------------------
>
>                 Key: HUDI-4156
>                 URL: https://issues.apache.org/jira/browse/HUDI-4156
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: metadata
>            Reporter: sivabalan narayanan
>            Assignee: Sagar Sumit
>            Priority: Major
>             Fix For: 0.11.1
>
>
> Tried to build col stats for a hudi table w/ async indexer and ran into below 
> exception
>  
> Configs I had set are 
> {code:java}
> hoodie.metadata.enable=true
> hoodie.metadata.index.async=true
> hoodie.metadata.index.column.stats.enable=true
> hoodie.write.concurrency.mode=optimistic_concurrency_control
> hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider
>  {code}
> command
> {code:java}
> ./bin/spark-submit --class org.apache.hudi.utilities.HoodieIndexer 
> /home/hadoop/hudi-utilities-bundle_2.12-0.12.0-SNAPSHOT.jar --props 
> file:///home/hadoop/indexer.properties --mode scheduleandexecute --base-path 
> TBL_PATH --table-name call_center --index-types COLUMN_STATS --parallelism 1 
> --spark-memory 10g {code}
>  
>  
> {code:java}
> 2022-05-26 00:14:27,936 INFO util.ClusteringUtils: Found 0 files in pending 
> clustering operations
> 2022-05-26 00:14:27,937 INFO client.BaseHoodieClient: Stopping Timeline 
> service !!
> 2022-05-26 00:14:27,937 INFO embedded.EmbeddedTimelineService: Closing 
> Timeline server
> 2022-05-26 00:14:27,937 INFO service.TimelineService: Closing Timeline Service
> 2022-05-26 00:14:27,937 INFO javalin.Javalin: Stopping Javalin ...
> 2022-05-26 00:14:27,945 INFO javalin.Javalin: Javalin has stopped
> 2022-05-26 00:14:27,945 INFO service.TimelineService: Closed Timeline Service
> 2022-05-26 00:14:27,945 INFO embedded.EmbeddedTimelineService: Closed 
> Timeline server
> 2022-05-26 00:14:27,945 INFO transaction.TransactionManager: Transaction 
> manager closed
> 2022-05-26 00:14:27,946 ERROR utilities.UtilHelpers: Indexer failed
> java.lang.IllegalArgumentException: Invalid number of file groups for 
> partition:column_stats, found=2, required=1
>       at 
> org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:40)
>       at 
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.prepRecords(HoodieBackedTableMetadataWriter.java:968)
>       at 
> org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:132)
>       at 
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initialCommit(HoodieBackedTableMetadataWriter.java:1087)
>       at 
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.buildMetadataPartitions(HoodieBackedTableMetadataWriter.java:858)
>       at 
> org.apache.hudi.table.action.index.RunIndexActionExecutor.execute(RunIndexActionExecutor.java:140)
>       at 
> org.apache.hudi.table.HoodieSparkCopyOnWriteTable.index(HoodieSparkCopyOnWriteTable.java:291)
>       at 
> org.apache.hudi.client.BaseHoodieWriteClient.index(BaseHoodieWriteClient.java:1027)
>       at 
> org.apache.hudi.utilities.HoodieIndexer.scheduleAndRunIndexing(HoodieIndexer.java:278)
>       at 
> org.apache.hudi.utilities.HoodieIndexer.lambda$start$1(HoodieIndexer.java:198)
>       at org.apache.hudi.utilities.UtilHelpers.retry(UtilHelpers.java:541)
>       at org.apache.hudi.utilities.HoodieIndexer.start(HoodieIndexer.java:185)
>       at org.apache.hudi.utilities.HoodieIndexer.main(HoodieIndexer.java:154)
>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>       at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>       at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:498)
>       at 
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
>       at 
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
>       at 
> org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
>       at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
>       at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
>       at 
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043)
>       at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052)
>       at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> 2022-05-26 00:14:27,947 ERROR utilities.HoodieIndexer: Indexing with 
> basePath: 
> s3a://sagars-testlake/TPC-DS/1TB/hudi_hand_tuned_may20_1/call_center, 
> tableName: call_center, runningMode: scheduleandexecute failed
> 2022-05-26 00:14:27,954 INFO server.AbstractConnector: Stopped 
> Spark@450794b4{HTTP/1.1, (http/1.1)}{0.0.0.0:8090}
> 2022-05-26 00:14:27,954 INFO ui.SparkUI: Stopped Spark web UI at 
> http://ip-172-31-39-68.us-east-2.compute.internal:8090
> 2022-05-26 00:14:27,964 INFO spark.MapOutputTrackerMasterEndpoint: 
> MapOutputTrackerMasterEndpoint stopped! {code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to