This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 336b5268dd4 [HUDI-7596] Enable Jacoco code coverage report across 
multiple modules (#12842)
336b5268dd4 is described below

commit 336b5268dd4e1ed0857ac28fdb42c2bb85b43ff8
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri Feb 14 17:20:09 2025 -0800

    [HUDI-7596] Enable Jacoco code coverage report across multiple modules 
(#12842)
---
 azure-pipelines-20230430.yml                      | 180 ++++++++++++++++++++--
 pom.xml                                           |  83 +++++++++-
 scripts/jacoco/README.md                          | 110 +++++++++++++
 scripts/jacoco/download_jacoco.sh                 |  22 +++
 scripts/jacoco/generate_jacoco_coverage_report.sh |  23 +++
 scripts/jacoco/merge_jacoco_exec_files.sh         |  24 +++
 scripts/jacoco/merge_jacoco_job_files.sh          |  25 +++
 7 files changed, 451 insertions(+), 16 deletions(-)

diff --git a/azure-pipelines-20230430.yml b/azure-pipelines-20230430.yml
index 4cf9a383168..914bd7eaddd 100644
--- a/azure-pipelines-20230430.yml
+++ b/azure-pipelines-20230430.yml
@@ -14,8 +14,10 @@
 # limitations under the License.
 
 # NOTE:
-# This config file defines how Azure CI runs tests with Spark 2.4 and Flink 
1.18 profiles.
+# This config file defines how Azure CI runs tests with Spark 3.5 and Flink 
1.18 profiles.
 # PRs will need to keep in sync with master's version to trigger the CI runs.
+# See scripts/jacoco/README.md for how aggregated code coverage report works
+# across multiple modules.
 
 trigger:
   branches:
@@ -131,10 +133,34 @@ parameters:
       - 'org.apache.spark.sql.hudi.command'
       - 'org.apache.spark.sql.hudi.common'
       - 'org.apache.spark.sql.hudi.ddl'
+  - name: jacocoModules
+    type: object
+    default:
+      - '!hudi-examples/hudi-examples-k8s'
+      - '!hudi-flink-datasource/hudi-flink1.14.x'
+      - '!hudi-flink-datasource/hudi-flink1.15.x'
+      - '!hudi-flink-datasource/hudi-flink1.16.x'
+      - '!hudi-flink-datasource/hudi-flink1.17.x'
+      - '!hudi-flink-datasource/hudi-flink1.19.x'
+      - '!hudi-flink-datasource/hudi-flink1.20.x'
+      - '!packaging/hudi-aws-bundle'
+      - '!packaging/hudi-cli-bundle'
+      - '!packaging/hudi-datahub-sync-bundle'
+      - '!packaging/hudi-flink-bundle'
+      - '!packaging/hudi-gcp-bundle'
+      - '!packaging/hudi-hadoop-mr-bundle'
+      - '!packaging/hudi-hive-sync-bundle'
+      - '!packaging/hudi-kafka-connect-bundle'
+      - '!packaging/hudi-metaserver-server-bundle'
+      - '!packaging/hudi-presto-bundle'
+      - '!packaging/hudi-spark-bundle'
+      - '!packaging/hudi-timeline-server-bundle'
+      - '!packaging/hudi-trino-bundle'
+      - '!packaging/hudi-utilities-slim-bundle'
 
 variables:
   BUILD_PROFILES: '-Dscala-2.12 -Dspark3.5 -Dflink1.18'
-  PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true -ntp 
-B -V -Pwarn-log 
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn 
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn'
+  PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -ntp -B -V -Pwarn-log 
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn 
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn'
   MVN_OPTS_INSTALL: '-T 3 -Phudi-platform-service -DskipTests 
$(BUILD_PROFILES) $(PLUGIN_OPTS) 
-Dmaven.wagon.httpconnectionManager.ttlSeconds=25 
-Dmaven.wagon.http.retryHandler.count=5'
   MVN_OPTS_TEST: '-fae -Pwarn-log $(BUILD_PROFILES) $(PLUGIN_OPTS)'
   JAVA_MVN_TEST_FILTER: '-DwildcardSuites=skipScalaTests -DfailIfNoTests=false'
@@ -151,6 +177,10 @@ variables:
   JOB5_SPARK_DDL_OTHERS_WILDCARD_SUITES: ${{ 
join(',',parameters.job5HudiSparkDdlOthersWildcardSuites) }}
   JOB7_UT_MODULES: ${{ join(',',parameters.job7UTModules) }}
   JOB7_FT_MODULES: ${{ join(',',parameters.job7FTModules) }}
+  JACOCO_AGENT_DESTFILE1_ARG: '-Djacoco.agent.dest.filename=jacoco1.exec'
+  JACOCO_AGENT_DESTFILE2_ARG: '-Djacoco.agent.dest.filename=jacoco2.exec'
+  JACOCO_AGENT_DESTFILE3_ARG: '-Djacoco.agent.dest.filename=jacoco3.exec'
+  JACOCO_MODULES: ${{ join(',',parameters.jacocoModules) }}
 
 stages:
   - stage: test
@@ -175,7 +205,7 @@ stages:
             inputs:
               mavenPomFile: 'pom.xml'
               goals: 'test'
-              options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB1_UT_MODULES)
+              options: $(MVN_OPTS_TEST) -Punit-tests 
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB1_UT_MODULES)
               publishJUnitResults: false
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
@@ -184,11 +214,21 @@ stages:
             inputs:
               mavenPomFile: 'pom.xml'
               goals: 'test'
-              options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl 
$(JOB1_FT_MODULES)
+              options: $(MVN_OPTS_TEST) -Pfunctional-tests 
$(JACOCO_AGENT_DESTFILE2_ARG) -pl $(JOB1_FT_MODULES)
               publishJUnitResults: true
               testResultsFiles: '**/surefire-reports/TEST-*.xml'
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_exec_files.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-1'
+              publishLocation: 'Container'
           - script: |
               grep "testcase" */target/surefire-reports/*.xml 
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr 
| head -n 100
             displayName: Top 100 long-running testcases
@@ -209,11 +249,21 @@ stages:
             inputs:
               mavenPomFile: 'pom.xml'
               goals: 'test'
-              options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl 
$(JOB2_FT_MODULES)
+              options: $(MVN_OPTS_TEST) -Pfunctional-tests 
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB2_FT_MODULES)
               publishJUnitResults: true
               testResultsFiles: '**/surefire-reports/TEST-*.xml'
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_exec_files.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-2'
+              publishLocation: 'Container'
           - script: |
               grep "testcase" */target/surefire-reports/*.xml 
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr 
| head -n 100
             displayName: Top 100 long-running testcases
@@ -234,11 +284,21 @@ stages:
             inputs:
               mavenPomFile: 'pom.xml'
               goals: 'test'
-              options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER) 
$(JOB3_MVN_ARG_TEST) -pl $(JOB345_MODULES)
+              options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER) 
$(JOB3_MVN_ARG_TEST) $(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
               publishJUnitResults: true
               testResultsFiles: '**/surefire-reports/TEST-*.xml'
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_exec_files.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-3'
+              publishLocation: 'Container'
           - script: |
               grep "testcase" */target/surefire-reports/*.xml 
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr 
| head -n 100
             displayName: Top 100 long-running testcases
@@ -259,11 +319,21 @@ stages:
             inputs:
               mavenPomFile: 'pom.xml'
               goals: 'test'
-              options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER) 
-DwildcardSuites="$(JOB4_SPARK_DML_WILDCARD_SUITES)" -pl $(JOB345_MODULES)
+              options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER) 
-DwildcardSuites="$(JOB4_SPARK_DML_WILDCARD_SUITES)" 
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
               publishJUnitResults: true
               testResultsFiles: '**/surefire-reports/TEST-*.xml'
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_exec_files.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-4'
+              publishLocation: 'Container'
           - script: |
               grep "testcase" */target/surefire-reports/*.xml 
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr 
| head -n 100
             displayName: Top 100 long-running testcases
@@ -284,7 +354,7 @@ stages:
             inputs:
               mavenPomFile: 'pom.xml'
               goals: 'test'
-              options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER) 
$(JOB5_MVN_ARG_TEST) -pl $(JOB345_MODULES)
+              options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER) 
$(JOB5_MVN_ARG_TEST) $(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
               publishJUnitResults: false
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
@@ -293,11 +363,21 @@ stages:
             inputs:
               mavenPomFile: 'pom.xml'
               goals: 'test'
-              options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER) 
-DwildcardSuites="$(JOB5_SPARK_DDL_OTHERS_WILDCARD_SUITES)" -pl 
$(JOB345_MODULES)
+              options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER) 
-DwildcardSuites="$(JOB5_SPARK_DDL_OTHERS_WILDCARD_SUITES)" 
$(JACOCO_AGENT_DESTFILE2_ARG) -pl $(JOB345_MODULES)
               publishJUnitResults: true
               testResultsFiles: '**/surefire-reports/TEST-*.xml'
               jdkVersionOption: '1.8'
               mavenOptions: '-Xmx4g'
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_exec_files.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-5'
+              publishLocation: 'Container'
           - script: |
               grep "testcase" */target/surefire-reports/*.xml 
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr 
| head -n 100
             displayName: Top 100 long-running testcases
@@ -328,8 +408,8 @@ stages:
                 -v $(Build.SourcesDirectory):/hudi
                 -i 
docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId)
                 /bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) 
-Phudi-platform-service -Pthrift-gen-source -pl hudi-utilities -am
-                && mvn test  $(MVN_OPTS_TEST) -Punit-tests 
-DfailIfNoTests=false -pl hudi-utilities
-                && mvn test  $(MVN_OPTS_TEST) -Pfunctional-tests 
-DfailIfNoTests=false -pl hudi-utilities"
+                && mvn test  $(MVN_OPTS_TEST) -Punit-tests 
$(JACOCO_AGENT_DESTFILE1_ARG) -DfailIfNoTests=false -pl hudi-utilities
+                && mvn test  $(MVN_OPTS_TEST) -Pfunctional-tests 
$(JACOCO_AGENT_DESTFILE2_ARG) -DfailIfNoTests=false -pl hudi-utilities"
           - task: PublishTestResults@2
             displayName: 'Publish Test Results'
             inputs:
@@ -337,6 +417,16 @@ stages:
               testResultsFiles: '**/surefire-reports/TEST-*.xml'
               searchFolder: '$(Build.SourcesDirectory)'
               failTaskOnFailedTests: true
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_exec_files.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-6'
+              publishLocation: 'Container'
           - script: |
               grep "testcase" */target/surefire-reports/*.xml 
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr 
| head -n 100
             displayName: Top 100 long-running testcases
@@ -367,9 +457,9 @@ stages:
                 -v $(Build.SourcesDirectory):/hudi
                 -i 
docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId)
                 /bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) 
-Phudi-platform-service -Pthrift-gen-source
-                && mvn test  $(MVN_OPTS_TEST) -Punit-tests 
$(SCALA_MVN_TEST_FILTER) 
-DwildcardSuites="$(JOB7_SPARK_PROCEDURE_WILDCARD_SUITES)" -pl $(JOB345_MODULES)
-                && mvn test  $(MVN_OPTS_TEST) -Punit-tests 
-DfailIfNoTests=false -pl $(JOB7_UT_MODULES)
-                && mvn test  $(MVN_OPTS_TEST) -Pfunctional-tests 
-DfailIfNoTests=false -pl $(JOB7_FT_MODULES)"
+                && mvn test  $(MVN_OPTS_TEST) -Punit-tests 
$(SCALA_MVN_TEST_FILTER) 
-DwildcardSuites="$(JOB7_SPARK_PROCEDURE_WILDCARD_SUITES)" 
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
+                && mvn test  $(MVN_OPTS_TEST) -Punit-tests 
-DfailIfNoTests=false $(JACOCO_AGENT_DESTFILE2_ARG) -pl $(JOB7_UT_MODULES)
+                && mvn test  $(MVN_OPTS_TEST) -Pfunctional-tests 
-DfailIfNoTests=false $(JACOCO_AGENT_DESTFILE3_ARG) -pl $(JOB7_FT_MODULES)"
           - task: PublishTestResults@2
             displayName: 'Publish Test Results'
             inputs:
@@ -377,6 +467,68 @@ stages:
               testResultsFiles: '**/surefire-reports/TEST-*.xml'
               searchFolder: '$(Build.SourcesDirectory)'
               failTaskOnFailedTests: true
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_exec_files.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-7'
+              publishLocation: 'Container'
           - script: |
               grep "testcase" */target/surefire-reports/*.xml 
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr 
| head -n 100
             displayName: Top 100 long-running testcases
+      - job: MergeAndPublishCoverage
+        displayName: 'Merge and Publish JaCoCo Code Coverage'
+        dependsOn:
+          - UT_FT_1
+          - UT_FT_2
+          - UT_FT_3
+          - UT_FT_4
+          - UT_FT_5
+          - UT_FT_6
+          - UT_FT_7
+        steps:
+          - task: DownloadBuildArtifacts@0
+            displayName: 'Download JaCoCo Execution Data Files'
+            inputs:
+              buildType: 'current'
+              downloadType: 'specific'
+              downloadPath: '$(System.ArtifactsDirectory)'
+              itemPattern: |
+                **/merged-jacoco-$(Build.BuildId)-*/*.exec
+          - task: JavaToolInstaller@0
+            inputs:
+              versionSpec: '8'
+              jdkArchitectureOption: 'x64'
+              jdkSourceOption: 'PreInstalled'
+          - script: |
+              ./scripts/jacoco/download_jacoco.sh
+              ./scripts/jacoco/merge_jacoco_job_files.sh 
jacoco-lib/lib/jacococli.jar $(System.ArtifactsDirectory) 
$(Build.SourcesDirectory)
+            displayName: 'Merge JaCoCo Execution Data Files'
+          - task: PublishBuildArtifacts@1
+            displayName: 'Publish Merged JaCoCo Execution Data File'
+            inputs:
+              PathtoPublish: '$(Build.SourcesDirectory)/jacoco.exec'
+              ArtifactName: 'merged-jacoco-$(Build.BuildId)-final'
+              publishLocation: 'Container'
+          - task: Maven@4
+            displayName: 'Aggregate Source and Class Files for JaCoCo'
+            inputs:
+              mavenPomFile: 'pom.xml'
+              goals: 'clean package'
+              options: $(MVN_OPTS_INSTALL) -Pcopy-files-for-jacoco -pl 
$(JACOCO_MODULES)
+              publishJUnitResults: false
+              jdkVersionOption: '1.8'
+          - script: |
+              ./scripts/jacoco/generate_jacoco_coverage_report.sh 
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+            displayName: 'Generate JaCoCo Code Coverage Report'
+          - task: PublishCodeCoverageResults@1
+            displayName: 'Publish JaCoCo Code Coverage'
+            inputs:
+              codeCoverageTool: 'JaCoCo'
+              summaryFileLocation: 
'$(Build.SourcesDirectory)/jacoco-report.xml'
+              reportDirectory: '$(Build.SourcesDirectory)/jacoco-html-report'
+              failIfCoverageEmpty: true
diff --git a/pom.xml b/pom.xml
index 386545c29d7..86118830c96 100644
--- a/pom.xml
+++ b/pom.xml
@@ -203,7 +203,7 @@
     <utilities.bundle.hive.scope>provided</utilities.bundle.hive.scope>
     <utilities.bundle.hive.shade.prefix/>
     <argLine>-Xmx2g -Xms128m</argLine>
-    <jacoco.version>0.8.8</jacoco.version>
+    <jacoco.version>0.8.12</jacoco.version>
     <presto.bundle.bootstrap.scope>compile</presto.bundle.bootstrap.scope>
     
<presto.bundle.bootstrap.shade.prefix>org.apache.hudi.</presto.bundle.bootstrap.shade.prefix>
     <trino.bundle.bootstrap.scope>compile</trino.bundle.bootstrap.scope>
@@ -228,6 +228,10 @@
     <springboot.version>2.7.3</springboot.version>
     <spring.shell.version>2.1.1</spring.shell.version>
     <snappy.version>1.1.10.7</snappy.version>
+    <!-- The following properties are only used for Jacoco coverage report 
aggregation -->
+    <copy.files>false</copy.files>
+    
<copy.files.target.dir>${maven.multiModuleProjectDirectory}</copy.files.target.dir>
+    <jacoco.agent.dest.filename>jacoco.exec</jacoco.agent.dest.filename>
   </properties>
 
   <scm>
@@ -1953,6 +1957,9 @@
                 <goals>
                   <goal>prepare-agent</goal>
                 </goals>
+                <configuration>
+                  
<destFile>${project.build.directory}/jacoco-agent/${jacoco.agent.dest.filename}</destFile>
+                </configuration>
               </execution>
               <execution>
                 <id>post-unit-tests</id>
@@ -2006,6 +2013,9 @@
                 <goals>
                   <goal>prepare-agent</goal>
                 </goals>
+                <configuration>
+                  
<destFile>${project.build.directory}/jacoco-agent/${jacoco.agent.dest.filename}</destFile>
+                </configuration>
               </execution>
               <execution>
                 <id>post-functional-tests</id>
@@ -2640,6 +2650,75 @@
       </activation>
     </profile>
 
+    <!-- This profile is only used to copy all source and class files into 
shared directories
+    for Jacoco to report test coverage across multiple source modules.
+    Files are copied to ${copy.files.target.dir}/aggregate-sources and 
${copy.files.target.dir}/aggregate-classes.
+    -->
+    <profile>
+      <id>copy-files-for-jacoco</id>
+      <activation>
+        <property>
+          <name>copy.files</name>
+          <value>true</value>
+        </property>
+      </activation>
+      <build>
+        <plugins>
+          <plugin>
+            <artifactId>maven-resources-plugin</artifactId>
+            <version>3.3.1</version>
+            <executions>
+              <!-- Copy source files for Jacoco report aggregation -->
+              <execution>
+                <id>copy-source-files</id>
+                <phase>process-sources</phase>
+                <goals>
+                  <goal>copy-resources</goal>
+                </goals>
+                <configuration>
+                  
<outputDirectory>${copy.files.target.dir}/aggregate-sources</outputDirectory>
+                  <resources>
+                    <resource>
+                      <directory>${project.basedir}/src/main/java</directory>
+                      <filtering>false</filtering>
+                      <includes>
+                        <include>**/*.java</include>
+                      </includes>
+                    </resource>
+                    <resource>
+                      <directory>${project.basedir}/src/main/scala</directory>
+                      <filtering>false</filtering>
+                      <includes>
+                        <include>**/*.scala</include>
+                      </includes>
+                    </resource>
+                  </resources>
+                </configuration>
+              </execution>
+              <!-- Copy class files for Jacoco report aggregation -->
+              <execution>
+                <id>copy-class-files</id>
+                <phase>process-classes</phase>
+                <goals>
+                  <goal>copy-resources</goal>
+                </goals>
+                <configuration>
+                  
<outputDirectory>${copy.files.target.dir}/aggregate-classes</outputDirectory>
+                  <resources>
+                    <resource>
+                      <directory>${project.basedir}/target/classes</directory>
+                      <includes>
+                        <include>**/*.class</include>
+                      </includes>
+                    </resource>
+                  </resources>
+                  <useDefaultDelimiters>false</useDefaultDelimiters>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
   </profiles>
-
 </project>
diff --git a/scripts/jacoco/README.md b/scripts/jacoco/README.md
new file mode 100644
index 00000000000..c412ce10b31
--- /dev/null
+++ b/scripts/jacoco/README.md
@@ -0,0 +1,110 @@
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+-->
+
+# JaCoCo Code Coverage Report for Hudi
+
+This README describes how code coverage report across multiple modules works 
for Hudi
+by leveraging JaCoCo.
+
+## Problem
+
+We used to report code coverage on each PR in the early days (
+see https://github.com/apache/hudi/pull/1667#issuecomment-633665810, 
screenshot below).
+However, we have disabled it due to several problems:
+
+- We now use Azure DevOps Pipeline as CI to run tests in multiple jobs. Due to 
permission issues, we mirror the repo and
+  branch for running the tests. This creates hurdles in reporting aggregation.
+- The Hudi project contains multiple source modules. There have been multiple code 
refactoring iterations around module
+  organization, so some functional tests cover production code logic in a 
different module, which JaCoCo's simple
+  reporting cannot handle well, leading to under-reporting of code coverage.
+
+<img width="928" alt="Screenshot 2025-02-14 at 13 54 08" 
src="https://github.com/user-attachments/assets/e0ac34f4-a099-4e76-92b6-a8eac9bd2ee6"
 />
+
+## Tools
+
+JaCoCo is a free, open-source code coverage library for Java. It helps 
developers understand how much of their codebase
+is actually being exercised by their tests. It is still the de facto standard 
for Java code coverage reporting.
+
+JaCoCo supports `report-aggregate` for multi-module projects, but there are 
certain limitations as of the 0.8.12
+release 
([1](https://www.eclemma.org/jacoco/trunk/doc/report-aggregate-mojo.html), 
[2](https://stackoverflow.com/questions/50806424/reporting-and-merging-multi-module-jacoco-reports-with-report-aggregate),
 
[3](https://stackoverflow.com/questions/33078745/jacoco-maven-multi-module-project-coverage),
 [4](https://github.com/jacoco/jacoco/issues/1731), 
[5](https://github.com/jacoco/jacoco/issues/842), 
[6](https://github.com/jacoco/jacoco/issues?q=is%3Aissue%20state%3Aopen%20aggregate)).
+One hack includes creating a new source module for reporting aggregation, 
which is something we want to avoid if possible.
+
+However, JaCoCo also provides a powerful CLI tool 
(https://www.jacoco.org/jacoco/trunk/doc/cli.html) which can do report
+manipulation at the file level, which we can use for custom report aggregation.
+
+## Solution
+
+At a high level, here's how JaCoCo generates the code coverage report:
+
+(1) While running tests, JaCoCo generates binary execution data for reporting 
later. The execution data can be stored in
+a `jacoco.exec` file if enabled. It's not a human-readable text format. It's 
designed for consumption by JaCoCo's
+reporting tools. The following key information is stored in `jacoco.exec`:
+
+- Session Information:  Identifies the specific test run or program execution 
(session ID, start time, dump time).
+- Class Identification:  Uniquely identifies each class that was instrumented 
and monitored, including its name and a
+  checksum (ID) of the original class bytecode.
+- Execution Data (Probes): For each instrumented class, it stores an array of 
boolean values (probes). Each boolean
+  indicates whether a specific execution point (line of code or branch) within 
that class was executed (true) or not (
+  false) during the session. This is the core data that drives code coverage 
analysis.
+
+(2) Once tests finish, JaCoCo generates code coverage report in HTML and/or 
XML based on the binary execution
+data (`jacoco.exec`).
+
+To make cross-module code coverage report work in Azure DevOps Pipeline (or in 
other similar CI environments) for Hudi,
+here's the workflow:
+
+(1) When running tests from mvn command in each job, enable binary execution 
data to be written to the storage, i.e.,
+through `prepare-agent` goal (see `pom.xml`). As we run multiple `mvn test` 
commands in the same job with different
+args, to avoid collision, a unique `destFile` is configured for each command 
(see `azure-pipelines-20230430.yml`);
+
+(2) Once each job finishes, multiple `*.exec` binary execution data files are 
merged into one `merged-jacoco.exec`
+through JaCoCo CLI (see `Merge JaCoCo Execution Data Files` task in 
`azure-pipelines-20230430.yml`). The merged
+execution data file is published as an artifact for later analysis (see 
`Publish Merged JaCoCo Execution Data File` task
+in `azure-pipelines-20230430.yml`).
+
+(3) Once all jobs finish running all tests, all the JaCoCo execution data 
files are processed (
+see `MergeAndPublishCoverage` job in `azure-pipelines-20230430.yml`). The 
execution data files from multiple jobs are
+downloaded and merged again into a single file `jacoco.exec` through JaCoCo 
CLI;
+
+(4) To generate the final report, the source files (`*.java`, `*.scala`) and 
class files (`*.class`) must be under the
+same directory, not in different modules, due to the limitation of JaCoCo CLI 
taking only a single directory path for
+each. So a new maven plugin execution target is added to do that (see 
`copy-source-files` and `copy-class-files`
+in `pom.xml`). Once that's done, the final reporting is done through JaCoCo 
CLI by using the aggregated source files,
+class files, and `jacoco.exec` (see `MergeAndPublishCoverage` job in 
`azure-pipelines-20230430.yml`). Both
+the `jacoco.exec` and final reports are published.
+
+## Example Results
+
+Azure Run
+<img width="1543" alt="Screenshot 2025-02-14 at 13 28 16" 
src="https://github.com/user-attachments/assets/05e7052c-2842-4a0e-ab0a-014eeb8e7652"
 />
+
+JaCoCo Coverage Report
+<img width="1559" alt="Screenshot 2025-02-14 at 13 30 32" 
src="https://github.com/user-attachments/assets/b47a8e78-8f98-4dfb-b64d-d926bfea5198"
 />
+<img width="1570" alt="Screenshot 2025-02-14 at 13 30 40" 
src="https://github.com/user-attachments/assets/369768a0-9e82-4a29-a14d-2550048ef07f"
 />
+
+Published Artifacts
+<img width="1586" alt="Screenshot 2025-02-14 at 13 31 05" 
src="https://github.com/user-attachments/assets/02cb75b1-3f7f-4f17-8392-8e0a452d31cf"
 />
+
+## Scripts
+
+- `download_jacoco.sh`: downloads JaCoCo binaries, especially the CLI jar, for 
usage.
+- `merge_jacoco_exec_files.sh`: merges multiple JaCoCo execution data files in 
multiple modules.
+- `merge_jacoco_job_files.sh`: merges multiple JaCoCo execution data files 
from multiple Azure pipeline jobs.
+- `generate_jacoco_coverage_report.sh`: generates the JaCoCo code coverage 
report by taking the execution data file,
+  source files and class files.
\ No newline at end of file
diff --git a/scripts/jacoco/download_jacoco.sh 
b/scripts/jacoco/download_jacoco.sh
new file mode 100755
index 00000000000..e7b79d28638
--- /dev/null
+++ b/scripts/jacoco/download_jacoco.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+wget https://repo1.maven.org/maven2/org/jacoco/jacoco/0.8.12/jacoco-0.8.12.zip
+unzip jacoco-0.8.12.zip -d jacoco-lib
+ls -l jacoco-lib/lib/jacococli.jar
diff --git a/scripts/jacoco/generate_jacoco_coverage_report.sh 
b/scripts/jacoco/generate_jacoco_coverage_report.sh
new file mode 100755
index 00000000000..3031a7d3a0b
--- /dev/null
+++ b/scripts/jacoco/generate_jacoco_coverage_report.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+/bin/bash --version
+echo "Jacoco CLI jar: $1"
+echo "Hudi source directory: $2"
+java -jar $1 report $2/jacoco.exec --sourcefiles $2/aggregate-sources 
--classfiles $2/aggregate-classes --html $2/jacoco-html-report --xml 
$2/jacoco-report.xml
diff --git a/scripts/jacoco/merge_jacoco_exec_files.sh 
b/scripts/jacoco/merge_jacoco_exec_files.sh
new file mode 100755
index 00000000000..be3fcc37800
--- /dev/null
+++ b/scripts/jacoco/merge_jacoco_exec_files.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+/bin/bash --version
+shopt -s globstar
+echo "Jacoco CLI jar: $1"
+echo "Hudi source directory: $2"
+java -jar $1 merge $2/**/jacoco-agent/**/*.exec --destfile merged-jacoco.exec
diff --git a/scripts/jacoco/merge_jacoco_job_files.sh 
b/scripts/jacoco/merge_jacoco_job_files.sh
new file mode 100755
index 00000000000..6ed3a0d0511
--- /dev/null
+++ b/scripts/jacoco/merge_jacoco_job_files.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+/bin/bash --version
+shopt -s globstar
+echo "Jacoco CLI jar: $1"
+echo "JaCoCo execution file directory: $2"
+echo "Hudi source directory: $3"
+java -jar $1 merge $2/**/*.exec --destfile $3/jacoco.exec


Reply via email to