This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 336b5268dd4 [HUDI-7596] Enable Jacoco code coverage report across
multiple modules (#12842)
336b5268dd4 is described below
commit 336b5268dd4e1ed0857ac28fdb42c2bb85b43ff8
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri Feb 14 17:20:09 2025 -0800
[HUDI-7596] Enable Jacoco code coverage report across multiple modules
(#12842)
---
azure-pipelines-20230430.yml | 180 ++++++++++++++++++++--
pom.xml | 83 +++++++++-
scripts/jacoco/README.md | 110 +++++++++++++
scripts/jacoco/download_jacoco.sh | 22 +++
scripts/jacoco/generate_jacoco_coverage_report.sh | 23 +++
scripts/jacoco/merge_jacoco_exec_files.sh | 24 +++
scripts/jacoco/merge_jacoco_job_files.sh | 25 +++
7 files changed, 451 insertions(+), 16 deletions(-)
diff --git a/azure-pipelines-20230430.yml b/azure-pipelines-20230430.yml
index 4cf9a383168..914bd7eaddd 100644
--- a/azure-pipelines-20230430.yml
+++ b/azure-pipelines-20230430.yml
@@ -14,8 +14,10 @@
# limitations under the License.
# NOTE:
-# This config file defines how Azure CI runs tests with Spark 2.4 and Flink
1.18 profiles.
+# This config file defines how Azure CI runs tests with Spark 3.5 and Flink
1.18 profiles.
# PRs will need to keep in sync with master's version to trigger the CI runs.
+# See scripts/jacoco/README.md for how aggregated code coverage report works
+# across multiple modules.
trigger:
branches:
@@ -131,10 +133,34 @@ parameters:
- 'org.apache.spark.sql.hudi.command'
- 'org.apache.spark.sql.hudi.common'
- 'org.apache.spark.sql.hudi.ddl'
+ - name: jacocoModules
+ type: object
+ default:
+ - '!hudi-examples/hudi-examples-k8s'
+ - '!hudi-flink-datasource/hudi-flink1.14.x'
+ - '!hudi-flink-datasource/hudi-flink1.15.x'
+ - '!hudi-flink-datasource/hudi-flink1.16.x'
+ - '!hudi-flink-datasource/hudi-flink1.17.x'
+ - '!hudi-flink-datasource/hudi-flink1.19.x'
+ - '!hudi-flink-datasource/hudi-flink1.20.x'
+ - '!packaging/hudi-aws-bundle'
+ - '!packaging/hudi-cli-bundle'
+ - '!packaging/hudi-datahub-sync-bundle'
+ - '!packaging/hudi-flink-bundle'
+ - '!packaging/hudi-gcp-bundle'
+ - '!packaging/hudi-hadoop-mr-bundle'
+ - '!packaging/hudi-hive-sync-bundle'
+ - '!packaging/hudi-kafka-connect-bundle'
+ - '!packaging/hudi-metaserver-server-bundle'
+ - '!packaging/hudi-presto-bundle'
+ - '!packaging/hudi-spark-bundle'
+ - '!packaging/hudi-timeline-server-bundle'
+ - '!packaging/hudi-trino-bundle'
+ - '!packaging/hudi-utilities-slim-bundle'
variables:
BUILD_PROFILES: '-Dscala-2.12 -Dspark3.5 -Dflink1.18'
- PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true -ntp
-B -V -Pwarn-log
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn'
+ PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -ntp -B -V -Pwarn-log
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn
-Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn'
MVN_OPTS_INSTALL: '-T 3 -Phudi-platform-service -DskipTests
$(BUILD_PROFILES) $(PLUGIN_OPTS)
-Dmaven.wagon.httpconnectionManager.ttlSeconds=25
-Dmaven.wagon.http.retryHandler.count=5'
MVN_OPTS_TEST: '-fae -Pwarn-log $(BUILD_PROFILES) $(PLUGIN_OPTS)'
JAVA_MVN_TEST_FILTER: '-DwildcardSuites=skipScalaTests -DfailIfNoTests=false'
@@ -151,6 +177,10 @@ variables:
JOB5_SPARK_DDL_OTHERS_WILDCARD_SUITES: ${{
join(',',parameters.job5HudiSparkDdlOthersWildcardSuites) }}
JOB7_UT_MODULES: ${{ join(',',parameters.job7UTModules) }}
JOB7_FT_MODULES: ${{ join(',',parameters.job7FTModules) }}
+ JACOCO_AGENT_DESTFILE1_ARG: '-Djacoco.agent.dest.filename=jacoco1.exec'
+ JACOCO_AGENT_DESTFILE2_ARG: '-Djacoco.agent.dest.filename=jacoco2.exec'
+ JACOCO_AGENT_DESTFILE3_ARG: '-Djacoco.agent.dest.filename=jacoco3.exec'
+ JACOCO_MODULES: ${{ join(',',parameters.jacocoModules) }}
stages:
- stage: test
@@ -175,7 +205,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB1_UT_MODULES)
+ options: $(MVN_OPTS_TEST) -Punit-tests
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB1_UT_MODULES)
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
@@ -184,11 +214,21 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl
$(JOB1_FT_MODULES)
+ options: $(MVN_OPTS_TEST) -Pfunctional-tests
$(JACOCO_AGENT_DESTFILE2_ARG) -pl $(JOB1_FT_MODULES)
publishJUnitResults: true
testResultsFiles: '**/surefire-reports/TEST-*.xml'
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_exec_files.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-1'
+ publishLocation: 'Container'
- script: |
grep "testcase" */target/surefire-reports/*.xml
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr
| head -n 100
displayName: Top 100 long-running testcases
@@ -209,11 +249,21 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl
$(JOB2_FT_MODULES)
+ options: $(MVN_OPTS_TEST) -Pfunctional-tests
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB2_FT_MODULES)
publishJUnitResults: true
testResultsFiles: '**/surefire-reports/TEST-*.xml'
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_exec_files.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-2'
+ publishLocation: 'Container'
- script: |
grep "testcase" */target/surefire-reports/*.xml
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr
| head -n 100
displayName: Top 100 long-running testcases
@@ -234,11 +284,21 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER)
$(JOB3_MVN_ARG_TEST) -pl $(JOB345_MODULES)
+ options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER)
$(JOB3_MVN_ARG_TEST) $(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
publishJUnitResults: true
testResultsFiles: '**/surefire-reports/TEST-*.xml'
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_exec_files.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-3'
+ publishLocation: 'Container'
- script: |
grep "testcase" */target/surefire-reports/*.xml
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr
| head -n 100
displayName: Top 100 long-running testcases
@@ -259,11 +319,21 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER)
-DwildcardSuites="$(JOB4_SPARK_DML_WILDCARD_SUITES)" -pl $(JOB345_MODULES)
+ options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER)
-DwildcardSuites="$(JOB4_SPARK_DML_WILDCARD_SUITES)"
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
publishJUnitResults: true
testResultsFiles: '**/surefire-reports/TEST-*.xml'
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_exec_files.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-4'
+ publishLocation: 'Container'
- script: |
grep "testcase" */target/surefire-reports/*.xml
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr
| head -n 100
displayName: Top 100 long-running testcases
@@ -284,7 +354,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER)
$(JOB5_MVN_ARG_TEST) -pl $(JOB345_MODULES)
+ options: $(MVN_OPTS_TEST) -Punit-tests $(JAVA_MVN_TEST_FILTER)
$(JOB5_MVN_ARG_TEST) $(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
@@ -293,11 +363,21 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER)
-DwildcardSuites="$(JOB5_SPARK_DDL_OTHERS_WILDCARD_SUITES)" -pl
$(JOB345_MODULES)
+ options: $(MVN_OPTS_TEST) -Punit-tests $(SCALA_MVN_TEST_FILTER)
-DwildcardSuites="$(JOB5_SPARK_DDL_OTHERS_WILDCARD_SUITES)"
$(JACOCO_AGENT_DESTFILE2_ARG) -pl $(JOB345_MODULES)
publishJUnitResults: true
testResultsFiles: '**/surefire-reports/TEST-*.xml'
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_exec_files.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-5'
+ publishLocation: 'Container'
- script: |
grep "testcase" */target/surefire-reports/*.xml
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr
| head -n 100
displayName: Top 100 long-running testcases
@@ -328,8 +408,8 @@ stages:
-v $(Build.SourcesDirectory):/hudi
-i
docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId)
/bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL)
-Phudi-platform-service -Pthrift-gen-source -pl hudi-utilities -am
- && mvn test $(MVN_OPTS_TEST) -Punit-tests
-DfailIfNoTests=false -pl hudi-utilities
- && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests
-DfailIfNoTests=false -pl hudi-utilities"
+ && mvn test $(MVN_OPTS_TEST) -Punit-tests
$(JACOCO_AGENT_DESTFILE1_ARG) -DfailIfNoTests=false -pl hudi-utilities
+ && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests
$(JACOCO_AGENT_DESTFILE2_ARG) -DfailIfNoTests=false -pl hudi-utilities"
- task: PublishTestResults@2
displayName: 'Publish Test Results'
inputs:
@@ -337,6 +417,16 @@ stages:
testResultsFiles: '**/surefire-reports/TEST-*.xml'
searchFolder: '$(Build.SourcesDirectory)'
failTaskOnFailedTests: true
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_exec_files.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-6'
+ publishLocation: 'Container'
- script: |
grep "testcase" */target/surefire-reports/*.xml
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr
| head -n 100
displayName: Top 100 long-running testcases
@@ -367,9 +457,9 @@ stages:
-v $(Build.SourcesDirectory):/hudi
-i
docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId)
/bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL)
-Phudi-platform-service -Pthrift-gen-source
- && mvn test $(MVN_OPTS_TEST) -Punit-tests
$(SCALA_MVN_TEST_FILTER)
-DwildcardSuites="$(JOB7_SPARK_PROCEDURE_WILDCARD_SUITES)" -pl $(JOB345_MODULES)
- && mvn test $(MVN_OPTS_TEST) -Punit-tests
-DfailIfNoTests=false -pl $(JOB7_UT_MODULES)
- && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests
-DfailIfNoTests=false -pl $(JOB7_FT_MODULES)"
+ && mvn test $(MVN_OPTS_TEST) -Punit-tests
$(SCALA_MVN_TEST_FILTER)
-DwildcardSuites="$(JOB7_SPARK_PROCEDURE_WILDCARD_SUITES)"
$(JACOCO_AGENT_DESTFILE1_ARG) -pl $(JOB345_MODULES)
+ && mvn test $(MVN_OPTS_TEST) -Punit-tests
-DfailIfNoTests=false $(JACOCO_AGENT_DESTFILE2_ARG) -pl $(JOB7_UT_MODULES)
+ && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests
-DfailIfNoTests=false $(JACOCO_AGENT_DESTFILE3_ARG) -pl $(JOB7_FT_MODULES)"
- task: PublishTestResults@2
displayName: 'Publish Test Results'
inputs:
@@ -377,6 +467,68 @@ stages:
testResultsFiles: '**/surefire-reports/TEST-*.xml'
searchFolder: '$(Build.SourcesDirectory)'
failTaskOnFailedTests: true
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_exec_files.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/merged-jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-7'
+ publishLocation: 'Container'
- script: |
grep "testcase" */target/surefire-reports/*.xml
*/*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr
| head -n 100
displayName: Top 100 long-running testcases
+ - job: MergeAndPublishCoverage
+ displayName: 'Merge and Publish JaCoCo Code Coverage'
+ dependsOn:
+ - UT_FT_1
+ - UT_FT_2
+ - UT_FT_3
+ - UT_FT_4
+ - UT_FT_5
+ - UT_FT_6
+ - UT_FT_7
+ steps:
+ - task: DownloadBuildArtifacts@0
+ displayName: 'Download JaCoCo Execution Data Files'
+ inputs:
+ buildType: 'current'
+ downloadType: 'specific'
+ downloadPath: '$(System.ArtifactsDirectory)'
+ itemPattern: |
+ **/merged-jacoco-$(Build.BuildId)-*/*.exec
+ - task: JavaToolInstaller@0
+ inputs:
+ versionSpec: '8'
+ jdkArchitectureOption: 'x64'
+ jdkSourceOption: 'PreInstalled'
+ - script: |
+ ./scripts/jacoco/download_jacoco.sh
+ ./scripts/jacoco/merge_jacoco_job_files.sh
jacoco-lib/lib/jacococli.jar $(System.ArtifactsDirectory)
$(Build.SourcesDirectory)
+ displayName: 'Merge JaCoCo Execution Data Files'
+ - task: PublishBuildArtifacts@1
+ displayName: 'Publish Merged JaCoCo Execution Data File'
+ inputs:
+ PathtoPublish: '$(Build.SourcesDirectory)/jacoco.exec'
+ ArtifactName: 'merged-jacoco-$(Build.BuildId)-final'
+ publishLocation: 'Container'
+ - task: Maven@4
+ displayName: 'Aggregate Source and Class Files for JaCoCo'
+ inputs:
+ mavenPomFile: 'pom.xml'
+ goals: 'clean package'
+ options: $(MVN_OPTS_INSTALL) -Pcopy-files-for-jacoco -pl
$(JACOCO_MODULES)
+ publishJUnitResults: false
+ jdkVersionOption: '1.8'
+ - script: |
+ ./scripts/jacoco/generate_jacoco_coverage_report.sh
jacoco-lib/lib/jacococli.jar $(Build.SourcesDirectory)
+ displayName: 'Generate JaCoCo Code Coverage Report'
+ - task: PublishCodeCoverageResults@1
+ displayName: 'Publish JaCoCo Code Coverage'
+ inputs:
+ codeCoverageTool: 'JaCoCo'
+ summaryFileLocation:
'$(Build.SourcesDirectory)/jacoco-report.xml'
+ reportDirectory: '$(Build.SourcesDirectory)/jacoco-html-report'
+ failIfCoverageEmpty: true
diff --git a/pom.xml b/pom.xml
index 386545c29d7..86118830c96 100644
--- a/pom.xml
+++ b/pom.xml
@@ -203,7 +203,7 @@
<utilities.bundle.hive.scope>provided</utilities.bundle.hive.scope>
<utilities.bundle.hive.shade.prefix/>
<argLine>-Xmx2g -Xms128m</argLine>
- <jacoco.version>0.8.8</jacoco.version>
+ <jacoco.version>0.8.12</jacoco.version>
<presto.bundle.bootstrap.scope>compile</presto.bundle.bootstrap.scope>
<presto.bundle.bootstrap.shade.prefix>org.apache.hudi.</presto.bundle.bootstrap.shade.prefix>
<trino.bundle.bootstrap.scope>compile</trino.bundle.bootstrap.scope>
@@ -228,6 +228,10 @@
<springboot.version>2.7.3</springboot.version>
<spring.shell.version>2.1.1</spring.shell.version>
<snappy.version>1.1.10.7</snappy.version>
+ <!-- The following properties are only used for Jacoco coverage report
aggregation -->
+ <copy.files>false</copy.files>
+
<copy.files.target.dir>${maven.multiModuleProjectDirectory}</copy.files.target.dir>
+ <jacoco.agent.dest.filename>jacoco.exec</jacoco.agent.dest.filename>
</properties>
<scm>
@@ -1953,6 +1957,9 @@
<goals>
<goal>prepare-agent</goal>
</goals>
+ <configuration>
+
<destFile>${project.build.directory}/jacoco-agent/${jacoco.agent.dest.filename}</destFile>
+ </configuration>
</execution>
<execution>
<id>post-unit-tests</id>
@@ -2006,6 +2013,9 @@
<goals>
<goal>prepare-agent</goal>
</goals>
+ <configuration>
+
<destFile>${project.build.directory}/jacoco-agent/${jacoco.agent.dest.filename}</destFile>
+ </configuration>
</execution>
<execution>
<id>post-functional-tests</id>
@@ -2640,6 +2650,75 @@
</activation>
</profile>
+ <!-- This profile is only used to copy all source and class files into shared directories
+ for Jacoco to report test coverage across multiple source modules. The files are copied to
+ ${copy.files.target.dir}/aggregate-sources and ${copy.files.target.dir}/aggregate-classes
+ -->
+ <profile>
+ <id>copy-files-for-jacoco</id>
+ <activation>
+ <property>
+ <name>copy.files</name>
+ <value>true</value>
+ </property>
+ </activation>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>3.3.1</version>
+ <executions>
+ <!-- Copy source files for Jacoco report aggregation -->
+ <execution>
+ <id>copy-source-files</id>
+ <phase>process-sources</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+
<outputDirectory>${copy.files.target.dir}/aggregate-sources</outputDirectory>
+ <resources>
+ <resource>
+ <directory>${project.basedir}/src/main/java</directory>
+ <filtering>false</filtering>
+ <includes>
+ <include>**/*.java</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>${project.basedir}/src/main/scala</directory>
+ <filtering>false</filtering>
+ <includes>
+ <include>**/*.scala</include>
+ </includes>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ <!-- Copy class files for Jacoco report aggregation -->
+ <execution>
+ <id>copy-class-files</id>
+ <phase>process-classes</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+
<outputDirectory>${copy.files.target.dir}/aggregate-classes</outputDirectory>
+ <resources>
+ <resource>
+ <directory>${project.basedir}/target/classes</directory>
+ <includes>
+ <include>**/*.class</include>
+ </includes>
+ </resource>
+ </resources>
+ <useDefaultDelimiters>false</useDefaultDelimiters>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
</profiles>
-
</project>
diff --git a/scripts/jacoco/README.md b/scripts/jacoco/README.md
new file mode 100644
index 00000000000..c412ce10b31
--- /dev/null
+++ b/scripts/jacoco/README.md
@@ -0,0 +1,110 @@
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+-->
+
+# JaCoCo Code Coverage Report for Hudi
+
+This README describes how code coverage report across multiple modules works
for Hudi
+by leveraging JaCoCo.
+
+## Problem
+
+We used to report code coverage on each PR in the early days (
+see https://github.com/apache/hudi/pull/1667#issuecomment-633665810,
screenshot below).
+However, we have disabled it due to several problems:
+
+- We now use Azure DevOps Pipeline as CI to run tests in multiple jobs. Due to
permission issues, we mirror the repo and
+ branch for running the tests. This creates hurdles in reporting aggregation.
+- The Hudi project contains multiple source modules. There have been multiple code
refactoring iterations around module
+ organization so that some functional tests cover production code logic in a
different module, where JaCoCo simple
+ reporting cannot handle it well, leading to under-reporting on code coverage.
+
+<img width="928" alt="Screenshot 2025-02-14 at 13 54 08"
src="https://github.com/user-attachments/assets/e0ac34f4-a099-4e76-92b6-a8eac9bd2ee6"
/>
+
+## Tools
+
+JaCoCo is a free, open-source code coverage library for Java. It helps
developers understand how much of their codebase
+is actually being exercised by their tests. It is still the de facto standard
for Java code coverage reporting.
+
+JaCoCo supports `report-aggregate` for multi-module project but there are
certain limitations as of 0.8.12
+release
([1](https://www.eclemma.org/jacoco/trunk/doc/report-aggregate-mojo.html),
[2](https://stackoverflow.com/questions/50806424/reporting-and-merging-multi-module-jacoco-reports-with-report-aggregate),
[3](https://stackoverflow.com/questions/33078745/jacoco-maven-multi-module-project-coverage),
[4](https://github.com/jacoco/jacoco/issues/1731),
[5](https://github.com/jacoco/jacoco/issues/842),
[6](https://github.com/jacoco/jacoco/issues?q=is%3Aissue%20state%3Aopen%20aggregate)).
+One hack includes creating a new source module for reporting aggregation,
which is something we want to avoid if possible.
+
+However, JaCoCo also provides a powerful CLI tool
(https://www.jacoco.org/jacoco/trunk/doc/cli.html) which can do report
+manipulation at the file level, which we can use for custom report aggregation.
+
+## Solution
+
+At a high level, here's how JaCoCo generates the code coverage report:
+
+(1) While running tests, JaCoCo generates binary execution data for reporting
later. The execution data can be stored in
+a `jacoco.exec` file if enabled. It's not a human-readable text format. It's
designed for consumption by JaCoCo's
+reporting tools. The following key information is stored in `jacoco.exec`:
+
+- Session Information: Identifies the specific test run or program execution
(session ID, start time, dump time).
+- Class Identification: Uniquely identifies each class that was instrumented
and monitored, including its name and a
+ checksum (ID) of the original class bytecode.
+- Execution Data (Probes): For each instrumented class, it stores an array of
boolean values (probes). Each boolean
+ indicates whether a specific execution point (line of code or branch) within
that class was executed (true) or not (
+ false) during the session. This is the core data that drives code coverage
analysis.
+
+(2) Once tests finish, JaCoCo generates code coverage report in HTML and/or
XML based on the binary execution
+data (`jacoco.exec`).
+
+To make cross-module code coverage report work in Azure DevOps Pipeline (or in
other similar CI environments) for Hudi,
+here's the workflow:
+
+(1) When running tests from mvn command in each job, enable binary execution
data to be written to the storage, i.e.,
+through `prepare-agent` goal (see `pom.xml`). As we run multiple `mvn test`
commands in the same job with different
+args, to avoid collision, a unique `destFile` is configured for each command
(see `azure-pipelines-20230430.yml`);
+
+(2) Once each job finishes, multiple `*.exec` binary execution data files are
merged into one `merged-jacoco.exec`
+through JaCoCo CLI (see `Merge JaCoCo Execution Data Files` task in
`azure-pipelines-20230430.yml`). The merged
+execution data file is published as an artifact for later analysis (see
`Publish Merged JaCoCo Execution Data File` task
+in `azure-pipelines-20230430.yml`).
+
+(3) Once all jobs finish running all tests, all the JaCoCo execution data
files are processed (
+see `MergeAndPublishCoverage` job in `azure-pipelines-20230430.yml`). The
execution data files from multiple jobs are
+downloaded and merged again into a single file `jacoco.exec` through JaCoCo
CLI;
+
+(4) To generate the final report, the source files (`*.java`, `*.scala`) and
class files (`*.class`) must be under the
+same directory, not in different modules, due to the limitation of JaCoCo CLI
taking only a single directory path for
+each. So a new maven plugin execution target is added to do that (see
`copy-source-files` and `copy-class-files`
+in `pom.xml`). Once that's done, the final reporting is done through JaCoCo
CLI by using the aggregated source files,
+class files, and `jacoco.exec` (see `MergeAndPublishCoverage` job in
`azure-pipelines-20230430.yml`). Both
+the `jacoco.exec` and final reports are published.
+
+## Example Results
+
+Azure Run
+<img width="1543" alt="Screenshot 2025-02-14 at 13 28 16"
src="https://github.com/user-attachments/assets/05e7052c-2842-4a0e-ab0a-014eeb8e7652"
/>
+
+JaCoCo Coverage Report
+<img width="1559" alt="Screenshot 2025-02-14 at 13 30 32"
src="https://github.com/user-attachments/assets/b47a8e78-8f98-4dfb-b64d-d926bfea5198"
/>
+<img width="1570" alt="Screenshot 2025-02-14 at 13 30 40"
src="https://github.com/user-attachments/assets/369768a0-9e82-4a29-a14d-2550048ef07f"
/>
+
+Published Artifacts
+<img width="1586" alt="Screenshot 2025-02-14 at 13 31 05"
src="https://github.com/user-attachments/assets/02cb75b1-3f7f-4f17-8392-8e0a452d31cf"
/>
+
+## Scripts
+
+- `download_jacoco.sh`: downloads JaCoCo binaries, especially the CLI jar, for
usage.
+- `merge_jacoco_exec_files.sh`: merges multiple JaCoCo execution data files in
multiple modules.
+- `merge_jacoco_job_files.sh`: merges multiple JaCoCo execution data files
from multiple Azure pipeline jobs.
+- `generate_jacoco_coverage_report.sh`: generates the JaCoCo code coverage
report by taking the execution data file,
+ source files and class files.
\ No newline at end of file
diff --git a/scripts/jacoco/download_jacoco.sh
b/scripts/jacoco/download_jacoco.sh
new file mode 100755
index 00000000000..e7b79d28638
--- /dev/null
+++ b/scripts/jacoco/download_jacoco.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+wget https://repo1.maven.org/maven2/org/jacoco/jacoco/0.8.12/jacoco-0.8.12.zip
+unzip jacoco-0.8.12.zip -d jacoco-lib
+ls -l jacoco-lib/lib/jacococli.jar
diff --git a/scripts/jacoco/generate_jacoco_coverage_report.sh
b/scripts/jacoco/generate_jacoco_coverage_report.sh
new file mode 100755
index 00000000000..3031a7d3a0b
--- /dev/null
+++ b/scripts/jacoco/generate_jacoco_coverage_report.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+/bin/bash --version
+echo "Jacoco CLI jar: $1"
+echo "Hudi source directory: $2"
+java -jar $1 report $2/jacoco.exec --sourcefiles $2/aggregate-sources
--classfiles $2/aggregate-classes --html $2/jacoco-html-report --xml
$2/jacoco-report.xml
diff --git a/scripts/jacoco/merge_jacoco_exec_files.sh
b/scripts/jacoco/merge_jacoco_exec_files.sh
new file mode 100755
index 00000000000..be3fcc37800
--- /dev/null
+++ b/scripts/jacoco/merge_jacoco_exec_files.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+/bin/bash --version
+shopt -s globstar
+echo "Jacoco CLI jar: $1"
+echo "Hudi source directory: $2"
+java -jar $1 merge $2/**/jacoco-agent/**/*.exec --destfile merged-jacoco.exec
diff --git a/scripts/jacoco/merge_jacoco_job_files.sh
b/scripts/jacoco/merge_jacoco_job_files.sh
new file mode 100755
index 00000000000..6ed3a0d0511
--- /dev/null
+++ b/scripts/jacoco/merge_jacoco_job_files.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+/bin/bash --version
+shopt -s globstar
+echo "Jacoco CLI jar: $1"
+echo "JaCoCo execution file directory: $2"
+echo "Hudi source directory: $3"
+java -jar $1 merge $2/**/*.exec --destfile $3/jacoco.exec