This is an automated email from the ASF dual-hosted git repository.
yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 77b7b4ef824 Use wheels for Dataflow postcommit tests (#25970)
77b7b4ef824 is described below
commit 77b7b4ef82416683377bfa8bd09239ce002d7246
Author: Yi Hu <[email protected]>
AuthorDate: Thu Apr 6 19:54:55 2023 -0400
Use wheels for Dataflow postcommit tests (#25970)
* Use wheels for Dataflow postcommit tests
* Build wheel using cibuildwheels
---
.github/workflows/build_wheels.yml | 1 +
.test-infra/jenkins/job_PostCommit_Python.groovy | 1 +
...ommit_Python_ValidatesRunner_Dataflow_V2.groovy | 1 +
.test-infra/jenkins/job_PreCommit_PythonIT.groovy | 1 +
gradle.properties | 4 +-
sdks/python/build.gradle | 45 +++++++
sdks/python/test-suites/dataflow/build.gradle | 2 +-
sdks/python/test-suites/dataflow/common.gradle | 149 ++++++++++++---------
sdks/python/test-suites/direct/build.gradle | 2 +-
sdks/python/test-suites/gradle.properties | 2 -
10 files changed, 142 insertions(+), 66 deletions(-)
diff --git a/.github/workflows/build_wheels.yml
b/.github/workflows/build_wheels.yml
index 24f464171ac..ead302975d6 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -260,6 +260,7 @@ jobs:
if: ${{matrix.arch == 'aarch64'}}
name: Set up QEMU
- name: Install cibuildwheel
+ # note: sync cibuildwheel version with gradle task sdks:python:bdistPy* steps
run: pip install cibuildwheel==2.9.0
- name: Build wheel
working-directory: apache-beam-source
diff --git a/.test-infra/jenkins/job_PostCommit_Python.groovy
b/.test-infra/jenkins/job_PostCommit_Python.groovy
index ba9a40f4d61..968afcfff59 100644
--- a/.test-infra/jenkins/job_PostCommit_Python.groovy
+++ b/.test-infra/jenkins/job_PostCommit_Python.groovy
@@ -42,6 +42,7 @@ ALL_SUPPORTED_VERSIONS.each { pythonVersion ->
rootBuildScriptDir(commonJobProperties.checkoutDir)
tasks(":python${versionSuffix}PostCommit")
commonJobProperties.setGradleSwitches(delegate)
+ switches("-PuseWheelDistribution")
}
}
}
diff --git
a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow_V2.groovy
b/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow_V2.groovy
index 060cf33aabc..b4667b90808 100644
---
a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow_V2.groovy
+++
b/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow_V2.groovy
@@ -39,6 +39,7 @@
PostcommitJobBuilder.postCommitJob('beam_PostCommit_Py_VR_Dataflow_V2', 'Run Pyt
tasks(':sdks:python:test-suites:dataflow:validatesRunnerBatchTestsV2')
tasks(':sdks:python:test-suites:dataflow:validatesRunnerStreamingTestsV2')
switches('-PuseRunnerV2')
+ switches('-PuseWheelDistribution')
commonJobProperties.setGradleSwitches(delegate)
}
}
diff --git a/.test-infra/jenkins/job_PreCommit_PythonIT.groovy
b/.test-infra/jenkins/job_PreCommit_PythonIT.groovy
index 21d249647e8..446f6c48387 100644
--- a/.test-infra/jenkins/job_PreCommit_PythonIT.groovy
+++ b/.test-infra/jenkins/job_PreCommit_PythonIT.groovy
@@ -22,6 +22,7 @@ PrecommitJobBuilder builder = new PrecommitJobBuilder(
scope: this,
nameBase: 'Python_Integration',
gradleTask: ':pythonPreCommitIT',
+ gradleSwitches: ['-PuseWheelDistribution'],
timeoutMins: 180,
triggerPathPatterns: [
'^model/.*$',
diff --git a/gradle.properties b/gradle.properties
index 717b77db6fe..d7cfb5dc69c 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -38,5 +38,7 @@ javaVersion=1.8
docker_image_default_repo_root=apache
docker_image_default_repo_prefix=beam_
+# supported flink versions
flink_versions=1.12,1.13,1.14,1.15,1.16
-
+# supported python versions
+python_versions=3.7,3.8,3.9,3.10,3.11
diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle
index 134d6db8cbb..7dac7c2ec0f 100644
--- a/sdks/python/build.gradle
+++ b/sdks/python/build.gradle
@@ -39,6 +39,7 @@ build.dependsOn tasks.named("buildPython")
// Create a Python source distribution tarball.
def tarball = "apache-beam.tar.gz"
def sdist = tasks.register("sdist") {
+ description "Create a Python source distribution tarball"
dependsOn setupVirtualenv
doLast {
@@ -64,6 +65,50 @@ artifacts {
distTarBall file: file("${buildDir}/${tarball}"), builtBy: sdist
}
+// Create Python wheels for given platform and Python version
+// build identifiers for cibuildwheel
+def platform_identifiers_map = [
+ linux: 'manylinux_*64*', // e.g. manylinux_x86_64, manylinux_aarch64
+ macos:'macosx_*64*', // e.g. macosx_x86_64, macosx_arm64
+ windows: 'win_*64*', // e.g. win_amd64, win_arm64
+]
+
+platform_identifiers_map.each { platform, idsuffix ->
+ def archs = 'auto'
+ // note: A workaround for the arm64 platform in the gradle environment. For some
+ // reason the task fails with "Invalid archs option {<Architecture.arm64: 'arm64'>}."
+ // even though os.arch is 'aarch64'. Running the same cibuildwheel command
+ // directly in a shell succeeds, however.
+ if (platform == 'linux' &&
'aarch64'.equalsIgnoreCase(System.getProperty("os.arch"))) {
+ archs = 'aarch64'
+ }
+ getVersionsAsList('python_versions').each { it ->
+ def pyversion = it.replace('.', '')
+
+ project.tasks.register("bdistPy${pyversion}${platform}") {
+ description "Build a Python wheel distribution for Py${pyversion}
${platform}"
+ dependsOn setupVirtualenv
+ // need sdist task to generate protos
+ dependsOn ':sdks:python:sdist'
+
+ // generated installable Python SDK package
+ doLast {
+ exec {
+ environment CIBW_BUILD: "cp${pyversion}-${idsuffix}"
+ environment CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib"
+ environment CIBW_BEFORE_BUILD: "pip install cython numpy && pip
install --upgrade setuptools"
+ // note: keep the cibuildwheel version in sync with the GitHub Action
+ // .github/workflows/build_wheels.yml:build_wheels "Install cibuildwheel" step
+ executable 'sh'
+ args '-c', ". ${envdir}/bin/activate && " +
+ "pip install cibuildwheel==2.9.0 && " +
+ "cibuildwheel --print-build-identifiers --platform ${platform}
--archs ${archs} && " +
+ "cibuildwheel --output-dir ${buildDir} --platform ${platform}
--archs ${archs}"
+ }
+ }
+ }
+ }
+}
/*************************************************************************************************/
// Non-testing builds and analysis tasks
diff --git a/sdks/python/test-suites/dataflow/build.gradle
b/sdks/python/test-suites/dataflow/build.gradle
index 548a50246a2..992dcaa0e35 100644
--- a/sdks/python/test-suites/dataflow/build.gradle
+++ b/sdks/python/test-suites/dataflow/build.gradle
@@ -74,7 +74,7 @@ task validatesRunnerStreamingTestsV2 {
}
task validatesContainerTests {
- getVersionsAsList('dataflow_validates_container_tests').each {
+ getVersionsAsList('python_versions').each {
dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:validatesContainer")
}
}
diff --git a/sdks/python/test-suites/dataflow/common.gradle
b/sdks/python/test-suites/dataflow/common.gradle
index 900715ccc0e..2552eac431a 100644
--- a/sdks/python/test-suites/dataflow/common.gradle
+++ b/sdks/python/test-suites/dataflow/common.gradle
@@ -20,14 +20,45 @@
evaluationDependsOn(':runners:google-cloud-dataflow-java:worker')
evaluationDependsOn(':sdks:python:test-suites:xlang')
enablePythonPerformanceTest()
+String pythonVersionNumber = project.ext.pythonVersion.replace('.', '')
String pythonVersionSuffix = project.ext.pythonVersion
- ? "-py${project.ext.pythonVersion.replace('.', '')}"
+ ? "-py${pythonVersionNumber}"
: ''
dependencies {
distTarBall project(path: ":sdks:python", configuration: "distTarBall")
}
+task initializeForDataflowJob{
+ def wheelCompatible = "amd64".equalsIgnoreCase(System.getProperty("os.arch"))
+ if (!wheelCompatible && project.hasProperty('useWheelDistribution')) {
+ throw new GradleException('-PuseWheelDistribution is set for the task
but the ' +
+ 'host system platform is not compatible with Dataflow worker container
image.')
+ }
+ dependsOn 'installGcpTest'
+
+ if (project.hasProperty('useWheelDistribution')) {
+ dependsOn ":sdks:python:bdistPy${pythonVersionNumber}linux"
+
+ doLast {
+ def collection =
project.fileTree(project.project(':sdks:python').buildDir){
+ include "**/apache_beam-*cp${pythonVersionNumber}*manylinux*.whl"
+ }
+ // sdkLocation ext is set at execution time
+ String packageFilename = collection.singleFile.toString()
+ project.ext.sdkLocation = packageFilename
+ logger.info('Use wheel {} for sdk_location.', packageFilename)
+ }
+ } else {
+ dependsOn ':sdks:python:sdist'
+
+ // sdkLocation ext is available at config time
+ String packageFilename = files(configurations.distTarBall.files).singleFile
+ project.ext.sdkLocation = packageFilename
+ logger.info('Use tarball {} for sdk_location.', packageFilename)
+ }
+}
+
def runScriptsDir = "${rootDir}/sdks/python/scripts"
// Basic test options for ITs running on Jenkins.
@@ -42,8 +73,7 @@ def preCommitIT(String runScriptsDir, String envdir, Boolean
streaming, Boolean
def suffix = runnerV2 ? '_V2' : ''
suffix = streaming ? "_streaming$suffix" : "_batch$suffix"
task "preCommitIT${suffix}" {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
doLast {
// Basic integration tests to run in PreCommit
@@ -61,7 +91,7 @@ def preCommitIT(String runScriptsDir, String envdir, Boolean
streaming, Boolean
def argMap = [
"test_opts" : testOpts,
- "sdk_location":
files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"suite" : "preCommitIT-df${pythonSuffix}",
]
@@ -97,14 +127,13 @@ task preCommitIT_V2{
}
task postCommitIT {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
doLast {
def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
def argMap = [
"test_opts": testOpts,
- "sdk_location": files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"suite": "postCommitIT-df${pythonVersionSuffix}",
"collect": "it_postcommit"
]
@@ -117,14 +146,13 @@ task postCommitIT {
}
task postCommitSickbay {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
doLast {
def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
def argMap = [
"test_opts": testOpts,
- "sdk_location": files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"suite": "postCommitIT-df${pythonVersionSuffix}",
"collect": "it_postcommit_sickbay"
]
@@ -137,14 +165,13 @@ task postCommitSickbay {
}
task spannerioIT {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
doLast {
def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
def argMap = [
"test_opts": testOpts,
- "sdk_location": files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"suite": "postCommitIT-df${pythonVersionSuffix}",
"collect": "spannerio_it"
]
@@ -157,15 +184,14 @@ task spannerioIT {
}
task examples {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
def testOpts = basicPytestOpts
// Execute tests with xdists
doFirst {
def argMap = [
"test_opts": testOpts + ["--numprocesses=8", "--dist=loadfile"],
- "sdk_location": files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"runner_v2": "true",
"suite": "postCommitIT-df${pythonVersionSuffix}-xdist",
"collect": "examples_postcommit and not no_xdist and not
sickbay_dataflow"
@@ -181,7 +207,7 @@ task examples {
doLast {
def argMap = [
"test_opts": testOpts,
- "sdk_location": files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"runner_v2": "true",
"suite": "postCommitIT-df${pythonVersionSuffix}-no-xdist",
"collect": "examples_postcommit and no_xdist and not
sickbay_dataflow"
@@ -195,25 +221,23 @@ task examples {
}
task validatesRunnerBatchTests {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
- def argMap = [
- "test_opts" : basicPytestOpts + ["--numprocesses=8"],
- "sdk_location": files(configurations.distTarBall.files).singleFile,
- "suite" : "validatesRunnerBatchTests-df${pythonVersionSuffix}",
- "collect": "it_validatesrunner and not no_sickbay_batch"
- ]
-
- if (project.hasProperty('useRunnerV2')) {
- argMap.put("runner_v2", "true")
- }
+ doLast {
+ def argMap = [
+ "test_opts" : basicPytestOpts + ["--numprocesses=8"],
+ "sdk_location": project.ext.sdkLocation,
+ "suite" : "validatesRunnerBatchTests-df${pythonVersionSuffix}",
+ "collect": "it_validatesrunner and not no_sickbay_batch"
+ ]
- if (project.hasProperty('disableRunnerV2')) {
- argMap.put("disable_runner_v2", "true")
- }
+ if (project.hasProperty('useRunnerV2')) {
+ argMap.put("runner_v2", "true")
+ }
- doLast {
+ if (project.hasProperty('disableRunnerV2')) {
+ argMap.put("disable_runner_v2", "true")
+ }
def cmdArgs = mapToArgString(argMap)
exec {
executable 'sh'
@@ -223,8 +247,7 @@ task validatesRunnerBatchTests {
}
task validatesRunnerStreamingTests {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
// TODO(BEAM-3544,https://github.com/apache/beam/issues/19012): Disable
tests with 'sickbay-streaming' tag.
// Execute tests with xdists
@@ -232,7 +255,7 @@ task validatesRunnerStreamingTests {
def argMap = [
"test_opts": basicPytestOpts + ["--numprocesses=8"],
"streaming": "true",
- "sdk_location":
files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"suite":
"validatesRunnerStreamingTests-df${pythonVersionSuffix}-xdist",
"collect": "it_validatesrunner and not no_sickbay_streaming
and not no_xdist",
"runner_v2": "true",
@@ -250,7 +273,7 @@ task validatesRunnerStreamingTests {
def argMap = [
"test_opts": basicPytestOpts,
"streaming": "true",
- "sdk_location":
files(configurations.distTarBall.files).singleFile,
+ "sdk_location": project.ext.sdkLocation,
"suite":
"validatesRunnerStreamingTests-df${pythonVersionSuffix}-noxdist",
"collect": "it_validatesrunner and not no_sickbay_streaming
and no_xdist",
"runner_v2": "true",
@@ -264,28 +287,33 @@ task validatesRunnerStreamingTests {
}
}
-task runPerformanceTest(type: Exec) {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+task runPerformanceTest {
+ dependsOn 'initializeForDataflowJob'
def test = project.findProperty('test')
def suite = "runPerformanceTest-df${pythonVersionSuffix}"
def xUnitFile ="pytest-${suite}.xml"
- def testOpts = project.findProperty('test-pipeline-options')
- testOpts += "
--sdk_location=${files(configurations.distTarBall.files).singleFile}"
+ doLast {
+ def testOpts = project.findProperty('test-pipeline-options')
+ testOpts += " --sdk_location=${project.ext.sdkLocation}"
- setWorkingDir "${project.rootDir}/sdks/python"
- commandLine 'sh', '-c', ". ${envdir}/bin/activate && pytest -o
junit_suite_name=${suite}" +
+ exec {
+ workingDir "${project.rootDir}/sdks/python"
+ executable 'sh'
+ args '-c', ". ${envdir}/bin/activate && pytest -o
junit_suite_name=${suite}" +
" ${test} --test-pipeline-options=\"${testOpts}\"
--junitxml=${xUnitFile} --timeout=1800"
+ }
+ }
}
task mongodbioIT {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
- def opts = findProperty('opts')
- opts = String.format("%s %s", opts,
"--sdk_location=${files(configurations.distTarBall.files).singleFile}")
+ dependsOn 'initializeForDataflowJob'
+
doLast {
+ def opts = findProperty('opts')
+ opts = String.format("%s %s", opts,
"--sdk_location=${project.ext.sdkLocation}")
+
exec {
executable 'sh'
args '-c', ". ${envdir}/bin/activate && python -m
apache_beam.io.mongodbio_it_test ${opts}"
@@ -294,7 +322,7 @@ task mongodbioIT {
}
task installChicagoTaxiExampleRequirements {
- dependsOn 'installGcpTest'
+ dependsOn 'initializeForDataflowJob'
doLast {
exec {
@@ -307,13 +335,12 @@ task installChicagoTaxiExampleRequirements {
task chicagoTaxiExample {
dependsOn 'installChicagoTaxiExampleRequirements'
- dependsOn ':sdks:python:sdist'
-
- def gcsRoot = findProperty('gcsRoot')
- def pipelineOptions = findProperty('pipelineOptions') ?: ""
- pipelineOptions += "
--sdk_location=\"${files(configurations.distTarBall.files).singleFile}\""
doLast {
+ def gcsRoot = findProperty('gcsRoot')
+ def pipelineOptions = findProperty('pipelineOptions') ?: ""
+ pipelineOptions += " --sdk_location=\"${project.ext.sdkLocation}\""
+
exec {
workingDir
"$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/"
executable 'sh'
@@ -324,8 +351,7 @@ task chicagoTaxiExample {
task validatesContainer() {
def pyversion = "${project.ext.pythonVersion.replace('.', '')}"
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
+ dependsOn 'initializeForDataflowJob'
dependsOn ":sdks:python:container:py${pyversion}:docker"
def runScriptsPath =
"${rootDir}/sdks/python/container/run_validatescontainer.sh"
doLast {
@@ -333,7 +359,7 @@ task validatesContainer() {
executable 'sh'
args '-c', ". ${envdir}/bin/activate && cd ${rootDir} &&
${runScriptsPath} " +
"${project.ext.pythonVersion} " +
- "${files(configurations.distTarBall.files).singleFile}"
+ "${project.ext.sdkLocation}"
}
}
}
@@ -346,6 +372,9 @@ def tensorRTTests = tasks.create("tensorRTtests") {
def argMap = [
"runner": "DataflowRunner",
"machine_type":"n1-standard-4",
+ // TODO(https://github.com/apache/beam/issues/22651): Build the docker image for TensorRT tests at run time.
+ // This would also make it possible to use a wheel for "--sdk_location" like the other tasks,
+ // and eliminate the distTarBall dependency declaration for this project.
"sdk_container_image":
"us.gcr.io/apache-beam-testing/python-postcommit-it/tensor_rt:latest",
"sdk_location": files(configurations.distTarBall.files).singleFile,
"project": "apache-beam-testing",
@@ -364,7 +393,7 @@ def tensorRTTests = tasks.create("tensorRTtests") {
}
task installTFTRequirements {
- dependsOn 'installGcpTest'
+ dependsOn 'initializeForDataflowJob'
doLast {
exec {
workingDir
"$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/cloudml/"
@@ -376,13 +405,11 @@ task installTFTRequirements {
// Tensorflow transform integration and benchmarking tests on Apache Beam.
task tftTests {
- dependsOn 'installGcpTest'
- dependsOn ':sdks:python:sdist'
dependsOn "installTFTRequirements"
doLast {
def opts = project.findProperty('opts')
- opts += "
--sdk_location=${files(configurations.distTarBall.files).singleFile}"
+ opts += " --sdk_location=${project.ext.sdkLocation}"
def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
def argMap = [
"test_opts": testOpts,
diff --git a/sdks/python/test-suites/direct/build.gradle
b/sdks/python/test-suites/direct/build.gradle
index 12c87caf85d..2ebbf21fd03 100644
--- a/sdks/python/test-suites/direct/build.gradle
+++ b/sdks/python/test-suites/direct/build.gradle
@@ -26,7 +26,7 @@ tasks.register("mongodbioIT") {
}
tasks.register("examplesPostCommit") {
- getVersionsAsList('direct_examples_postcommit_py_versions').each {
+ getVersionsAsList('python_versions').each {
dependsOn.add(":sdks:python:test-suites:direct:py${getVersionSuffix(it)}:examples")
}
}
\ No newline at end of file
diff --git a/sdks/python/test-suites/gradle.properties
b/sdks/python/test-suites/gradle.properties
index cce99ebec94..dc407b03bae 100644
--- a/sdks/python/test-suites/gradle.properties
+++ b/sdks/python/test-suites/gradle.properties
@@ -32,7 +32,6 @@ dataflow_chicago_taxi_example_task_py_versions=3.7
dataflow_validates_runner_batch_tests=3.7,3.9
dataflow_validates_runner_streaming_tests=3.7,3.9
-dataflow_validates_container_tests=3.7,3.8,3.9,3.10,3.11
# TODO: Enable following tests after making sure we have enough capacity.
dataflow_validates_runner_batch_tests_V2=3.7,3.11
dataflow_validates_runner_streaming_tests_V2=3.7,3.11
@@ -41,7 +40,6 @@ dataflow_examples_postcommit_py_versions=3.11
dataflow_cloudml_benchmark_tests_py_versions=3.9
# direct runner test-suites
direct_mongodbio_it_task_py_versions=3.11
-direct_examples_postcommit_py_versions=3.7,3.8,3.9,3.10,3.11
# flink runner test-suites
flink_validates_runner_precommit_py_versions=3.11