[
https://issues.apache.org/jira/browse/BEAM-5058?focusedWorklogId=170063&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-170063
]
ASF GitHub Bot logged work on BEAM-5058:
----------------------------------------
Author: ASF GitHub Bot
Created on: 28/Nov/18 00:20
Start Date: 28/Nov/18 00:20
Worklog Time Spent: 10m
Work Description: markflyhigh closed pull request #6707: [BEAM-5058]
Parallelize Python Precommit and add basic ITs
URL: https://github.com/apache/beam/pull/6707
This PR was merged from a forked repository. Because GitHub hides the
original diff of a foreign pull request once it is merged, the diff is
reproduced below for the sake of provenance:
diff --git a/.test-infra/jenkins/CommonJobProperties.groovy
b/.test-infra/jenkins/CommonJobProperties.groovy
index fcac3eca26ec..c85c77560c51 100644
--- a/.test-infra/jenkins/CommonJobProperties.groovy
+++ b/.test-infra/jenkins/CommonJobProperties.groovy
@@ -331,4 +331,24 @@ class CommonJobProperties {
static String makePathAbsolute(String path) {
return '"$WORKSPACE/' + path + '"'
}
+
+ /**
+ * Create a temporary directory.
+ * @return Absolute path of the directory.
+ */
+ static String createTempDirectory() {
+ def tempDir = File.createTempDir()
+ assert tempDir.exists()
+ return tempDir.absolutePath
+ }
+
+ /**
+ * Delete a directory recursively.
+ * @param path Absolute path of a directory to be deleted.
+ */
+ static void deleteDirectory(String path) {
+ def dir = new File(path)
+ dir.deleteDir()
+ assert !dir.exists()
+ }
}
diff --git a/.test-infra/jenkins/job_PreCommit_Python.groovy
b/.test-infra/jenkins/job_PreCommit_Python.groovy
index 35b34f2c6789..43d23004cc40 100644
--- a/.test-infra/jenkins/job_PreCommit_Python.groovy
+++ b/.test-infra/jenkins/job_PreCommit_Python.groovy
@@ -16,12 +16,16 @@
* limitations under the License.
*/
+import CommonJobProperties as commonJobProperties
import PrecommitJobBuilder
+// Generate a temporary directory to hold virtualenvs
+def tempDir = commonJobProperties.createTempDirectory()
+
PrecommitJobBuilder builder = new PrecommitJobBuilder(
scope: this,
nameBase: 'Python',
- gradleTask: ':pythonPreCommit',
+ gradleTask: ":pythonPreCommit -PenvBaseDir=$tempDir",
triggerPathPatterns: [
'^model/.*$',
'^runners/.*$',
@@ -36,3 +40,6 @@ builder.build {
archiveJunit('**/nosetests.xml')
}
}
+
+// Clean up
+commonJobProperties.deleteDirectory(tempDir)
\ No newline at end of file
diff --git a/build.gradle b/build.gradle
index 269da86f8656..e9a2cadc5b8f 100644
--- a/build.gradle
+++ b/build.gradle
@@ -240,6 +240,7 @@ task goIntegrationTests() {
task pythonPreCommit() {
dependsOn ":beam-sdks-python:preCommit"
+ dependsOn ":beam-sdks-python-precommit-dataflow:precommitIT"
}
task pythonPostCommit() {
diff --git
a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index ba17983856ce..aaf717dc2e8d 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -1492,5 +1492,79 @@ artifactId=${project.name}
dependsOn ':beam-sdks-java-container:docker'
}
}
+
+ /**
***********************************************************************************************/
+
+ project.ext.applyPythonNature = {
+
+ // Define common lifecycle tasks and artifact types
+ project.apply plugin: "base"
+
+ // For some reason base doesn't define a test task so we define it
below and make
+ // check depend on it. This makes the Python project similar to the task
layout like
+ // Java projects, see
https://docs.gradle.org/4.2.1/userguide/img/javaPluginTasks.png
+ project.task('test', type: Test) {}
+ project.check.dependsOn project.test
+
+
project.evaluationDependsOn(":beam-runners-google-cloud-dataflow-java-fn-api-worker")
+
+ project.ext.envdir = project.findProperty('envBaseDir') ?:
"${project.rootProject.buildDir}"
+ project.ext.envdir = project.ext.envdir + "/${project.name}/gradleenv"
+ project.ext.pythonRootDir = "${project.rootDir}/sdks/python"
+
+ project.task('setupVirtualenv') {
+ doLast {
+ project.exec { commandLine 'virtualenv', "${project.ext.envdir}" }
+ project.exec {
+ executable 'sh'
+ args '-c', ". ${project.ext.envdir}/bin/activate && pip install
--upgrade tox==3.0.0 grpcio-tools==1.3.5"
+ }
+ }
+ // Gradle will delete outputs whenever it thinks they are stale.
Putting a
+ // specific binary here could make gradle delete it while pip will
believe
+ // the package is fully installed.
+ outputs.dirs(project.ext.envdir)
+ }
+
+ project.configurations { distConfig }
+
+ project.task('sdist', dependsOn: 'setupVirtualenv') {
+ doLast {
+ project.exec {
+ executable 'sh'
+ args '-c', ". ${project.ext.envdir}/bin/activate && python
${project.ext.pythonRootDir}/setup.py sdist --formats zip,gztar --dist-dir
${project.buildDir}"
+ }
+ def collection = project.fileTree("${project.buildDir}"){ include
'**/*.tar.gz' exclude '**/apache-beam.tar.gz'}
+ println "sdist archive name: ${collection.singleFile}"
+ // we need a fixed name for the artifact
+ project.copy { from collection.singleFile; into
"${project.buildDir}"; rename { 'apache-beam.tar.gz' } }
+ }
+ }
+
+ project.artifacts {
+ distConfig file:
project.file("${project.buildDir}/apache-beam.tar.gz"), builtBy: project.sdist
+ }
+
+ project.task('installGcpTest', dependsOn: 'setupVirtualenv') {
+ doLast {
+ project.exec {
+ executable 'sh'
+ args '-c', ". ${project.ext.envdir}/bin/activate && pip install -e
${project.ext.pythonRootDir}/[gcp,test]"
+ }
+ }
+ }
+ project.installGcpTest.mustRunAfter project.sdist
+
+ project.task('cleanPython', dependsOn: 'setupVirtualenv') {
+ doLast {
+ project.exec {
+ executable 'sh'
+ args '-c', ". ${project.ext.envdir}/bin/activate && python
${project.ext.pythonRootDir}/setup.py clean"
+ }
+ project.delete project.buildDir
+ }
+ }
+ project.clean.dependsOn project.cleanPython
+ }
}
}
diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle
index d26e292668de..20dc7743425c 100644
--- a/sdks/python/build.gradle
+++ b/sdks/python/build.gradle
@@ -16,74 +16,19 @@
* limitations under the License.
*/
-// Define common lifecycle tasks and artifact types
-apply plugin: "base"
+apply plugin: org.apache.beam.gradle.BeamModulePlugin
+applyPythonNature()
/*************************************************************************************************/
// Basic build and Python environment setup/cleanup
-// For some reason base doesn't define a test task so we define it below and
make
-// check depend on it. This makes the Python project similar to the task
layout like
-// Java projects, see
https://docs.gradle.org/4.2.1/userguide/img/javaPluginTasks.png
-task test {}
-check.dependsOn test
-
-evaluationDependsOn(":beam-runners-google-cloud-dataflow-java-fn-api-worker")
-
-def envdir = "${project.buildDir}/gradleenv"
-
-task setupVirtualenv {
- doLast {
- exec {
- commandLine 'virtualenv', "${envdir}"
- }
- exec {
- executable 'sh'
- args '-c', ". ${envdir}/bin/activate && pip install --upgrade tox==3.0.0
grpcio-tools==1.3.5"
- }
- }
- // Gradle will delete outputs whenever it thinks they are stale. Putting a
- // specific binary here could make gradle delete it while pip will believe
- // the package is fully installed.
- outputs.dirs(envdir)
-}
-
-configurations { distConfig }
-
-task sdist(dependsOn: 'setupVirtualenv') {
- doLast {
- exec {
- executable 'sh'
- args '-c', ". ${envdir}/bin/activate && python setup.py sdist --formats
zip,gztar --dist-dir ${project.buildDir}"
- }
- def collection = fileTree("${project.buildDir}"){ include '**/*.tar.gz'
exclude '**/apache-beam.tar.gz'}
- println "sdist archive name: ${collection.singleFile}"
- // we need a fixed name for the artifact
- copy { from collection.singleFile; into "${project.buildDir}"; rename {
'apache-beam.tar.gz' } }
- }
-}
-
-artifacts {
- distConfig file: file("${project.buildDir}/apache-beam.tar.gz"), builtBy:
sdist
-}
-
-task cleanPython(dependsOn: 'setupVirtualenv') {
- doLast {
- exec {
- executable 'sh'
- args '-c', ". ${envdir}/bin/activate && python setup.py clean"
- }
- }
-}
-clean.dependsOn cleanPython
-
task buildPython(dependsOn: 'setupVirtualenv') {
doLast {
println 'Building Python Dependencies'
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate && python setup.py build
--build-base ${project.buildDir}"
+ args '-c', ". ${project.ext.envdir}/bin/activate && python setup.py
build --build-base ${project.buildDir}"
}
}
}
@@ -111,7 +56,7 @@ def toxTask = {
doLast {
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate && ./scripts/run_tox.sh $tox_env"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./scripts/run_tox.sh $tox_env"
}
}
inputs.files pythonSdkDeps
@@ -164,16 +109,6 @@ task preCommit() {
/*************************************************************************************************/
// E2E integration testing and validates runner testing
-task installGcpTest(dependsOn: 'setupVirtualenv') {
- doLast {
- exec {
- executable 'sh'
- args '-c', ". ${envdir}/bin/activate && pip install -e .[gcp,test]"
- }
- }
-}
-installGcpTest.mustRunAfter sdist
-
// Basic test options for ITs running on Jenkins.
def basicTestOpts = [
"--nocapture", // print stdout instantly
@@ -212,7 +147,7 @@ task directRunnerIT(dependsOn: 'installGcpTest') {
def batchCmdArgs = mapToArgString(argMap)
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate &&
./scripts/run_integration_test.sh $batchCmdArgs"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./scripts/run_integration_test.sh $batchCmdArgs"
}
}
@@ -229,7 +164,7 @@ task directRunnerIT(dependsOn: 'installGcpTest') {
def streamingCmdArgs = mapToArgString(argMap)
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate &&
./scripts/run_integration_test.sh $streamingCmdArgs"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./scripts/run_integration_test.sh $streamingCmdArgs"
}
}
}
@@ -277,7 +212,7 @@ task portableWordCount(dependsOn: 'installGcpTest') {
options += ["--job_endpoint=${project.property('jobEndpoint')}"]
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate && python -m
apache_beam.examples.wordcount ${options.join(' ')}"
+ args '-c', ". ${project.ext.envdir}/bin/activate && python -m
apache_beam.examples.wordcount ${options.join(' ')}"
// TODO: Check that the output file is generated and runs.
}
}
@@ -306,7 +241,7 @@ task integrationTest(dependsOn: ['installGcpTest',
'sdist']) {
def cmdArgs = mapToArgString(argMap)
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
}
}
}
@@ -318,7 +253,7 @@ task postCommitIT(dependsOn: ['installGcpTest', 'sdist']) {
def cmdArgs = mapToArgString(["test_opts": testOpts])
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
}
}
}
@@ -329,7 +264,7 @@ task validatesRunnerBatchTests(dependsOn:
['installGcpTest', 'sdist']) {
def cmdArgs = mapToArgString(["test_opts": testOpts])
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
}
}
}
@@ -348,7 +283,7 @@ task validatesRunnerStreamingTests(dependsOn:
['installGcpTest', 'sdist']) {
def cmdArgs = mapToArgString(argMap)
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./scripts/run_integration_test.sh $cmdArgs"
}
}
}
@@ -357,7 +292,7 @@ task hdfsIntegrationTest(dependsOn: 'installGcpTest') {
doLast {
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate &&
./apache_beam/io/hdfs_integration_test/hdfs_integration_test.sh"
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
./apache_beam/io/hdfs_integration_test/hdfs_integration_test.sh"
}
}
}
@@ -389,7 +324,7 @@ def flinkCompatibilityMatrix = {
doLast {
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate && pip install -e . && python -m
apache_beam.runners.portability.flink_runner_test
--flink_job_server_jar=${project(":beam-runners-flink_2.11-job-server:").shadowJar.archivePath}
--environment_type=${workerType} ${environment_config} ${streaming ?
'--streaming' : ''}"
+ args '-c', ". ${project.ext.envdir}/bin/activate && pip install -e .
&& python -m apache_beam.runners.portability.flink_runner_test
--flink_job_server_jar=${project(":beam-runners-flink_2.11-job-server:").shadowJar.archivePath}
--environment_type=${workerType} ${environment_config} ${streaming ?
'--streaming' : ''}"
}
}
}
@@ -426,7 +361,7 @@ task depSnapshot(dependsOn: 'installGcpTest') {
exec {
// Remove useless item "pkg-resources" from file which is introduced by
a bug in Ubuntu.
executable 'sh'
- args '-c', ". ${envdir}/bin/activate && pip freeze --local --all | grep
-v \"pkg-resources\" > ${project.buildDir}/requirements.txt"
+ args '-c', ". ${project.ext.envdir}/bin/activate && pip freeze --local
--all | grep -v \"pkg-resources\" > ${project.buildDir}/requirements.txt"
}
}
}
@@ -450,12 +385,12 @@ project.task('createProcessWorker') {
dependsOn 'setupVirtualenv'
def sdkWorkerFile = file("${project.buildDir}/sdk_worker.sh")
def workerScript =
"${project(":beam-sdks-python-container:").buildDir.absolutePath}/target/launcher/linux_amd64/boot"
- def sdkWorkerFileCode = "sh -c \". ${envdir}/bin/activate && ${workerScript}
\$* \""
+ def sdkWorkerFileCode = "sh -c \". ${project.ext.envdir}/bin/activate &&
${workerScript} \$* \""
outputs.file sdkWorkerFile
doLast {
sdkWorkerFile.write sdkWorkerFileCode
exec {
- commandLine('sh', '-c', ". ${envdir}/bin/activate && cd
${project.projectDir} && python setup.py install ")
+ commandLine('sh', '-c', ". ${project.ext.envdir}/bin/activate && cd
${project.projectDir} && python setup.py install ")
}
exec {
commandLine('chmod', '+x', sdkWorkerFile)
diff --git a/sdks/python/precommit/dataflow/build.gradle
b/sdks/python/precommit/dataflow/build.gradle
new file mode 100644
index 000000000000..8bf8fca90b8f
--- /dev/null
+++ b/sdks/python/precommit/dataflow/build.gradle
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+apply plugin: org.apache.beam.gradle.BeamModulePlugin
+applyPythonNature()
+
+task precommitIT(dependsOn: ['sdist', 'installGcpTest']) {
+ doLast {
+ // Basic integration tests to run in PreCommit
+ def precommitTests = [
+ "apache_beam.examples.wordcount_it_test:WordCountIT.test_wordcount_it",
+
"apache_beam.examples.streaming_wordcount_it_test:StreamingWordCountIT.test_streaming_wordcount_it",
+ ]
+ def testOpts = [
+ "--tests=${precommitTests.join(',')}",
+ "--nocapture",
+ "--nologcapture",
+ "--processes=4",
+ "--process-timeout=1800", // Total timeout includes all tests run.
+ ]
+
+ exec {
+ executable 'sh'
+ args '-c', ". ${project.ext.envdir}/bin/activate &&
${project.ext.pythonRootDir}/scripts/run_integration_test.sh --test_opts
\"${testOpts.join(' ')}\""
+ }
+ }
+}
diff --git a/settings.gradle b/settings.gradle
index 7c21cdc78267..b706b743d160 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -180,6 +180,8 @@ include "beam-sdks-python-container"
project(":beam-sdks-python-container").dir = file("sdks/python/container")
include "beam-sdks-python-container-py3"
project(":beam-sdks-python-container-py3").dir =
file("sdks/python/container/py3")
+include "beam-sdks-python-precommit-dataflow"
+project(":beam-sdks-python-precommit-dataflow").dir =
file("sdks/python/precommit/dataflow")
include "beam-vendor-grpc-1_13_1"
project(":beam-vendor-grpc-1_13_1").dir = file("vendor/grpc-1_13_1")
include "beam-sdks-java-test-utils"
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 170063)
Time Spent: 3.5h (was: 3h 20m)
> Python precommits should run E2E tests
> --------------------------------------
>
> Key: BEAM-5058
> URL: https://issues.apache.org/jira/browse/BEAM-5058
> Project: Beam
> Issue Type: Bug
> Components: sdk-py-core, testing
> Reporter: Udi Meiri
> Assignee: Mark Liu
> Priority: Major
> Time Spent: 3.5h
> Remaining Estimate: 0h
>
> According to [https://beam.apache.org/contribute/testing/] (which I'm working
> on), end-to-end tests should be run in precommit on each combination of
> {batch, streaming} x {SDK language} x {supported runner}.
> At least 2 tests need to be added to Python's precommit: wordcount and
> wordcount_streaming on Dataflow, and possibly on other supported runners
> (the direct runner and any new runners, please).
> These tests should be configured to run from a Gradle sub-project, so that
> they're run in parallel to the unit tests.
> Example that parallelizes Java precommit integration tests:
> [https://github.com/apache/beam/pull/5731]
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)