tkaymak commented on code in PR #38233:
URL: https://github.com/apache/beam/pull/38233#discussion_r3101924194
##########
runners/spark/spark_runner.gradle:
##########
@@ -89,38 +89,107 @@ def hadoopVersions = [
hadoopVersions.each { kv -> configurations.create("hadoopVersion$kv.key") }
-def sourceBase = "${project.projectDir}/../src"
-def sourceBaseCopy = "${project.buildDir}/sourcebase/src"
-
-def useCopiedSourceSet = { scope, type, trigger ->
- def taskName = "copy${scope.capitalize()}${type.capitalize()}"
- trigger.dependsOn tasks.register(taskName, Copy) {
- from "$sourceBase/$scope/$type"
- into "$sourceBaseCopy/$scope/$type"
- duplicatesStrategy DuplicatesStrategy.INCLUDE
+/*
+ * Per-version source overrides (mirrors runners/flink/flink_runner.gradle).
+ *
+ * Layout:
+ *   runners/spark/src/         -- shared base (lowest supported version uses these directly)
+ *   runners/spark/<major>/src/ -- version-specific overrides (later overrides win)
+ *
+ * The lowest supported `spark_major` builds straight from the shared base.
+ * Higher versions copy <shared> + <previous majors> + <current> into a single
+ * source-overrides directory using DuplicatesStrategy.INCLUDE so the current
+ * version's files override earlier ones.
+ */
+def base_path = ".."
+
+def overrides = { versions, type, group = 'java' ->
+ // order matters: later entries override earlier ones during the Copy
+ ["${base_path}/src/${type}/${group}"] +
+ versions.collect { "${base_path}/${it}/src/${type}/${group}" } +
+ ["./src/${type}/${group}"]
+}
+
+def all_versions = spark_versions.split(",")
+def previous_versions = all_versions.findAll { it < spark_major }
+
+def main_source_overrides = overrides(previous_versions, "main")
+def test_source_overrides = overrides(previous_versions, "test")
+def main_resources_overrides = overrides(previous_versions, "main", "resources")
+def test_resources_overrides = overrides(previous_versions, "test", "resources")
+
+def sourceOverridesBase = project.layout.buildDirectory.dir('source-overrides/src').get()
+
+def copySourceOverrides = tasks.register('copySourceOverrides', Copy) { copyTask ->
+ copyTask.from main_source_overrides
+ copyTask.into "${sourceOverridesBase}/main/java"
+ copyTask.duplicatesStrategy DuplicatesStrategy.INCLUDE
+  if (project.ext.has('excluded_files') && project.ext.excluded_files.containsKey('main')) {
+ project.ext.excluded_files.main.each { f -> copyTask.exclude "**/${f}" }
}
- // append copied sources to srcDirs
- sourceSets."$scope"."$type".srcDirs "$sourceBaseCopy/$scope/$type"
}
-if (copySourceBase) {
- // Copy source base into build directory.
-  // While this is not necessary, having multiple source sets referencing the same shared base will typically confuse an IDE and harm developer experience.
-  // The copySourceBase flag can be swapped without any implications and allows to pick a main version that is actively worked on.
- useCopiedSourceSet("main", "java", compileJava)
- useCopiedSourceSet("main", "resources", processResources)
- useCopiedSourceSet("test", "java", compileTestJava)
- useCopiedSourceSet("test", "resources", processTestResources)
+def copyResourcesOverrides = tasks.register('copyResourcesOverrides', Copy) {
+ it.from main_resources_overrides
+ it.into "${sourceOverridesBase}/main/resources"
+ it.duplicatesStrategy DuplicatesStrategy.INCLUDE
+}
+
+def copyTestSourceOverrides = tasks.register('copyTestSourceOverrides', Copy) { copyTask ->
+ copyTask.from test_source_overrides
+ copyTask.into "${sourceOverridesBase}/test/java"
+ copyTask.duplicatesStrategy DuplicatesStrategy.INCLUDE
+  if (project.ext.has('excluded_files') && project.ext.excluded_files.containsKey('test')) {
+ project.ext.excluded_files.test.each { f -> copyTask.exclude "**/${f}" }
+ }
+}
+
+def copyTestResourcesOverrides = tasks.register('copyTestResourcesOverrides', Copy) {
+ it.from test_resources_overrides
+ it.into "${sourceOverridesBase}/test/resources"
+ it.duplicatesStrategy DuplicatesStrategy.INCLUDE
+}
+
+def use_override = (spark_major != all_versions.first())
+def sourceBase = "${project.projectDir}/../src"
+
+if (use_override) {
+ compileJava.dependsOn copySourceOverrides
+ processResources.dependsOn copyResourcesOverrides
+ compileTestJava.dependsOn copyTestSourceOverrides
+ processTestResources.dependsOn copyTestResourcesOverrides
+
+ def sourcesJar = project.tasks.findByName('sourcesJar')
+ if (sourcesJar != null) {
+ sourcesJar.dependsOn copySourceOverrides
+ sourcesJar.dependsOn copyResourcesOverrides
+ }
+ def testSourcesJar = project.tasks.findByName('testSourcesJar')
+ if (testSourcesJar != null) {
+ testSourcesJar.dependsOn copyTestSourceOverrides
+ testSourcesJar.dependsOn copyTestResourcesOverrides
+ }
+  // Pin srcDirs explicitly so each higher version sees only its merged overrides tree.
+ sourceSets {
+ main {
+ java { srcDirs = ["${sourceOverridesBase}/main/java"] }
+ resources { srcDirs = ["${sourceOverridesBase}/main/resources"] }
+ }
+ test {
+ java { srcDirs = ["${sourceOverridesBase}/test/java"] }
+ resources { srcDirs = ["${sourceOverridesBase}/test/resources"] }
+ }
+ }
} else {
- // append shared base sources to srcDirs
+  // Lowest supported Spark version: build straight from the shared base, no copy step.
sourceSets {
main {
- java.srcDirs "${sourceBase}/main/java"
- resources.srcDirs "${sourceBase}/main/resources"
+ java { srcDirs = ["${sourceBase}/main/java"] }
+ resources { srcDirs = ["${sourceBase}/main/resources"] }
}
test {
- java.srcDirs "${sourceBase}/test/java"
- resources.srcDirs "${sourceBase}/test/resources"
+ java { srcDirs = ["${sourceBase}/test/java"] }
+ resources { srcDirs = ["${sourceBase}/test/resources"] }
Review Comment:
This deliberately mirrors `runners/flink/flink_runner.gradle`, where the
lowest supported version (`runners/flink/1.17/`) likewise does not add its own
`./src` to `srcDirs` — the base version *is* the shared source. Adding an
always-empty `./src/main/java` to the base would (a) diverge from the
established Flink pattern reviewers will compare against and (b) silently
encourage placing code in `runners/spark/3/src/` that the override layering
then can't see from higher versions, which is the exact failure mode this
refactor is trying to prevent. If a future Spark 3-only file is ever needed,
the cleanest move is to bump the base out from under it (or add the srcDir
then), not to pre-create the seam.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]