[GitHub] [iceberg] rdblue commented on a change in pull request #2512: Spark: Support building against both Spark 3.0 and Spark 3.1.

GitBox Sun, 02 May 2021 15:19:17 -0700


rdblue commented on a change in pull request #2512:
URL: https://github.com/apache/iceberg/pull/2512#discussion_r624788802




##########
File path: build.gradle
##########
@@ -921,33 +921,77 @@ if (jdkVersion == '8') {
 }
 
 project(':iceberg-spark3') {
-  dependencies {
-    compile project(':iceberg-api')
-    compile project(':iceberg-common')
-    compile project(':iceberg-core')
-    compile project(':iceberg-data')
-    compile project(':iceberg-orc')
-    compile project(':iceberg-parquet')
-    compile project(':iceberg-arrow')
-    compile project(':iceberg-hive-metastore')
-    compile project(':iceberg-spark')
 
-    compileOnly "org.apache.avro:avro"
-    compileOnly("org.apache.spark:spark-hive_2.12") {
-      exclude group: 'org.apache.avro', module: 'avro'
-      exclude group: 'org.apache.arrow'
+  ext {
+    commonDependencies = {
+      compile project(':iceberg-api')
+      compile project(':iceberg-common')
+      compile project(':iceberg-core')
+      compile project(':iceberg-data')
+      compile project(':iceberg-orc')
+      compile project(':iceberg-parquet')
+      compile project(':iceberg-arrow')
+      compile project(':iceberg-hive-metastore')
+      compile project(':iceberg-spark')
+
+      compileOnly "org.apache.avro:avro"
+
+      testCompile project(path: ':iceberg-spark', configuration: 'testArtifacts')
+
+      testCompile("org.apache.hadoop:hadoop-minicluster") {
+        exclude group: 'org.apache.avro', module: 'avro'
+      }
+      testCompile project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
+      testCompile project(path: ':iceberg-api', configuration: 'testArtifacts')
+      testCompile project(path: ':iceberg-data', configuration: 'testArtifacts')
     }
 
-    testCompile project(path: ':iceberg-spark', configuration: 'testArtifacts')
+    spark30Dependencies = {
+      compileOnly("org.apache.spark:spark-hive_2.12:3.0.1") {
+        exclude group: 'org.apache.avro', module: 'avro'
+        exclude group: 'org.apache.arrow'
+      }
+    }
 
-    testCompile("org.apache.hadoop:hadoop-minicluster") {
-      exclude group: 'org.apache.avro', module: 'avro'
+    spark31Dependencies = {
+      compileOnly("org.apache.spark:spark-hive_2.12:3.1.1") {
+        exclude group: 'org.apache.avro', module: 'avro'
+        exclude group: 'org.apache.arrow'
+      }
+    }
+  }
+
+  dependencies project.ext.spark30Dependencies
+  dependencies project.ext.commonDependencies
+
+  test {
+    // For vectorized reads
+    // Allow unsafe memory access to avoid the costly check arrow does to check if index is within bounds
+    systemProperty("arrow.enable_unsafe_memory_access", "true")
+    // Disable expensive null check for every get(index) call.
+    // Iceberg manages nullability checks itself instead of relying on arrow.
+    systemProperty("arrow.enable_null_check_for_get", "false")
+
+    // Vectorized reads need more memory
+    maxHeapSize '2500m'
+  }
+}
+
+project(':iceberg-spark31') {

Review comment:
       Do the changes work when using the compiled artifact from 
`iceberg-spark3` but testing in Spark 3.1? That's what we're ideally trying to 
achieve. It is nice to share source, but we would prefer not to have a separate 
module for every Spark version.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [iceberg] rdblue commented on a change in pull request #2512: Spark: Support building against both Spark 3.0 and Spark 3.1.

Reply via email to