This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 7ea2d9867c GH-35245: [Java][Dataset][Linux] Enable GCS (#35246)
7ea2d9867c is described below
commit 7ea2d9867cb90f9d0ca1db9b3bffa2fe8426e8ed
Author: henrymai <[email protected]>
AuthorDate: Thu Apr 20 10:35:09 2023 -0400
GH-35245: [Java][Dataset][Linux] Enable GCS (#35246)
### Rationale for this change
Enables GCS when building the Arrow Dataset for Java and also fixes various
Java build failures.
Currently we are using our own custom Arrow Dataset build with GCS turned
on, but we would rather this be enabled in the official releases from Arrow.
GCS support is already enabled for cpp, python, ruby, and r, so there
should be no reason not to enable this on java as well.
### What changes are included in this PR?
- Changes to enable GCS for Java Arrow Dataset on just Linux for now.
- Fixes to flight-sql-jdbc-driver/pom.xml. Without these fixes the
flight-sql-jdbc-driver build will fail with the following errors:
```
[WARNING] Used undeclared dependencies found:
[WARNING] org.bouncycastle:bcpkix-jdk15on:jar:1.61:runtime
[WARNING] org.apache.arrow:arrow-memory-core:jar:12.0.0-SNAPSHOT:runtime
[WARNING] org.hamcrest:hamcrest:jar:2.2:runtime
[WARNING] org.apache.arrow:flight-sql:jar:12.0.0-SNAPSHOT:runtime
[WARNING] org.mockito:mockito-core:jar:2.25.1:test
[WARNING] org.apache.arrow:flight-core:jar:12.0.0-SNAPSHOT:runtime
[WARNING] org.slf4j:slf4j-api:jar:1.7.25:runtime
[WARNING] io.netty:netty-common:jar:4.1.82.Final:runtime
[WARNING] joda-time:joda-time:jar:2.10.14:runtime
[WARNING] org.apache.calcite.avatica:avatica:jar:1.18.0:runtime
[WARNING] com.google.protobuf:protobuf-java:jar:3.21.6:runtime
[WARNING] org.apache.arrow:arrow-vector:jar:12.0.0-SNAPSHOT:runtime
[WARNING] com.google.guava:guava:jar:31.1-jre:runtime
[...]
[ERROR] Failed to execute goal org.apache.maven.plugins:maven-dependency-plugin:3.0.1:analyze-only (analyze) on project flight-sql-jdbc-driver: Dependency problems found -> [Help 1]
```
```
Caused by: java.lang.NullPointerException: Could not find test data path. Set the environment variable ARROW_TEST_DATA or the JVM property arrow.test.dataRoot.
at java.util.Objects.requireNonNull(Objects.java:228)
at org.apache.arrow.driver.jdbc.utils.FlightSqlTestCertificates.getTestDataRoot(FlightSqlTestCertificates.java:40)
at org.apache.arrow.driver.jdbc.utils.FlightSqlTestCertificates.getFlightTestDataRoot(FlightSqlTestCertificates.java:51)
at org.apache.arrow.driver.jdbc.utils.FlightSqlTestCertificates.exampleTlsCerts(FlightSqlTestCertificates.java:60)
at org.apache.arrow.driver.jdbc.ConnectionTlsTest.<clinit>(ConnectionTlsTest.java:59)
```
### Are these changes tested?
I've tested the build by running:
```
$HOME/.local/bin/archery docker run java-jni-manylinux-2014
```
I've also tested the resulting
`./java/dataset/target/arrow-dataset-12.0.0-SNAPSHOT.jar` from running the
command and have verified that GCS support is enabled.
### Are there any user-facing changes?
Yes, Java Arrow Dataset will now work with GCS.
* Closes: #35245
Authored-by: Henry Mai <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
ci/docker/java-jni-manylinux-201x.dockerfile | 4 +
ci/scripts/java_jni_manylinux_build.sh | 2 +
java/flight/flight-sql-jdbc-driver/pom.xml | 123 +++++++++++++++++++++++++++
3 files changed, 129 insertions(+)
diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile
index 810949ab26..6374e40641 100644
--- a/ci/docker/java-jni-manylinux-201x.dockerfile
+++ b/ci/docker/java-jni-manylinux-201x.dockerfile
@@ -37,6 +37,10 @@ ARG java=1.8.0
RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all
ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/
+# Install the gcs testbench
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN PYTHON=python /arrow/ci/scripts/install_gcs_testbench.sh default
+
# For ci/scripts/{cpp,java}_*.sh
ENV ARROW_HOME=/tmp/local \
ARROW_JAVA_CDATA=ON \
diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh
index 7480aa75ae..3aadc71ebb 100755
--- a/ci/scripts/java_jni_manylinux_build.sh
+++ b/ci/scripts/java_jni_manylinux_build.sh
@@ -39,6 +39,7 @@ export ARROW_ACERO
export ARROW_DATASET
: ${ARROW_GANDIVA:=ON}
export ARROW_GANDIVA
+: ${ARROW_GCS:=ON}
: ${ARROW_JEMALLOC:=ON}
: ${ARROW_RPATH_ORIGIN:=ON}
: ${ARROW_ORC:=ON}
@@ -75,6 +76,7 @@ cmake \
-DARROW_DEPENDENCY_USE_SHARED=OFF \
-DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
-DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_GCS=${ARROW_GCS} \
-DARROW_JEMALLOC=${ARROW_JEMALLOC} \
-DARROW_ORC=${ARROW_ORC} \
-DARROW_PARQUET=${ARROW_PARQUET} \
diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml
index 0f8ccd43e3..1f2a08547e 100644
--- a/java/flight/flight-sql-jdbc-driver/pom.xml
+++ b/java/flight/flight-sql-jdbc-driver/pom.xml
@@ -28,16 +28,139 @@
<url>https://arrow.apache.org</url>
<dependencies>
+ <!-- https://mvnrepository.com/artifact/org.hamcrest/hamcrest-core -->
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-core</artifactId>
+ <version>1.3</version>
+ <scope>test</scope>
+ </dependency>
+
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-sql-jdbc-core</artifactId>
<version>${project.version}</version>
<scope>runtime</scope>
</dependency>
+
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcpkix-jdk15on</artifactId>
+ <version>1.61</version>
+ <scope>runtime</scope>
+ </dependency>
+
+ <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-core -->
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>flight-sql</artifactId>
+ <version>${project.version}</version>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <version>3.12.4</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-inline</artifactId>
+ <version>3.12.4</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>flight-core</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-kqueue</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-transport-native-epoll</artifactId>
+ </exclusion>
+ </exclusions>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>joda-time</groupId>
+ <artifactId>joda-time</artifactId>
+ <version>2.10.14</version>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.calcite.avatica</groupId>
+ <artifactId>avatica</artifactId>
+ <version>1.18.0</version>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-vector</artifactId>
+ <version>${project.version}</version>
+ <classifier>${arrow.vector.classifier}</classifier>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.7</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
<plugins>
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <enableAssertions>false</enableAssertions>
+ <systemPropertyVariables>
+ <arrow.test.dataRoot>${project.basedir}/../../../testing/data</arrow.test.dataRoot>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>