[SPARK-13595][BUILD] Move docker, extras modules into external

## What changes were proposed in this pull request?
Move `docker` dirs out of top level into `external/`; move `extras/*` into `external/`

## How was this patch tested?

This is tested with Jenkins tests.

Author: Sean Owen <so...@cloudera.com>

Closes #11523 from srowen/SPARK-13595.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/256704c7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/256704c7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/256704c7

Branch: refs/heads/master
Commit: 256704c771d301700af9ebf0d180c1ba7c4116c0
Parents: 7791d0c
Author: Sean Owen <so...@cloudera.com>
Authored: Wed Mar 9 18:27:44 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Mar 9 18:27:44 2016 +0000

----------------------------------------------------------------------
 dev/sparktestsupport/modules.py                 |   4 +-
 docker-integration-tests/pom.xml                | 184 ----
 .../sql/jdbc/DockerJDBCIntegrationSuite.scala   | 160 ----
 .../spark/sql/jdbc/MySQLIntegrationSuite.scala  | 153 ----
 .../spark/sql/jdbc/OracleIntegrationSuite.scala |  78 --
 .../sql/jdbc/PostgresIntegrationSuite.scala     | 105 ---
 .../org/apache/spark/util/DockerUtils.scala     |  68 --
 docker/README.md                                |   7 -
 docker/build                                    |  22 -
 docker/spark-mesos/Dockerfile                   |  30 -
 docker/spark-test/README.md                     |  11 -
 docker/spark-test/base/Dockerfile               |  37 -
 docker/spark-test/build                         |  22 -
 docker/spark-test/master/Dockerfile             |  21 -
 docker/spark-test/master/default_cmd            |  28 -
 docker/spark-test/worker/Dockerfile             |  22 -
 docker/spark-test/worker/default_cmd            |  28 -
 docs/streaming-kinesis-integration.md           |  10 +-
 external/docker-integration-tests/pom.xml       | 184 ++++
 .../sql/jdbc/DockerJDBCIntegrationSuite.scala   | 160 ++++
 .../spark/sql/jdbc/MySQLIntegrationSuite.scala  | 153 ++++
 .../spark/sql/jdbc/OracleIntegrationSuite.scala |  78 ++
 .../sql/jdbc/PostgresIntegrationSuite.scala     | 105 +++
 .../org/apache/spark/util/DockerUtils.scala     |  68 ++
 external/docker/README.md                       |   7 +
 external/docker/build                           |  22 +
 external/docker/spark-mesos/Dockerfile          |  30 +
 external/docker/spark-test/README.md            |  11 +
 external/docker/spark-test/base/Dockerfile      |  37 +
 external/docker/spark-test/build                |  22 +
 external/docker/spark-test/master/Dockerfile    |  21 +
 external/docker/spark-test/master/default_cmd   |  28 +
 external/docker/spark-test/worker/Dockerfile    |  22 +
 external/docker/spark-test/worker/default_cmd   |  28 +
 external/java8-tests/README.md                  |  24 +
 external/java8-tests/pom.xml                    | 161 ++++
 .../java/org/apache/spark/Java8APISuite.java    | 393 ++++++
 .../apache/spark/streaming/Java8APISuite.java   | 905 +++++++++++++++
 .../src/test/resources/log4j.properties         |  28 +
 .../scala/org/apache/spark/JDK8ScalaSuite.scala |  27 +
 external/kinesis-asl-assembly/pom.xml           | 181 ++++
 external/kinesis-asl/pom.xml                    |  87 ++
 .../streaming/JavaKinesisWordCountASL.java      | 189 ++++
 .../examples/streaming/kinesis_wordcount_asl.py |  83 ++
 .../src/main/resources/log4j.properties         |  37 +
 .../streaming/KinesisWordCountASL.scala         | 276 ++++++
 .../kinesis/KinesisBackedBlockRDD.scala         | 288 ++++++
 .../streaming/kinesis/KinesisCheckpointer.scala | 133 +++
 .../streaming/kinesis/KinesisInputDStream.scala |  76 ++
 .../streaming/kinesis/KinesisReceiver.scala     | 361 ++++++
 .../kinesis/KinesisRecordProcessor.scala        | 177 ++++
 .../streaming/kinesis/KinesisTestUtils.scala    | 260 ++++++
 .../spark/streaming/kinesis/KinesisUtils.scala  | 560 ++++++++++
 .../kinesis/JavaKinesisStreamSuite.java         |  62 ++
 .../src/test/resources/log4j.properties         |  27 +
 .../kinesis/KPLBasedKinesisTestUtils.scala      |  72 ++
 .../kinesis/KinesisBackedBlockRDDSuite.scala    | 259 ++++++
 .../kinesis/KinesisCheckpointerSuite.scala      | 152 ++++
 .../streaming/kinesis/KinesisFunSuite.scala     |  46 +
 .../kinesis/KinesisReceiverSuite.scala          | 210 +++++
 .../streaming/kinesis/KinesisStreamSuite.scala  | 297 ++++++
 external/spark-ganglia-lgpl/pom.xml             |  49 +
 .../apache/spark/metrics/sink/GangliaSink.scala |  90 ++
 extras/README.md                                |   1 -
 extras/java8-tests/README.md                    |  24 -
 extras/java8-tests/pom.xml                      | 161 ----
 .../java/org/apache/spark/Java8APISuite.java    | 393 --------
 .../apache/spark/streaming/Java8APISuite.java   | 905 -------------------
 .../src/test/resources/log4j.properties         |  28 -
 .../scala/org/apache/spark/JDK8ScalaSuite.scala |  27 -
 extras/kinesis-asl-assembly/pom.xml             | 181 ----
 extras/kinesis-asl/pom.xml                      |  87 --
 .../streaming/JavaKinesisWordCountASL.java      | 189 ----
 .../examples/streaming/kinesis_wordcount_asl.py |  83 --
 .../src/main/resources/log4j.properties         |  37 -
 .../streaming/KinesisWordCountASL.scala         | 276 ------
 .../kinesis/KinesisBackedBlockRDD.scala         | 288 ------
 .../streaming/kinesis/KinesisCheckpointer.scala | 133 ---
 .../streaming/kinesis/KinesisInputDStream.scala |  76 --
 .../streaming/kinesis/KinesisReceiver.scala     | 361 --------
 .../kinesis/KinesisRecordProcessor.scala        | 177 ----
 .../streaming/kinesis/KinesisTestUtils.scala    | 260 ------
 .../spark/streaming/kinesis/KinesisUtils.scala  | 560 ------------
 .../kinesis/JavaKinesisStreamSuite.java         |  62 --
 .../src/test/resources/log4j.properties         |  27 -
 .../kinesis/KPLBasedKinesisTestUtils.scala      |  72 --
 .../kinesis/KinesisBackedBlockRDDSuite.scala    | 259 ------
 .../kinesis/KinesisCheckpointerSuite.scala      | 152 ----
 .../streaming/kinesis/KinesisFunSuite.scala     |  46 -
 .../kinesis/KinesisReceiverSuite.scala          | 210 -----
 .../streaming/kinesis/KinesisStreamSuite.scala  | 297 ------
 extras/spark-ganglia-lgpl/pom.xml               |  49 -
 .../apache/spark/metrics/sink/GangliaSink.scala |  90 --
 pom.xml                                         |  10 +-
 python/pyspark/streaming/tests.py               |   2 +-
 95 files changed, 6499 insertions(+), 6500 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/dev/sparktestsupport/modules.py
----------------------------------------------------------------------
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index e4f2eda..1781de4 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -195,8 +195,8 @@ streaming_kinesis_asl = Module(
     name="streaming-kinesis-asl",
     dependencies=[],
     source_file_regexes=[
-        "extras/kinesis-asl/",
-        "extras/kinesis-asl-assembly/",
+        "external/kinesis-asl/",
+        "external/kinesis-asl-assembly/",
     ],
     build_profile_flags=[
         "-Pkinesis-asl",

http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker-integration-tests/pom.xml
----------------------------------------------------------------------
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
deleted file mode 100644
index 048e58d..0000000
--- a/docker-integration-tests/pom.xml
+++ /dev/null
@@ -1,184 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  ~ Licensed to the Apache Software Foundation (ASF) under one or more
-  ~ contributor license agreements.  See the NOTICE file distributed with
-  ~ this work for additional information regarding copyright ownership.
-  ~ The ASF licenses this file to You under the Apache License, Version 2.0
-  ~ (the "License"); you may not use this file except in compliance with
-  ~ the License.
You may obtain a copy of the License at - ~ - ~ http://www.apache.org/licenses/LICENSE-2.0 - ~ - ~ Unless required by applicable law or agreed to in writing, software - ~ distributed under the License is distributed on an "AS IS" BASIS, - ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ~ See the License for the specific language governing permissions and - ~ limitations under the License. - --> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>org.apache.spark</groupId> - <artifactId>spark-parent_2.11</artifactId> - <version>2.0.0-SNAPSHOT</version> - <relativePath>../pom.xml</relativePath> - </parent> - - <artifactId>spark-docker-integration-tests_2.11</artifactId> - <packaging>jar</packaging> - <name>Spark Project Docker Integration Tests</name> - <url>http://spark.apache.org/</url> - <properties> - <sbt.project.name>docker-integration-tests</sbt.project.name> - </properties> - - <dependencies> - <dependency> - <groupId>com.spotify</groupId> - <artifactId>docker-client</artifactId> - <classifier>shaded</classifier> - <scope>test</scope> - <!-- - See https://github.com/spotify/docker-client/pull/272#issuecomment-155249101 - for an explanation of why these exclusions are (necessarily) a mess. - --> - <exclusions> - <exclusion> - <groupId>com.fasterxml.jackson.jaxrs</groupId> - <artifactId>jackson-jaxrs-json-provider</artifactId> - </exclusion> - <exclusion> - <groupId>com.fasterxml.jackson.datatype</groupId> - <artifactId>jackson-datatype-guava</artifactId> - </exclusion> - <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-databind</artifactId> - </exclusion> - <exclusion> - <groupId>org.glassfish.jersey.core</groupId> - <artifactId>jersey-client</artifactId> - </exclusion> - <exclusion> - <groupId>org.glassfish.jersey.connectors</groupId> - <artifactId>jersey-apache-connector</artifactId> - </exclusion> - <exclusion> - <groupId>org.glassfish.jersey.media</groupId> - <artifactId>jersey-media-json-jackson</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.httpcomponents</groupId> - <artifactId>httpclient</artifactId> - <version>4.5</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.httpcomponents</groupId> - <artifactId>httpcore</artifactId> - <version>4.4.1</version> - <scope>test</scope> - </dependency> - <!-- Necessary in order to avoid errors in log messages: --> - <dependency> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - <version>18.0</version> - </dependency> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-core_${scala.binary.version}</artifactId> - <version>${project.version}</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-core_${scala.binary.version}</artifactId> - <version>${project.version}</version> - <type>test-jar</type> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-sql_${scala.binary.version}</artifactId> - <version>${project.version}</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-sql_${scala.binary.version}</artifactId> - <version>${project.version}</version> - 
<type>test-jar</type> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-test-tags_${scala.binary.version}</artifactId> - <version>${project.version}</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>mysql</groupId> - <artifactId>mysql-connector-java</artifactId> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.postgresql</groupId> - <artifactId>postgresql</artifactId> - <scope>test</scope> - </dependency> - <!-- Oracle ojdbc jar, used for oracle integration suite for docker testing. - See https://github.com/apache/spark/pull/11306 for background on why we need - to use a an ojdbc jar for the testcase. The maven dependency here is commented - because currently the maven repository does not contain the ojdbc jar mentioned. - Once the jar is available in maven, this could be uncommented. --> - <!-- - <dependency> - <groupId>com.oracle</groupId> - <artifactId>ojdbc6</artifactId> - <version>11.2.0.2.0</version> - <scope>test</scope> - </dependency> - --> - <!-- Jersey dependencies, used to override version. - See https://github.com/apache/spark/pull/9503#issuecomment-154369560 for - background on why we need to use a newer Jersey only in this test module; - we can remove this once https://github.com/spotify/docker-client/pull/272 is - merged and a new docker-client release is published. --> - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-server</artifactId> - <version>1.19</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-core</artifactId> - <version>1.19</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-servlet</artifactId> - <version>1.19</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-json</artifactId> - <version>1.19</version> - <scope>test</scope> - <exclusions> - <exclusion> - <groupId>stax</groupId> - <artifactId>stax-api</artifactId> - </exclusion> - </exclusions> - </dependency> - <!-- End Jersey dependencies --> - </dependencies> -</project> http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala ---------------------------------------------------------------------- diff --git a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala deleted file mode 100644 index f73231f..0000000 --- a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.jdbc - -import java.net.ServerSocket -import java.sql.Connection - -import scala.collection.JavaConverters._ -import scala.util.control.NonFatal - -import com.spotify.docker.client._ -import com.spotify.docker.client.messages.{ContainerConfig, HostConfig, PortBinding} -import org.scalatest.BeforeAndAfterAll -import org.scalatest.concurrent.Eventually -import org.scalatest.time.SpanSugar._ - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.util.DockerUtils - -abstract class DatabaseOnDocker { - /** - * The docker image to be pulled. - */ - val imageName: String - - /** - * Environment variables to set inside of the Docker container while launching it. - */ - val env: Map[String, String] - - /** - * The container-internal JDBC port that the database listens on. - */ - val jdbcPort: Int - - /** - * Return a JDBC URL that connects to the database running at the given IP address and port. - */ - def getJdbcUrl(ip: String, port: Int): String -} - -abstract class DockerJDBCIntegrationSuite - extends SparkFunSuite - with BeforeAndAfterAll - with Eventually - with SharedSQLContext { - - val db: DatabaseOnDocker - - private var docker: DockerClient = _ - private var containerId: String = _ - protected var jdbcUrl: String = _ - - override def beforeAll() { - super.beforeAll() - try { - docker = DefaultDockerClient.fromEnv.build() - // Check that Docker is actually up - try { - docker.ping() - } catch { - case NonFatal(e) => - log.error("Exception while connecting to Docker. Check whether Docker is running.") - throw e - } - // Ensure that the Docker image is installed: - try { - docker.inspectImage(db.imageName) - } catch { - case e: ImageNotFoundException => - log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") - docker.pull(db.imageName) - } - // Configure networking (necessary for boot2docker / Docker Machine) - val externalPort: Int = { - val sock = new ServerSocket(0) - val port = sock.getLocalPort - sock.close() - port - } - val dockerIp = DockerUtils.getDockerIp() - val hostConfig: HostConfig = HostConfig.builder() - .networkMode("bridge") - .portBindings( - Map(s"${db.jdbcPort}/tcp" -> List(PortBinding.of(dockerIp, externalPort)).asJava).asJava) - .build() - // Create the database container: - val config = ContainerConfig.builder() - .image(db.imageName) - .networkDisabled(false) - .env(db.env.map { case (k, v) => s"$k=$v" }.toSeq.asJava) - .hostConfig(hostConfig) - .exposedPorts(s"${db.jdbcPort}/tcp") - .build() - containerId = docker.createContainer(config).id - // Start the container and wait until the database can accept JDBC connections: - docker.startContainer(containerId) - jdbcUrl = db.getJdbcUrl(dockerIp, externalPort) - eventually(timeout(60.seconds), interval(1.seconds)) { - val conn = java.sql.DriverManager.getConnection(jdbcUrl) - conn.close() - } - // Run any setup queries: - val conn: Connection = java.sql.DriverManager.getConnection(jdbcUrl) - try { - dataPreparation(conn) - } finally { - conn.close() - } - } catch { - case NonFatal(e) => - try { - afterAll() - } finally { - throw e - } - } - } - - override def afterAll() { - try { - if (docker != null) { - try { - if (containerId != null) { - docker.killContainer(containerId) - docker.removeContainer(containerId) - } - } catch { - case NonFatal(e) => - logWarning(s"Could not stop container 
$containerId", e) - } finally { - docker.close() - } - } - } finally { - super.afterAll() - } - } - - /** - * Prepare databases and tables for testing. - */ - def dataPreparation(connection: Connection): Unit -} http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala ---------------------------------------------------------------------- diff --git a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala deleted file mode 100644 index c68e4dc..0000000 --- a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.jdbc - -import java.math.BigDecimal -import java.sql.{Connection, Date, Timestamp} -import java.util.Properties - -import org.apache.spark.tags.DockerTest - -@DockerTest -class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { - override val db = new DatabaseOnDocker { - override val imageName = "mysql:5.7.9" - override val env = Map( - "MYSQL_ROOT_PASSWORD" -> "rootpass" - ) - override val jdbcPort: Int = 3306 - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass" - } - - override def dataPreparation(conn: Connection): Unit = { - conn.prepareStatement("CREATE DATABASE foo").executeUpdate() - conn.prepareStatement("CREATE TABLE tbl (x INTEGER, y TEXT(8))").executeUpdate() - conn.prepareStatement("INSERT INTO tbl VALUES (42,'fred')").executeUpdate() - conn.prepareStatement("INSERT INTO tbl VALUES (17,'dave')").executeUpdate() - - conn.prepareStatement("CREATE TABLE numbers (onebit BIT(1), tenbits BIT(10), " - + "small SMALLINT, med MEDIUMINT, nor INT, big BIGINT, deci DECIMAL(40,20), flt FLOAT, " - + "dbl DOUBLE)").executeUpdate() - conn.prepareStatement("INSERT INTO numbers VALUES (b'0', b'1000100101', " - + "17, 77777, 123456789, 123456789012345, 123456789012345.123456789012345, " - + "42.75, 1.0000000000000002)").executeUpdate() - - conn.prepareStatement("CREATE TABLE dates (d DATE, t TIME, dt DATETIME, ts TIMESTAMP, " - + "yr YEAR)").executeUpdate() - conn.prepareStatement("INSERT INTO dates VALUES ('1991-11-09', '13:31:24', " - + "'1996-01-01 01:23:45', '2009-02-13 23:31:30', '2001')").executeUpdate() - - // TODO: Test locale conversion for strings. 
- conn.prepareStatement("CREATE TABLE strings (a CHAR(10), b VARCHAR(10), c TINYTEXT, " - + "d TEXT, e MEDIUMTEXT, f LONGTEXT, g BINARY(4), h VARBINARY(10), i BLOB)" - ).executeUpdate() - conn.prepareStatement("INSERT INTO strings VALUES ('the', 'quick', 'brown', 'fox', " + - "'jumps', 'over', 'the', 'lazy', 'dog')").executeUpdate() - } - - test("Basic test") { - val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties) - val rows = df.collect() - assert(rows.length == 2) - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 2) - assert(types(0).equals("class java.lang.Integer")) - assert(types(1).equals("class java.lang.String")) - } - - test("Numeric types") { - val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 9) - assert(types(0).equals("class java.lang.Boolean")) - assert(types(1).equals("class java.lang.Long")) - assert(types(2).equals("class java.lang.Integer")) - assert(types(3).equals("class java.lang.Integer")) - assert(types(4).equals("class java.lang.Integer")) - assert(types(5).equals("class java.lang.Long")) - assert(types(6).equals("class java.math.BigDecimal")) - assert(types(7).equals("class java.lang.Double")) - assert(types(8).equals("class java.lang.Double")) - assert(rows(0).getBoolean(0) == false) - assert(rows(0).getLong(1) == 0x225) - assert(rows(0).getInt(2) == 17) - assert(rows(0).getInt(3) == 77777) - assert(rows(0).getInt(4) == 123456789) - assert(rows(0).getLong(5) == 123456789012345L) - val bd = new BigDecimal("123456789012345.12345678901234500000") - assert(rows(0).getAs[BigDecimal](6).equals(bd)) - assert(rows(0).getDouble(7) == 42.75) - assert(rows(0).getDouble(8) == 1.0000000000000002) - } - - test("Date types") { - val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 5) - assert(types(0).equals("class java.sql.Date")) - assert(types(1).equals("class java.sql.Timestamp")) - assert(types(2).equals("class java.sql.Timestamp")) - assert(types(3).equals("class java.sql.Timestamp")) - assert(types(4).equals("class java.sql.Date")) - assert(rows(0).getAs[Date](0).equals(Date.valueOf("1991-11-09"))) - assert(rows(0).getAs[Timestamp](1).equals(Timestamp.valueOf("1970-01-01 13:31:24"))) - assert(rows(0).getAs[Timestamp](2).equals(Timestamp.valueOf("1996-01-01 01:23:45"))) - assert(rows(0).getAs[Timestamp](3).equals(Timestamp.valueOf("2009-02-13 23:31:30"))) - assert(rows(0).getAs[Date](4).equals(Date.valueOf("2001-01-01"))) - } - - test("String types") { - val df = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 9) - assert(types(0).equals("class java.lang.String")) - assert(types(1).equals("class java.lang.String")) - assert(types(2).equals("class java.lang.String")) - assert(types(3).equals("class java.lang.String")) - assert(types(4).equals("class java.lang.String")) - assert(types(5).equals("class java.lang.String")) - assert(types(6).equals("class [B")) - assert(types(7).equals("class [B")) - assert(types(8).equals("class [B")) - assert(rows(0).getString(0).equals("the")) - assert(rows(0).getString(1).equals("quick")) - assert(rows(0).getString(2).equals("brown")) - 
assert(rows(0).getString(3).equals("fox"))
-    assert(rows(0).getString(4).equals("jumps"))
-    assert(rows(0).getString(5).equals("over"))
-    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](6), Array[Byte](116, 104, 101, 0)))
-    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](7), Array[Byte](108, 97, 122, 121)))
-    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](8), Array[Byte](100, 111, 103)))
-  }
-
-  test("Basic write test") {
-    val df1 = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties)
-    val df2 = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties)
-    val df3 = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties)
-    df1.write.jdbc(jdbcUrl, "numberscopy", new Properties)
-    df2.write.jdbc(jdbcUrl, "datescopy", new Properties)
-    df3.write.jdbc(jdbcUrl, "stringscopy", new Properties)
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
----------------------------------------------------------------------
diff --git a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
deleted file mode 100644
index 8a0f938..0000000
--- a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.jdbc
-
-import java.sql.Connection
-import java.util.Properties
-
-import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.tags.DockerTest
-
-/**
- * This integration suite was written and tested against an Oracle database running in Docker.
- * The ojdbc6-11.2.0.2.0.jar it depends on is not available in the Maven repository, so the jar
- * was downloaded manually from the Oracle site and installed into the local Maven repository
- * for testing. For a SparkQA test run, the ojdbc jar would likewise need to be placed manually
- * in the local Maven repository (com/oracle/ojdbc6/11.2.0.2.0).
- *
- * Steps to run this suite:
- * 1. Pull the Oracle 11g image: docker pull wnameless/oracle-xe-11g
- * 2. Start Docker: sudo service docker start
- * 3. Download the Oracle 11g driver jar and put it in the local Maven repository:
- *    (com/oracle/ojdbc6/11.2.0.2.0/ojdbc6-11.2.0.2.0.jar)
- * 4. Increase the timeout and interval parameters for the Oracle test from 60,1 to high values
- *    in DockerJDBCIntegrationSuite.scala (locally tested with 200,200 and executed successfully).
- * 5. Run the Spark test: ./build/sbt "test-only org.apache.spark.sql.jdbc.OracleIntegrationSuite"
- *
- * All tests in this suite are ignored because of the dependency on the Oracle jar, which is not
- * available in the Maven repository.
- */
-@DockerTest
-class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLContext {
-  import testImplicits._
-
-  override val db = new DatabaseOnDocker {
-    override val imageName = "wnameless/oracle-xe-11g:latest"
-    override val env = Map(
-      "ORACLE_ROOT_PASSWORD" -> "oracle"
-    )
-    override val jdbcPort: Int = 1521
-    override def getJdbcUrl(ip: String, port: Int): String =
-      s"jdbc:oracle:thin:system/oracle@//$ip:$port/xe"
-  }
-
-  override def dataPreparation(conn: Connection): Unit = {
-  }
-
-  ignore("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") {
-    // create a sample dataframe with a string-typed column
-    val df1 = sparkContext.parallelize(Seq(("foo"))).toDF("x")
-    // write the dataframe to the oracle table tbl2
-    df1.write.jdbc(jdbcUrl, "tbl2", new Properties)
-    // read the table back from oracle
-    val dfRead = sqlContext.read.jdbc(jdbcUrl, "tbl2", new Properties)
-    // get the rows
-    val rows = dfRead.collect()
-    // verify that the column was inserted with a string data type
-    val types = rows(0).toSeq.map(x => x.getClass.toString)
-    assert(types(0).equals("class java.lang.String"))
-    // verify that the inserted value is correct
-    assert(rows(0).getString(0).equals("foo"))
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala
----------------------------------------------------------------------
diff --git a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala
deleted file mode 100644
index d55cdcf..0000000
--- a/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.apache.spark.sql.jdbc - -import java.sql.Connection -import java.util.Properties - -import org.apache.spark.sql.Column -import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.types.{ArrayType, DecimalType} -import org.apache.spark.tags.DockerTest - -@DockerTest -class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { - override val db = new DatabaseOnDocker { - override val imageName = "postgres:9.4.5" - override val env = Map( - "POSTGRES_PASSWORD" -> "rootpass" - ) - override val jdbcPort = 5432 - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" - } - - override def dataPreparation(conn: Connection): Unit = { - conn.prepareStatement("CREATE DATABASE foo").executeUpdate() - conn.setCatalog("foo") - conn.prepareStatement("CREATE TYPE enum_type AS ENUM ('d1', 'd2')").executeUpdate() - conn.prepareStatement("CREATE TABLE bar (c0 text, c1 integer, c2 double precision, c3 bigint, " - + "c4 bit(1), c5 bit(10), c6 bytea, c7 boolean, c8 inet, c9 cidr, " - + "c10 integer[], c11 text[], c12 real[], c13 numeric(2,2)[], c14 enum_type)").executeUpdate() - conn.prepareStatement("INSERT INTO bar VALUES ('hello', 42, 1.25, 123456789012345, B'0', " - + "B'1000100101', E'\\\\xDEADBEEF', true, '172.16.0.42', '192.168.0.0/16', " - + """'{1, 2}', '{"a", null, "b"}', '{0.11, 0.22}', '{0.11, 0.22}', 'd1')""").executeUpdate() - } - - test("Type mapping for various types") { - val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val types = rows(0).toSeq.map(x => x.getClass) - assert(types.length == 15) - assert(classOf[String].isAssignableFrom(types(0))) - assert(classOf[java.lang.Integer].isAssignableFrom(types(1))) - assert(classOf[java.lang.Double].isAssignableFrom(types(2))) - assert(classOf[java.lang.Long].isAssignableFrom(types(3))) - assert(classOf[java.lang.Boolean].isAssignableFrom(types(4))) - assert(classOf[Array[Byte]].isAssignableFrom(types(5))) - assert(classOf[Array[Byte]].isAssignableFrom(types(6))) - assert(classOf[java.lang.Boolean].isAssignableFrom(types(7))) - assert(classOf[String].isAssignableFrom(types(8))) - assert(classOf[String].isAssignableFrom(types(9))) - assert(classOf[Seq[Int]].isAssignableFrom(types(10))) - assert(classOf[Seq[String]].isAssignableFrom(types(11))) - assert(classOf[Seq[Double]].isAssignableFrom(types(12))) - assert(classOf[Seq[BigDecimal]].isAssignableFrom(types(13))) - assert(classOf[String].isAssignableFrom(types(14))) - assert(rows(0).getString(0).equals("hello")) - assert(rows(0).getInt(1) == 42) - assert(rows(0).getDouble(2) == 1.25) - assert(rows(0).getLong(3) == 123456789012345L) - assert(!rows(0).getBoolean(4)) - // BIT(10)'s come back as ASCII strings of ten ASCII 0's and 1's... 
- assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](5), - Array[Byte](49, 48, 48, 48, 49, 48, 48, 49, 48, 49))) - assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](6), - Array[Byte](0xDE.toByte, 0xAD.toByte, 0xBE.toByte, 0xEF.toByte))) - assert(rows(0).getBoolean(7)) - assert(rows(0).getString(8) == "172.16.0.42") - assert(rows(0).getString(9) == "192.168.0.0/16") - assert(rows(0).getSeq(10) == Seq(1, 2)) - assert(rows(0).getSeq(11) == Seq("a", null, "b")) - assert(rows(0).getSeq(12).toSeq == Seq(0.11f, 0.22f)) - assert(rows(0).getSeq(13) == Seq("0.11", "0.22").map(BigDecimal(_).bigDecimal)) - assert(rows(0).getString(14) == "d1") - } - - test("Basic write test") { - val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) - // Test only that it doesn't crash. - df.write.jdbc(jdbcUrl, "public.barcopy", new Properties) - // Test that written numeric type has same DataType as input - assert(sqlContext.read.jdbc(jdbcUrl, "public.barcopy", new Properties).schema(13).dataType == - ArrayType(DecimalType(2, 2), true)) - // Test write null values. - df.select(df.queryExecution.analyzed.output.map { a => - Column(Literal.create(null, a.dataType)).as(a.name) - }: _*).write.jdbc(jdbcUrl, "public.barcopy2", new Properties) - } -} http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker-integration-tests/src/test/scala/org/apache/spark/util/DockerUtils.scala ---------------------------------------------------------------------- diff --git a/docker-integration-tests/src/test/scala/org/apache/spark/util/DockerUtils.scala b/docker-integration-tests/src/test/scala/org/apache/spark/util/DockerUtils.scala deleted file mode 100644 index fda377e..0000000 --- a/docker-integration-tests/src/test/scala/org/apache/spark/util/DockerUtils.scala +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.util - -import java.net.{Inet4Address, InetAddress, NetworkInterface} - -import scala.collection.JavaConverters._ -import scala.sys.process._ -import scala.util.Try - -private[spark] object DockerUtils { - - def getDockerIp(): String = { - /** If docker-machine is setup on this box, attempts to find the ip from it. */ - def findFromDockerMachine(): Option[String] = { - sys.env.get("DOCKER_MACHINE_NAME").flatMap { name => - Try(Seq("/bin/bash", "-c", s"docker-machine ip $name 2>/dev/null").!!.trim).toOption - } - } - sys.env.get("DOCKER_IP") - .orElse(findFromDockerMachine()) - .orElse(Try(Seq("/bin/bash", "-c", "boot2docker ip 2>/dev/null").!!.trim).toOption) - .getOrElse { - // This block of code is based on Utils.findLocalInetAddress(), but is modified to blacklist - // certain interfaces. 
- val address = InetAddress.getLocalHost - // Address resolves to something like 127.0.1.1, which happens on Debian; try to find - // a better address using the local network interfaces - // getNetworkInterfaces returns ifs in reverse order compared to ifconfig output order - // on unix-like system. On windows, it returns in index order. - // It's more proper to pick ip address following system output order. - val blackListedIFs = Seq( - "vboxnet0", // Mac - "docker0" // Linux - ) - val activeNetworkIFs = NetworkInterface.getNetworkInterfaces.asScala.toSeq.filter { i => - !blackListedIFs.contains(i.getName) - } - val reOrderedNetworkIFs = activeNetworkIFs.reverse - for (ni <- reOrderedNetworkIFs) { - val addresses = ni.getInetAddresses.asScala - .filterNot(addr => addr.isLinkLocalAddress || addr.isLoopbackAddress).toSeq - if (addresses.nonEmpty) { - val addr = addresses.find(_.isInstanceOf[Inet4Address]).getOrElse(addresses.head) - // because of Inet6Address.toHostName may add interface at the end if it knows about it - val strippedAddress = InetAddress.getByAddress(addr.getAddress) - return strippedAddress.getHostAddress - } - } - address.getHostAddress - } - } -} http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/README.md ---------------------------------------------------------------------- diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 40ba9c3..0000000 --- a/docker/README.md +++ /dev/null @@ -1,7 +0,0 @@ -Spark docker files -=========== - -Drawn from Matt Massie's docker files (https://github.com/massie/dockerfiles), -as well as some updates from Andre Schumacher (https://github.com/AndreSchumacher/docker). - -Tested with Docker version 0.8.1. http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/build ---------------------------------------------------------------------- diff --git a/docker/build b/docker/build deleted file mode 100755 index 253a2fc..0000000 --- a/docker/build +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -docker images > /dev/null || { echo Please install docker in non-sudo mode. 
; exit; } - -./spark-test/build \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-mesos/Dockerfile ---------------------------------------------------------------------- diff --git a/docker/spark-mesos/Dockerfile b/docker/spark-mesos/Dockerfile deleted file mode 100644 index fb3f267..0000000 --- a/docker/spark-mesos/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -# This is an example Dockerfile for creating a Spark image which can be -# references by the Spark property 'spark.mesos.executor.docker.image' -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -FROM mesosphere/mesos:0.20.1 - -# Update the base ubuntu image with dependencies needed for Spark -RUN apt-get update && \ - apt-get install -y python libnss3 openjdk-7-jre-headless curl - -RUN mkdir /opt/spark && \ - curl http://www.apache.org/dyn/closer.lua/spark/spark-1.4.0/spark-1.4.0-bin-hadoop2.4.tgz \ - | tar -xzC /opt -ENV SPARK_HOME /opt/spark -ENV MESOS_NATIVE_JAVA_LIBRARY /usr/local/lib/libmesos.so http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-test/README.md ---------------------------------------------------------------------- diff --git a/docker/spark-test/README.md b/docker/spark-test/README.md deleted file mode 100644 index ec0baf6..0000000 --- a/docker/spark-test/README.md +++ /dev/null @@ -1,11 +0,0 @@ -Spark Docker files usable for testing and development purposes. - -These images are intended to be run like so: - - docker run -v $SPARK_HOME:/opt/spark spark-test-master - docker run -v $SPARK_HOME:/opt/spark spark-test-worker spark://<master_ip>:7077 - -Using this configuration, the containers will have their Spark directories -mounted to your actual `SPARK_HOME`, allowing you to modify and recompile -your Spark source and have them immediately usable in the docker images -(without rebuilding them). http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-test/base/Dockerfile ---------------------------------------------------------------------- diff --git a/docker/spark-test/base/Dockerfile b/docker/spark-test/base/Dockerfile deleted file mode 100644 index 76f550f..0000000 --- a/docker/spark-test/base/Dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -FROM ubuntu:precise - -# Upgrade package index -# install a few other useful packages plus Open Jdk 7 -# Remove unneeded /var/lib/apt/lists/* after install to reduce the -# docker image size (by ~30MB) -RUN apt-get update && \ - apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server && \ - rm -rf /var/lib/apt/lists/* - -ENV SCALA_VERSION 2.11.7 -ENV CDH_VERSION cdh4 -ENV SCALA_HOME /opt/scala-$SCALA_VERSION -ENV SPARK_HOME /opt/spark -ENV PATH $SPARK_HOME:$SCALA_HOME/bin:$PATH - -# Install Scala -ADD http://www.scala-lang.org/files/archive/scala-$SCALA_VERSION.tgz / -RUN (cd / && gunzip < scala-$SCALA_VERSION.tgz)|(cd /opt && tar -xvf -) -RUN rm /scala-$SCALA_VERSION.tgz http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-test/build ---------------------------------------------------------------------- diff --git a/docker/spark-test/build b/docker/spark-test/build deleted file mode 100755 index 6f9e197..0000000 --- a/docker/spark-test/build +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -docker build -t spark-test-base spark-test/base/ -docker build -t spark-test-master spark-test/master/ -docker build -t spark-test-worker spark-test/worker/ http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-test/master/Dockerfile ---------------------------------------------------------------------- diff --git a/docker/spark-test/master/Dockerfile b/docker/spark-test/master/Dockerfile deleted file mode 100644 index f729534..0000000 --- a/docker/spark-test/master/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -# Spark Master -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -FROM spark-test-base -ADD default_cmd /root/ -CMD ["/root/default_cmd"] http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-test/master/default_cmd ---------------------------------------------------------------------- diff --git a/docker/spark-test/master/default_cmd b/docker/spark-test/master/default_cmd deleted file mode 100755 index 5a7da34..0000000 --- a/docker/spark-test/master/default_cmd +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -echo "CONTAINER_IP=$IP" -export SPARK_LOCAL_IP=$IP -export SPARK_PUBLIC_DNS=$IP - -# Avoid the default Docker behavior of mapping our IP address to an unreachable host name -umount /etc/hosts - -/opt/spark/bin/spark-class org.apache.spark.deploy.master.Master -i $IP http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-test/worker/Dockerfile ---------------------------------------------------------------------- diff --git a/docker/spark-test/worker/Dockerfile b/docker/spark-test/worker/Dockerfile deleted file mode 100644 index 890febe..0000000 --- a/docker/spark-test/worker/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -# Spark Worker -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -FROM spark-test-base -ENV SPARK_WORKER_PORT 8888 -ADD default_cmd /root/ -ENTRYPOINT ["/root/default_cmd"] http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docker/spark-test/worker/default_cmd ---------------------------------------------------------------------- diff --git a/docker/spark-test/worker/default_cmd b/docker/spark-test/worker/default_cmd deleted file mode 100755 index 31b06cb..0000000 --- a/docker/spark-test/worker/default_cmd +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -echo "CONTAINER_IP=$IP" -export SPARK_LOCAL_IP=$IP -export SPARK_PUBLIC_DNS=$IP - -# Avoid the default Docker behavior of mapping our IP address to an unreachable host name -umount /etc/hosts - -/opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker $1 http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/docs/streaming-kinesis-integration.md ---------------------------------------------------------------------- diff --git a/docs/streaming-kinesis-integration.md b/docs/streaming-kinesis-integration.md index 2a868e8..5b9a755 100644 --- a/docs/streaming-kinesis-integration.md +++ b/docs/streaming-kinesis-integration.md @@ -37,7 +37,7 @@ A Kinesis stream can be set up at one of the valid Kinesis endpoints with 1 or m [region name], [initial position], [checkpoint interval], StorageLevel.MEMORY_AND_DISK_2) See the [API docs](api/scala/index.html#org.apache.spark.streaming.kinesis.KinesisUtils$) - and the [example]({{site.SPARK_GITHUB_URL}}/tree/master/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala). Refer to the [Running the Example](#running-the-example) subsection for instructions on how to run the example. + and the [example]({{site.SPARK_GITHUB_URL}}/tree/master/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala). Refer to the [Running the Example](#running-the-example) subsection for instructions on how to run the example. </div> <div data-lang="java" markdown="1"> @@ -50,7 +50,7 @@ A Kinesis stream can be set up at one of the valid Kinesis endpoints with 1 or m [region name], [initial position], [checkpoint interval], StorageLevel.MEMORY_AND_DISK_2); See the [API docs](api/java/index.html?org/apache/spark/streaming/kinesis/KinesisUtils.html) - and the [example]({{site.SPARK_GITHUB_URL}}/tree/master/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java). Refer to the [Running the Example](#running-the-example) subsection for instructions to run the example. + and the [example]({{site.SPARK_GITHUB_URL}}/tree/master/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java). Refer to the [Running the Example](#running-the-example) subsection for instructions to run the example. 
</div> <div data-lang="python" markdown="1"> @@ -61,7 +61,7 @@ A Kinesis stream can be set up at one of the valid Kinesis endpoints with 1 or m [region name], [initial position], [checkpoint interval], StorageLevel.MEMORY_AND_DISK_2) See the [API docs](api/python/pyspark.streaming.html#pyspark.streaming.kinesis.KinesisUtils) - and the [example]({{site.SPARK_GITHUB_URL}}/tree/master/extras/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py). Refer to the [Running the Example](#running-the-example) subsection for instructions to run the example. + and the [example]({{site.SPARK_GITHUB_URL}}/tree/master/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py). Refer to the [Running the Example](#running-the-example) subsection for instructions to run the example. </div> </div> @@ -190,9 +190,9 @@ To run the example, </div> <div data-lang="python" markdown="1"> - bin/spark-submit --jars extras/kinesis-asl/target/scala-*/\ + bin/spark-submit --jars external/kinesis-asl/target/scala-*/\ spark-streaming-kinesis-asl-assembly_*.jar \ - extras/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py \ + external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py \ [Kinesis app name] [Kinesis stream name] [endpoint URL] [region name] </div> http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/external/docker-integration-tests/pom.xml ---------------------------------------------------------------------- diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml new file mode 100644 index 0000000..1764aa9 --- /dev/null +++ b/external/docker-integration-tests/pom.xml @@ -0,0 +1,184 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. 
+ --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.spark</groupId> + <artifactId>spark-parent_2.11</artifactId> + <version>2.0.0-SNAPSHOT</version> + <relativePath>../../pom.xml</relativePath> + </parent> + + <artifactId>spark-docker-integration-tests_2.11</artifactId> + <packaging>jar</packaging> + <name>Spark Project Docker Integration Tests</name> + <url>http://spark.apache.org/</url> + <properties> + <sbt.project.name>docker-integration-tests</sbt.project.name> + </properties> + + <dependencies> + <dependency> + <groupId>com.spotify</groupId> + <artifactId>docker-client</artifactId> + <classifier>shaded</classifier> + <scope>test</scope> + <!-- + See https://github.com/spotify/docker-client/pull/272#issuecomment-155249101 + for an explanation of why these exclusions are (necessarily) a mess. + --> + <exclusions> + <exclusion> + <groupId>com.fasterxml.jackson.jaxrs</groupId> + <artifactId>jackson-jaxrs-json-provider</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.datatype</groupId> + <artifactId>jackson-datatype-guava</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> + <exclusion> + <groupId>org.glassfish.jersey.core</groupId> + <artifactId>jersey-client</artifactId> + </exclusion> + <exclusion> + <groupId>org.glassfish.jersey.connectors</groupId> + <artifactId>jersey-apache-connector</artifactId> + </exclusion> + <exclusion> + <groupId>org.glassfish.jersey.media</groupId> + <artifactId>jersey-media-json-jackson</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.httpcomponents</groupId> + <artifactId>httpclient</artifactId> + <version>4.5</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.httpcomponents</groupId> + <artifactId>httpcore</artifactId> + <version>4.4.1</version> + <scope>test</scope> + </dependency> + <!-- Necessary in order to avoid errors in log messages: --> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>18.0</version> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-sql_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-sql_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-test-tags_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>mysql</groupId> + <artifactId>mysql-connector-java</artifactId> + <scope>test</scope> + </dependency> + <dependency> + 
<groupId>org.postgresql</groupId>
+      <artifactId>postgresql</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- Oracle ojdbc jar, used by the Oracle integration suite for Docker testing.
+      See https://github.com/apache/spark/pull/11306 for background on why we need
+      to use an ojdbc jar for the test case. The Maven dependency here is commented
+      out because the Maven repository does not currently contain the ojdbc jar mentioned.
+      Once the jar is available in Maven, it can be uncommented. -->
+    <!--
+    <dependency>
+      <groupId>com.oracle</groupId>
+      <artifactId>ojdbc6</artifactId>
+      <version>11.2.0.2.0</version>
+      <scope>test</scope>
+    </dependency>
+    -->
+    <!-- Jersey dependencies, used to override the version.
+      See https://github.com/apache/spark/pull/9503#issuecomment-154369560 for
+      background on why we need to use a newer Jersey only in this test module;
+      we can remove this once https://github.com/spotify/docker-client/pull/272 is
+      merged and a new docker-client release is published. -->
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-server</artifactId>
+      <version>1.19</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-core</artifactId>
+      <version>1.19</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-servlet</artifactId>
+      <version>1.19</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-json</artifactId>
+      <version>1.19</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>stax</groupId>
+          <artifactId>stax-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <!-- End Jersey dependencies -->
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala
----------------------------------------------------------------------
diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala
new file mode 100644
index 0000000..f73231f
--- /dev/null
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.sql.jdbc + +import java.net.ServerSocket +import java.sql.Connection + +import scala.collection.JavaConverters._ +import scala.util.control.NonFatal + +import com.spotify.docker.client._ +import com.spotify.docker.client.messages.{ContainerConfig, HostConfig, PortBinding} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.concurrent.Eventually +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.util.DockerUtils + +abstract class DatabaseOnDocker { + /** + * The docker image to be pulled. + */ + val imageName: String + + /** + * Environment variables to set inside of the Docker container while launching it. + */ + val env: Map[String, String] + + /** + * The container-internal JDBC port that the database listens on. + */ + val jdbcPort: Int + + /** + * Return a JDBC URL that connects to the database running at the given IP address and port. + */ + def getJdbcUrl(ip: String, port: Int): String +} + +abstract class DockerJDBCIntegrationSuite + extends SparkFunSuite + with BeforeAndAfterAll + with Eventually + with SharedSQLContext { + + val db: DatabaseOnDocker + + private var docker: DockerClient = _ + private var containerId: String = _ + protected var jdbcUrl: String = _ + + override def beforeAll() { + super.beforeAll() + try { + docker = DefaultDockerClient.fromEnv.build() + // Check that Docker is actually up + try { + docker.ping() + } catch { + case NonFatal(e) => + log.error("Exception while connecting to Docker. Check whether Docker is running.") + throw e + } + // Ensure that the Docker image is installed: + try { + docker.inspectImage(db.imageName) + } catch { + case e: ImageNotFoundException => + log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") + docker.pull(db.imageName) + } + // Configure networking (necessary for boot2docker / Docker Machine) + val externalPort: Int = { + val sock = new ServerSocket(0) + val port = sock.getLocalPort + sock.close() + port + } + val dockerIp = DockerUtils.getDockerIp() + val hostConfig: HostConfig = HostConfig.builder() + .networkMode("bridge") + .portBindings( + Map(s"${db.jdbcPort}/tcp" -> List(PortBinding.of(dockerIp, externalPort)).asJava).asJava) + .build() + // Create the database container: + val config = ContainerConfig.builder() + .image(db.imageName) + .networkDisabled(false) + .env(db.env.map { case (k, v) => s"$k=$v" }.toSeq.asJava) + .hostConfig(hostConfig) + .exposedPorts(s"${db.jdbcPort}/tcp") + .build() + containerId = docker.createContainer(config).id + // Start the container and wait until the database can accept JDBC connections: + docker.startContainer(containerId) + jdbcUrl = db.getJdbcUrl(dockerIp, externalPort) + eventually(timeout(60.seconds), interval(1.seconds)) { + val conn = java.sql.DriverManager.getConnection(jdbcUrl) + conn.close() + } + // Run any setup queries: + val conn: Connection = java.sql.DriverManager.getConnection(jdbcUrl) + try { + dataPreparation(conn) + } finally { + conn.close() + } + } catch { + case NonFatal(e) => + try { + afterAll() + } finally { + throw e + } + } + } + + override def afterAll() { + try { + if (docker != null) { + try { + if (containerId != null) { + docker.killContainer(containerId) + docker.removeContainer(containerId) + } + } catch { + case NonFatal(e) => + logWarning(s"Could not stop container $containerId", e) + } finally { + docker.close() + } + } + } finally { + super.afterAll() + } + } + + /** + 
* Prepare databases and tables for testing. + */ + def dataPreparation(connection: Connection): Unit +} http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala ---------------------------------------------------------------------- diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala new file mode 100644 index 0000000..c68e4dc --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +import java.math.BigDecimal +import java.sql.{Connection, Date, Timestamp} +import java.util.Properties + +import org.apache.spark.tags.DockerTest + +@DockerTest +class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { + override val db = new DatabaseOnDocker { + override val imageName = "mysql:5.7.9" + override val env = Map( + "MYSQL_ROOT_PASSWORD" -> "rootpass" + ) + override val jdbcPort: Int = 3306 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass" + } + + override def dataPreparation(conn: Connection): Unit = { + conn.prepareStatement("CREATE DATABASE foo").executeUpdate() + conn.prepareStatement("CREATE TABLE tbl (x INTEGER, y TEXT(8))").executeUpdate() + conn.prepareStatement("INSERT INTO tbl VALUES (42,'fred')").executeUpdate() + conn.prepareStatement("INSERT INTO tbl VALUES (17,'dave')").executeUpdate() + + conn.prepareStatement("CREATE TABLE numbers (onebit BIT(1), tenbits BIT(10), " + + "small SMALLINT, med MEDIUMINT, nor INT, big BIGINT, deci DECIMAL(40,20), flt FLOAT, " + + "dbl DOUBLE)").executeUpdate() + conn.prepareStatement("INSERT INTO numbers VALUES (b'0', b'1000100101', " + + "17, 77777, 123456789, 123456789012345, 123456789012345.123456789012345, " + + "42.75, 1.0000000000000002)").executeUpdate() + + conn.prepareStatement("CREATE TABLE dates (d DATE, t TIME, dt DATETIME, ts TIMESTAMP, " + + "yr YEAR)").executeUpdate() + conn.prepareStatement("INSERT INTO dates VALUES ('1991-11-09', '13:31:24', " + + "'1996-01-01 01:23:45', '2009-02-13 23:31:30', '2001')").executeUpdate() + + // TODO: Test locale conversion for strings. 
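+    // The strings table below covers MySQL's character (CHAR/VARCHAR), text
+    // (TINYTEXT through LONGTEXT), and binary (BINARY/VARBINARY/BLOB) column
+    // families, so the suite can check that each maps to String or Array[Byte].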
+ conn.prepareStatement("CREATE TABLE strings (a CHAR(10), b VARCHAR(10), c TINYTEXT, " + + "d TEXT, e MEDIUMTEXT, f LONGTEXT, g BINARY(4), h VARBINARY(10), i BLOB)" + ).executeUpdate() + conn.prepareStatement("INSERT INTO strings VALUES ('the', 'quick', 'brown', 'fox', " + + "'jumps', 'over', 'the', 'lazy', 'dog')").executeUpdate() + } + + test("Basic test") { + val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties) + val rows = df.collect() + assert(rows.length == 2) + val types = rows(0).toSeq.map(x => x.getClass.toString) + assert(types.length == 2) + assert(types(0).equals("class java.lang.Integer")) + assert(types(1).equals("class java.lang.String")) + } + + test("Numeric types") { + val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) + val rows = df.collect() + assert(rows.length == 1) + val types = rows(0).toSeq.map(x => x.getClass.toString) + assert(types.length == 9) + assert(types(0).equals("class java.lang.Boolean")) + assert(types(1).equals("class java.lang.Long")) + assert(types(2).equals("class java.lang.Integer")) + assert(types(3).equals("class java.lang.Integer")) + assert(types(4).equals("class java.lang.Integer")) + assert(types(5).equals("class java.lang.Long")) + assert(types(6).equals("class java.math.BigDecimal")) + assert(types(7).equals("class java.lang.Double")) + assert(types(8).equals("class java.lang.Double")) + assert(rows(0).getBoolean(0) == false) + assert(rows(0).getLong(1) == 0x225) + assert(rows(0).getInt(2) == 17) + assert(rows(0).getInt(3) == 77777) + assert(rows(0).getInt(4) == 123456789) + assert(rows(0).getLong(5) == 123456789012345L) + val bd = new BigDecimal("123456789012345.12345678901234500000") + assert(rows(0).getAs[BigDecimal](6).equals(bd)) + assert(rows(0).getDouble(7) == 42.75) + assert(rows(0).getDouble(8) == 1.0000000000000002) + } + + test("Date types") { + val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) + val rows = df.collect() + assert(rows.length == 1) + val types = rows(0).toSeq.map(x => x.getClass.toString) + assert(types.length == 5) + assert(types(0).equals("class java.sql.Date")) + assert(types(1).equals("class java.sql.Timestamp")) + assert(types(2).equals("class java.sql.Timestamp")) + assert(types(3).equals("class java.sql.Timestamp")) + assert(types(4).equals("class java.sql.Date")) + assert(rows(0).getAs[Date](0).equals(Date.valueOf("1991-11-09"))) + assert(rows(0).getAs[Timestamp](1).equals(Timestamp.valueOf("1970-01-01 13:31:24"))) + assert(rows(0).getAs[Timestamp](2).equals(Timestamp.valueOf("1996-01-01 01:23:45"))) + assert(rows(0).getAs[Timestamp](3).equals(Timestamp.valueOf("2009-02-13 23:31:30"))) + assert(rows(0).getAs[Date](4).equals(Date.valueOf("2001-01-01"))) + } + + test("String types") { + val df = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties) + val rows = df.collect() + assert(rows.length == 1) + val types = rows(0).toSeq.map(x => x.getClass.toString) + assert(types.length == 9) + assert(types(0).equals("class java.lang.String")) + assert(types(1).equals("class java.lang.String")) + assert(types(2).equals("class java.lang.String")) + assert(types(3).equals("class java.lang.String")) + assert(types(4).equals("class java.lang.String")) + assert(types(5).equals("class java.lang.String")) + assert(types(6).equals("class [B")) + assert(types(7).equals("class [B")) + assert(types(8).equals("class [B")) + assert(rows(0).getString(0).equals("the")) + assert(rows(0).getString(1).equals("quick")) + assert(rows(0).getString(2).equals("brown")) + 
assert(rows(0).getString(3).equals("fox"))
+    assert(rows(0).getString(4).equals("jumps"))
+    assert(rows(0).getString(5).equals("over"))
+    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](6), Array[Byte](116, 104, 101, 0)))
+    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](7), Array[Byte](108, 97, 122, 121)))
+    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](8), Array[Byte](100, 111, 103)))
+  }
+
+  test("Basic write test") {
+    val df1 = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties)
+    val df2 = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties)
+    val df3 = sqlContext.read.jdbc(jdbcUrl, "strings", new Properties)
+    df1.write.jdbc(jdbcUrl, "numberscopy", new Properties)
+    df2.write.jdbc(jdbcUrl, "datescopy", new Properties)
+    df3.write.jdbc(jdbcUrl, "stringscopy", new Properties)
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
----------------------------------------------------------------------
diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
new file mode 100644
index 0000000..8a0f938
--- /dev/null
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc
+
+import java.sql.Connection
+import java.util.Properties
+
+import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.tags.DockerTest
+
+/**
+ * This suite was tested against an Oracle Docker image. The ojdbc6-11.2.0.2.0.jar driver is
+ * not available in the Maven repository, so it was downloaded manually from the Oracle site
+ * and installed into the local Maven repository, and the suite was tested against that. For a
+ * SparkQA test run, the ojdbc jar likewise has to be placed manually in the local Maven
+ * repository (com/oracle/ojdbc6/11.2.0.2.0).
+ *
+ * The steps to test this suite:
+ * 1. Pull the Oracle 11g image - docker pull wnameless/oracle-xe-11g
+ * 2. Start Docker - sudo service docker start
+ * 3. Download the Oracle 11g driver jar and put it in the local Maven repository:
+ *    (com/oracle/ojdbc6/11.2.0.2.0/ojdbc6-11.2.0.2.0.jar)
+ * 4. Increase the timeout and interval parameters for the Oracle test from 60,1 to high values
+ *    in DockerJDBCIntegrationSuite.scala (locally tested successfully with 200,200).
+ * 5.
Run the Spark test - ./build/sbt "test-only org.apache.spark.sql.jdbc.OracleIntegrationSuite"
+ *
+ * All tests in this suite are ignored because of the dependency on the Oracle jar, which is
+ * not available in the Maven repository.
+ */
+@DockerTest
+class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLContext {
+  import testImplicits._
+
+  override val db = new DatabaseOnDocker {
+    override val imageName = "wnameless/oracle-xe-11g:latest"
+    override val env = Map(
+      "ORACLE_ROOT_PASSWORD" -> "oracle"
+    )
+    override val jdbcPort: Int = 1521
+    override def getJdbcUrl(ip: String, port: Int): String =
+      s"jdbc:oracle:thin:system/oracle@//$ip:$port/xe"
+  }
+
+  override def dataPreparation(conn: Connection): Unit = {
+  }
+
+  ignore("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") {
+    // create a sample dataframe with a string-typed column
+    val df1 = sparkContext.parallelize(Seq(("foo"))).toDF("x")
+    // write the dataframe to the oracle table tbl2
+    df1.write.jdbc(jdbcUrl, "tbl2", new Properties)
+    // read the table back from oracle
+    val dfRead = sqlContext.read.jdbc(jdbcUrl, "tbl2", new Properties)
+    // get the rows
+    val rows = dfRead.collect()
+    // verify the inferred data type is String
+    val types = rows(0).toSeq.map(x => x.getClass.toString)
+    assert(types(0).equals("class java.lang.String"))
+    // verify the inserted value is correct
+    assert(rows(0).getString(0).equals("foo"))
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/256704c7/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala
----------------------------------------------------------------------
diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala
new file mode 100644
index 0000000..d55cdcf
--- /dev/null
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.sql.jdbc + +import java.sql.Connection +import java.util.Properties + +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.types.{ArrayType, DecimalType} +import org.apache.spark.tags.DockerTest + +@DockerTest +class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { + override val db = new DatabaseOnDocker { + override val imageName = "postgres:9.4.5" + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val jdbcPort = 5432 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + + override def dataPreparation(conn: Connection): Unit = { + conn.prepareStatement("CREATE DATABASE foo").executeUpdate() + conn.setCatalog("foo") + conn.prepareStatement("CREATE TYPE enum_type AS ENUM ('d1', 'd2')").executeUpdate() + conn.prepareStatement("CREATE TABLE bar (c0 text, c1 integer, c2 double precision, c3 bigint, " + + "c4 bit(1), c5 bit(10), c6 bytea, c7 boolean, c8 inet, c9 cidr, " + + "c10 integer[], c11 text[], c12 real[], c13 numeric(2,2)[], c14 enum_type)").executeUpdate() + conn.prepareStatement("INSERT INTO bar VALUES ('hello', 42, 1.25, 123456789012345, B'0', " + + "B'1000100101', E'\\\\xDEADBEEF', true, '172.16.0.42', '192.168.0.0/16', " + + """'{1, 2}', '{"a", null, "b"}', '{0.11, 0.22}', '{0.11, 0.22}', 'd1')""").executeUpdate() + } + + test("Type mapping for various types") { + val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties) + val rows = df.collect() + assert(rows.length == 1) + val types = rows(0).toSeq.map(x => x.getClass) + assert(types.length == 15) + assert(classOf[String].isAssignableFrom(types(0))) + assert(classOf[java.lang.Integer].isAssignableFrom(types(1))) + assert(classOf[java.lang.Double].isAssignableFrom(types(2))) + assert(classOf[java.lang.Long].isAssignableFrom(types(3))) + assert(classOf[java.lang.Boolean].isAssignableFrom(types(4))) + assert(classOf[Array[Byte]].isAssignableFrom(types(5))) + assert(classOf[Array[Byte]].isAssignableFrom(types(6))) + assert(classOf[java.lang.Boolean].isAssignableFrom(types(7))) + assert(classOf[String].isAssignableFrom(types(8))) + assert(classOf[String].isAssignableFrom(types(9))) + assert(classOf[Seq[Int]].isAssignableFrom(types(10))) + assert(classOf[Seq[String]].isAssignableFrom(types(11))) + assert(classOf[Seq[Double]].isAssignableFrom(types(12))) + assert(classOf[Seq[BigDecimal]].isAssignableFrom(types(13))) + assert(classOf[String].isAssignableFrom(types(14))) + assert(rows(0).getString(0).equals("hello")) + assert(rows(0).getInt(1) == 42) + assert(rows(0).getDouble(2) == 1.25) + assert(rows(0).getLong(3) == 123456789012345L) + assert(!rows(0).getBoolean(4)) + // BIT(10)'s come back as ASCII strings of ten ASCII 0's and 1's... 
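+    // (ASCII 49 is '1' and 48 is '0', so B'1000100101' surfaces as the ten bytes below.)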
+    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](5),
+      Array[Byte](49, 48, 48, 48, 49, 48, 48, 49, 48, 49)))
+    assert(java.util.Arrays.equals(rows(0).getAs[Array[Byte]](6),
+      Array[Byte](0xDE.toByte, 0xAD.toByte, 0xBE.toByte, 0xEF.toByte)))
+    assert(rows(0).getBoolean(7))
+    assert(rows(0).getString(8) == "172.16.0.42")
+    assert(rows(0).getString(9) == "192.168.0.0/16")
+    assert(rows(0).getSeq(10) == Seq(1, 2))
+    assert(rows(0).getSeq(11) == Seq("a", null, "b"))
+    assert(rows(0).getSeq(12).toSeq == Seq(0.11f, 0.22f))
+    assert(rows(0).getSeq(13) == Seq("0.11", "0.22").map(BigDecimal(_).bigDecimal))
+    assert(rows(0).getString(14) == "d1")
+  }
+
+  test("Basic write test") {
+    val df = sqlContext.read.jdbc(jdbcUrl, "bar", new Properties)
+    // Test only that it doesn't crash.
+    df.write.jdbc(jdbcUrl, "public.barcopy", new Properties)
+    // Test that the written numeric type has the same DataType as the input
+    assert(sqlContext.read.jdbc(jdbcUrl, "public.barcopy", new Properties).schema(13).dataType ==
+      ArrayType(DecimalType(2, 2), true))
+    // Test writing null values.
+    df.select(df.queryExecution.analyzed.output.map { a =>
+      Column(Literal.create(null, a.dataType)).as(a.name)
+    }: _*).write.jdbc(jdbcUrl, "public.barcopy2", new Properties)
+  }
+}
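
For reference, a new database can be wired into these integration tests by following the
same pattern as the MySQL and Postgres suites above: extend DockerJDBCIntegrationSuite,
describe the container through a DatabaseOnDocker, seed data in dataPreparation(), and write
ordinary tests against jdbcUrl. A minimal sketch follows; the image name, port, environment
variable, and JDBC URL scheme are illustrative placeholders, not part of this commit:

    package org.apache.spark.sql.jdbc

    import java.sql.Connection
    import java.util.Properties

    import org.apache.spark.tags.DockerTest

    @DockerTest
    class ExampleIntegrationSuite extends DockerJDBCIntegrationSuite {
      override val db = new DatabaseOnDocker {
        // Hypothetical image; any image that exposes a JDBC endpoint works.
        override val imageName = "example/db:latest"
        override val env = Map("EXAMPLE_ROOT_PASSWORD" -> "rootpass")
        // The container-internal port the database listens on.
        override val jdbcPort: Int = 5000
        override def getJdbcUrl(ip: String, port: Int): String =
          s"jdbc:example://$ip:$port/db?user=root&password=rootpass"
      }

      // Runs once the container accepts JDBC connections.
      override def dataPreparation(conn: Connection): Unit = {
        conn.prepareStatement("CREATE TABLE tbl (x INTEGER)").executeUpdate()
        conn.prepareStatement("INSERT INTO tbl VALUES (42)").executeUpdate()
      }

      test("Basic read") {
        val df = sqlContext.read.jdbc(jdbcUrl, "tbl", new Properties)
        assert(df.collect().length == 1)
      }
    }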