[
https://issues.apache.org/jira/browse/SPARK-4876?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Leonid Mikhailov updated SPARK-4876:
------------------------------------
Description:
I am running Spark version 1.1.1 (built it on Mac using:
mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -DskipTests clean package)
I start JDBC server like this:
./sbin/start-thriftserver.sh
In my IDE I am running the following example:
{code:title= TestSparkSQLJdbcAccess.java|borderStyle=solid}
package com.bla.spark.sql;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class TestSparkSQLJdbcAccess {
private static String driverName = "org.apache.hive.jdbc.HiveDriver";
/**
* @param args
* @throws SQLException
*/
public static void main(String[] args) throws SQLException {
try {
Class.forName(driverName);
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.exit(1);
}
//replace "hive" here with the name of the user the queries should run
as
Connection con =
DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "");
Statement stmt = con.createStatement();
String tableName = "testHiveDriverTable";
stmt.execute("drop table if exists " + tableName);
stmt.execute("create table " + tableName + " (key int, value string)");
// show tables
String sql = "show tables '" + tableName + "'";
System.out.println("Running: " + sql);
ResultSet res = stmt.executeQuery(sql);
if (res.next()) {
System.out.println(res.getString(1));
}
// describe table
sql = "describe " + tableName;
System.out.println("Running: " + sql);
res = stmt.executeQuery(sql);
while (res.next()) {
System.out.println(res.getString(1) + "\t" + res.getString(2));
}
// load data into table
// NOTE: filepath has to be local to the hive server
// NOTE: /tmp/a.txt is a ctrl-A separated file with two fields per line
String filepath = "/tmp/a.txt";
sql = "load data local inpath '" + filepath + "' into table " +
tableName;
System.out.println("Running: " + sql);
stmt.execute(sql);
// select * query
sql = "select * from " + tableName;
System.out.println("Running: " + sql);
res = stmt.executeQuery(sql);
while (res.next()) {
System.out.println(String.valueOf(res.getInt(1)) + "\t" +
res.getString(2));
}
// regular hive query
sql = "select count(1) from " + tableName;
System.out.println("Running: " + sql);
res = stmt.executeQuery(sql);
while (res.next()) {
System.out.println(res.getString(1));
}
}
}
{code}
The pom.xml is as follows:
{code:xml}
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.esri.spark</groupId>
<artifactId>HiveJDBCTest</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>HiveJDBCTest</name>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>0.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2</version>
</dependency>
</dependencies>
</project>
{code}
I am getting an exception:
{noformat}
Exception in thread "main" java.sql.SQLException:
org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution
Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask.
MetaException(message:file:/user/hive/warehouse/testhivedrivertable is not a
directory or unable to create one)
at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:165)
at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:153)
at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:198)
at
com.esri.spark.sql.TestSparkSQLJdbcAccess.main(TestSparkSQLJdbcAccess.java:29)
{noformat}
BTW, I cannot use hive-jdbc version 0.13.0, because there is a backward
compatibility problem described here:
https://issues.apache.org/jira/browse/HIVE-6050
was:
I am running Spark version 1.1.1 (built it on Mac using:
mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -DskipTests clean package)
I start spark like this:
./sbin/start-thriftserver.sh
In my IDE I am running the following example:
package com.esri.spark.sql;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class TestSparkSQLJdbcAccess {
privatestatic String driverName = "org.apache.hive.jdbc.HiveDriver";
/**
* @param args
* @throws SQLException
*/
public static void main(String[] args) throws SQLException {
try {
Class.forName(driverName);
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.exit(1);
}
//replace "hive" here with the name of the user the queries should run
as
Connection con =
DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "");
Statement stmt = con.createStatement();
String tableName = "testHiveDriverTable";
stmt.execute("drop table if exists " + tableName);
stmt.execute("create table " + tableName + " (key int, value string)");
// show tables
String sql = "show tables '" + tableName + "'";
System.out.println("Running: " + sql);
ResultSet res = stmt.executeQuery(sql);
if (res.next()) {
System.out.println(res.getString(1));
}
// describe table
sql = "describe " + tableName;
System.out.println("Running: " + sql);
res = stmt.executeQuery(sql);
while (res.next()) {
System.out.println(res.getString(1) + "\t" + res.getString(2));
}
// load data into table
// NOTE: filepath has to be local to the hive server
// NOTE: /tmp/a.txt is a ctrl-A separated file with two fields per line
String filepath = "/tmp/a.txt";
sql = "load data local inpath '" + filepath + "' into table " +
tableName;
System.out.println("Running: " + sql);
stmt.execute(sql);
// select * query
sql = "select * from " + tableName;
System.out.println("Running: " + sql);
res = stmt.executeQuery(sql);
while (res.next()) {
System.out.println(String.valueOf(res.getInt(1)) + "\t" +
res.getString(2));
}
// regular hive query
sql = "select count(1) from " + tableName;
System.out.println("Running: " + sql);
res = stmt.executeQuery(sql);
while (res.next()) {
System.out.println(res.getString(1));
}
}
}
To pom.xml is as follows:
<projectxmlns="http://maven.apache.org/POM/4.0.0"xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.esri.spark</groupId>
<artifactId>HiveJDBCTest</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>HiveJDBCTest</name>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>0.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2</version>
</dependency>
</dependencies>
</project>
I am getting an exception:
Exception in thread "main" java.sql.SQLException:
org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution
Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask.
MetaException(message:file:/user/hive/warehouse/testhivedrivertable is not a
directory or unable to create one)
at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:165)
at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:153)
at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:198)
at
com.esri.spark.sql.TestSparkSQLJdbcAccess.main(TestSparkSQLJdbcAccess.java:29)
BTW, I cannot use hive-jdbc version 0.13.0, because there is some backward
compatibility problem described here :
https://issues.apache.org/jira/browse/HIVE-6050
> An exception thrown when accessing a Spark SQL table using a JDBC driver from
> a standalone app.
> -----------------------------------------------------------------------------------------------
>
> Key: SPARK-4876
> URL: https://issues.apache.org/jira/browse/SPARK-4876
> Project: Spark
> Issue Type: Bug
> Components: Spark Core, SQL
> Affects Versions: 1.1.1
> Environment: Mac OS X 10.10.1, Apache Spark 1.1.1,
> Reporter: Leonid Mikhailov
>
> I am running Spark version 1.1.1 (built it on Mac using:
> mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -DskipTests clean
> package)
> I start JDBC server like this:
> ./sbin/start-thriftserver.sh
> In my IDE I am running the following example:
> {code:title= TestSparkSQLJdbcAccess.java|borderStyle=solid}
> package com.bla.spark.sql;
> import java.sql.Connection;
> import java.sql.DriverManager;
> import java.sql.ResultSet;
> import java.sql.SQLException;
> import java.sql.Statement;
> public class TestSparkSQLJdbcAccess {
> privatestatic String driverName = "org.apache.hive.jdbc.HiveDriver";
>
> /**
> * @param args
> * @throws SQLException
> */
> public static void main(String[] args) throws SQLException {
> try {
> Class.forName(driverName);
> } catch (ClassNotFoundException e) {
> // TODO Auto-generated catch block
> e.printStackTrace();
> System.exit(1);
> }
> //replace "hive" here with the name of the user the queries should
> run as
> Connection con =
> DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "");
> Statement stmt = con.createStatement();
> String tableName = "testHiveDriverTable";
> stmt.execute("drop table if exists " + tableName);
> stmt.execute("create table " + tableName + " (key int, value
> string)");
> // show tables
> String sql = "show tables '" + tableName + "'";
> System.out.println("Running: " + sql);
> ResultSet res = stmt.executeQuery(sql);
> if (res.next()) {
> System.out.println(res.getString(1));
> }
> // describe table
> sql = "describe " + tableName;
> System.out.println("Running: " + sql);
> res = stmt.executeQuery(sql);
> while (res.next()) {
> System.out.println(res.getString(1) + "\t" + res.getString(2));
> }
>
> // load data into table
> // NOTE: filepath has to be local to the hive server
> // NOTE: /tmp/a.txt is a ctrl-A separated file with two fields per
> line
> String filepath = "/tmp/a.txt";
> sql = "load data local inpath '" + filepath + "' into table " +
> tableName;
> System.out.println("Running: " + sql);
> stmt.execute(sql);
>
> // select * query
> sql = "select * from " + tableName;
> System.out.println("Running: " + sql);
> res = stmt.executeQuery(sql);
> while (res.next()) {
> System.out.println(String.valueOf(res.getInt(1)) + "\t" +
> res.getString(2));
> }
>
> // regular hive query
> sql = "select count(1) from " + tableName;
> System.out.println("Running: " + sql);
> res = stmt.executeQuery(sql);
> while (res.next()) {
> System.out.println(res.getString(1));
> }
> }
> }
> {code}
> To pom.xml is as follows:
> {code:xml}
> <projectxmlns="http://maven.apache.org/POM/4.0.0"xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
> http://maven.apache.org/xsd/maven-4.0.0.xsd">
> <modelVersion>4.0.0</modelVersion>
> <groupId>com.esri.spark</groupId>
> <artifactId>HiveJDBCTest</artifactId>
> <version>0.0.1-SNAPSHOT</version>
> <name>HiveJDBCTest</name>
> <dependencies>
> <dependency>
> <groupId>org.apache.hive</groupId>
> <artifactId>hive-jdbc</artifactId>
> <version>0.12.0</version>
> </dependency>
> <dependency>
> <groupId>org.apache.hadoop</groupId>
> <artifactId>hadoop-core</artifactId>
> <version>0.20.2</version>
> </dependency>
> </dependencies>
> </project>
> {code}
> I am getting an exception:
> {noformat}
> Exception in thread "main" java.sql.SQLException:
> org.apache.spark.sql.execution.QueryExecutionException: FAILED: Execution
> Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask.
> MetaException(message:file:/user/hive/warehouse/testhivedrivertable is not a
> directory or unable to create one)
> at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:165)
> at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:153)
> at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:198)
> at
> com.esri.spark.sql.TestSparkSQLJdbcAccess.main(TestSparkSQLJdbcAccess.java:29)
> {noformat}
> BTW, I cannot use hive-jdbc version 0.13.0, because there is some backward
> compatibility problem described here :
> https://issues.apache.org/jira/browse/HIVE-6050
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]