Abhishek Shrivastava created SPARK-23652:
--------------------------------------------
Summary: Spark Connection with S3
Key: SPARK-23652
URL: https://issues.apache.org/jira/browse/SPARK-23652
Project: Spark
Issue Type: Question
Components: Spark Shell, Spark Submit
Affects Versions: 1.6.0
Reporter: Abhishek Shrivastava
In the spark-shell session below, I am trying to connect to S3 and load a file
to create a DataFrame:
{{spark-shell --packages com.databricks:spark-csv_2.10:1.5.0 scala> val
sqlContext = new org.apache.spark.sql.SQLContext(sc) scala>
sc.hadoopConfiguration.set("fs.s3a.access.key", "") scala>
sc.hadoopConfiguration.set("fs.s3a.secret.key", "") scala> val weekly =
sqlContext.read.format("com.databricks.spark.csv").option("header",
"true").option("delimiter", ",").load("s3://usr_bucket/data/file.csv") scala>
print(weekly) scala> weekly.show()}}
{{Error:}}
{{java.lang.VerifyError: Bad type on operand stack Exception Details: Location:
org/apache/hadoop/fs/s3/Jets3tFileSystemStore.initialize(Ljava/net/URI;Lorg/apache/hadoop/conf/Configuration;)V
@43: invokespecial Reason: Type 'org/jets3t/service/security/AWSCredentials'
(current frame, stack[3]) is not assignable to
'org/jets3t/service/security/ProviderCredentials' Current Frame: bci: @43
flags: \{ } locals: \{ 'org/apache/hadoop/fs/s3/Jets3tFileSystemStore',
'java/net/URI', 'org/apache/hadoop/conf/Configuration',
'org/apache/hadoop/fs/s3/S3Credentials',
'org/jets3t/service/security/AWSCredentials' } stack: \{
'org/apache/hadoop/fs/s3/Jets3tFileSystemStore', uninitialized 37,
uninitialized 37, 'org/jets3t/service/security/AWSCredentials' } Bytecode:
0000000: 2a2c b500 02bb 0003 59b7 0004 4e2d 2b2c 0000010: b600 05bb 0006 592d
b600 072d b600 08b7 0000020: 0009 3a04 2abb 000a 5919 04b7 000b b500 0000030:
0ca7 0023 3a04 1904 b600 0ec1 000f 9900 0000040: 0c19 04b6 000e c000 0fbf bb00
1059 1904 0000050: b700 11bf 2abb 0012 592b b600 13b7 0014 0000060: b500 152a
2c12 1611 1000 b600 17b5 0018 0000070: b1 Exception Handler Table: bci [19, 49]
=> handler: 52 Stackmap Table:
full_frame(@52,\{Object[#194],Object[#195],Object[#196],Object[#197]},\{Object[#198]})
append_frame(@74,Object[#198]) chop_frame(@84,1) at
org.apache.hadoop.fs.s3.S3FileSystem.createDefaultStore(S3FileSystem.java:119)
at org.apache.hadoop.fs.s3.S3FileSystem.initialize(S3FileSystem.java:109) at
org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2816) at
org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:98) at
org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2853) at
org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2835) at
org.apache.hadoop.fs.FileSystem.get(FileSystem.java:387) at
org.apache.hadoop.fs.Path.getFileSystem(Path.java:296) at
org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:258)
at
org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:229)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:202) at
org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239) at
org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237) at
scala.Option.getOrElse(Option.scala:120) at
org.apache.spark.rdd.RDD.partitions(RDD.scala:237) at
org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239) at
org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237) at
scala.Option.getOrElse(Option.scala:120) at
org.apache.spark.rdd.RDD.partitions(RDD.scala:237) at
org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239) at
org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237) at
scala.Option.getOrElse(Option.scala:120) at
org.apache.spark.rdd.RDD.partitions(RDD.scala:237) at
org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1307) at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) at
org.apache.spark.rdd.RDD.take(RDD.scala:1302) at
org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1342) at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) at
org.apache.spark.rdd.RDD.first(RDD.scala:1341) at
com.databricks.spark.csv.CsvRelation.firstLine$lzycompute(CsvRelation.scala:269)
at com.databricks.spark.csv.CsvRelation.firstLine(CsvRelation.scala:265) at
com.databricks.spark.csv.CsvRelation.inferSchema(CsvRelation.scala:242) at
com.databricks.spark.csv.CsvRelation.<init>(CsvRelation.scala:74) at
com.databricks.spark.csv.DefaultSource.createRelation(DefaultSource.scala:171)
at
com.databricks.spark.csv.DefaultSource.createRelation(DefaultSource.scala:44)
at
org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:158)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:119) at
org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:109) at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35) at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40) at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42) at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44) at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46) at
$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:48) at
$iwC$$iwC$$iwC$$iwC.<init>(<console>:50) at $iwC$$iwC$$iwC.<init>(<console>:52)
at $iwC$$iwC.<init>(<console>:54) at $iwC.<init>(<console>:56) at
<init>(<console>:58) at .<init>(<console>:62) at .<clinit>(<console>) at
.<init>(<console>:7) at .<clinit>(<console>) at $print(<console>) at
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606) at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045) at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326) at
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821) at
org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852) at
org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800) at
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) at
org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) at
org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) at
org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) at
org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at
org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064) at
org.apache.spark.repl.Main$.main(Main.scala:35) at
org.apache.spark.repl.Main.main(Main.scala) at
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606) at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at
org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at
org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)}}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]