[
https://issues.apache.org/jira/browse/SPARK-14331?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15296416#comment-15296416
]
Thomas Graves commented on SPARK-14331:
---------------------------------------
I was trying to reproduce this to get you the rest but now I get a different
exception, this is using sqlContext directly:
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
It looks like its perhaps using the wrong filesystem (using hadoop when it
should use local) .
16/05/23 14:15:08 ERROR ApplicationMaster: User class threw exception:
org.apache.spark.SparkException: Unable to create database default as failed to
create its directory
hdfs://nn1.com:8020/hadoop/tmp/yarn-local/usercache/tgraves/appcache/application_1463805142339_520258/container_e11_1463805142339_520258_01_000001/spark-warehouse
org.apache.spark.SparkException: Unable to create database default as failed to
create its directory
hdfs://nn1.com:8020/hadoop/tmp/yarn-local/usercache/tgraves/appcache/application_1463805142339_520258/container_e11_1463805142339_520258_01_000001/spark-warehouse
at
org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.liftedTree1$1(InMemoryCatalog.scala:126)
at
org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.createDatabase(InMemoryCatalog.scala:122)
at
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:142)
at
org.apache.spark.sql.catalyst.catalog.SessionCatalog.<init>(SessionCatalog.scala:84)
at
org.apache.spark.sql.internal.SessionState.catalog$lzycompute(SessionState.scala:94)
at
org.apache.spark.sql.internal.SessionState.catalog(SessionState.scala:94)
at
org.apache.spark.sql.internal.SessionState$$anon$1.<init>(SessionState.scala:110)
at
org.apache.spark.sql.internal.SessionState.analyzer$lzycompute(SessionState.scala:110)
at
org.apache.spark.sql.internal.SessionState.analyzer(SessionState.scala:109)
at
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:48)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:62)
at
org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:383)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:154)
at
org.apache.spark.sql.DataFrameReader.parquet(DataFrameReader.scala:419)
at yahoo.spark.SparkFlickrLargeJoin$.main(SparkFlickrLargeJoin.scala:26)
at yahoo.spark.SparkFlickrLargeJoin.main(SparkFlickrLargeJoin.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at
org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:617)
Caused by: org.apache.hadoop.security.AccessControlException: Permission
denied: user=tgraves, access=WRITE, inode="/":hdfs:hdfs:drwxr-xr-x
at
org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:298)
at
org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:204)
at
org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:182)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem$PathResolver.verifyPermissions(FSNamesystem.java:8622)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3961)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:989)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:622)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:608)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
at org.apache.hadoop.ipc.Server.call(Server.java:2267)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:648)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:615)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1720)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2217)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
at
org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at
org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3018)
at org.apache.hadoop.hdfs.DFSClient.mkdirs(DFSClient.java:2986)
at
org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1047)
at
org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1043)
at
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at
org.apache.hadoop.hdfs.DistributedFileSystem.mkdirsInternal(DistributedFileSystem.java:1061)
at
org.apache.hadoop.hdfs.DistributedFileSystem.mkdirs(DistributedFileSystem.java:1036)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:1880)
at
org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.liftedTree1$1(InMemoryCatalog.scala:123)
... 20 more
Caused by:
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
Permission denied: user=tgraves, access=WRITE, inode="/":hdfs:hdfs:drwxr-xr-x
at
org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:298)
at
org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:204)
at
org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:182)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem$PathResolver.verifyPermissions(FSNamesystem.java:8622)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3961)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:989)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:622)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:608)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
at org.apache.hadoop.ipc.Server.call(Server.java:2267)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:648)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:615)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1720)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2217)
at org.apache.hadoop.ipc.Client.call(Client.java:1471)
at org.apache.hadoop.ipc.Client.call(Client.java:1408)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:230)
at com.sun.proxy.$Proxy10.mkdirs(Unknown Source)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.mkdirs(ClientNamenodeProtocolTranslatorPB.java:558)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy11.mkdirs(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3016)
... 28 more
> Exceptions saving to parquetFile after join from dataframes in master
> ---------------------------------------------------------------------
>
> Key: SPARK-14331
> URL: https://issues.apache.org/jira/browse/SPARK-14331
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.0.0
> Reporter: Thomas Graves
> Priority: Critical
>
> I'm trying to use master and write to a parquet file when using a dataframe
> but am seeing the exception below. Not sure exact state of dataframes right
> now so if this is known issue let me know.
> I read 2 sources of parquet files, joined them, then saved them back.
> val df_pixels = sqlContext.read.parquet("data1")
> val df_pixels_renamed = df_pixels.withColumnRenamed("photo_id",
> "pixels_photo_id")
> val df_meta = sqlContext.read.parquet("data2")
> val df = df_meta.as("meta").join(df_pixels_renamed, $"meta.photo_id" ===
> $"pixels_photo_id", "inner").drop("pixels_photo_id")
> df.write.parquet(args(0))
> 16/04/01 17:21:34 ERROR InsertIntoHadoopFsRelation: Aborting job.
> org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
> Exchange hashpartitioning(pixels_photo_id#3, 20000), None
> +- WholeStageCodegen
> : +- Filter isnotnull(pixels_photo_id#3)
> : +- INPUT
> +- Coalesce 0
> +- WholeStageCodegen
> : +- Project [img_data#0,photo_id#1 AS pixels_photo_id#3]
> : +- Scan HadoopFiles[img_data#0,photo_id#1] Format:
> ParquetFormat, PushedFilters: [], ReadSchema:
> struct<img_data:binary,photo_id:string>
> at
> org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:47)
> at
> org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:109)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:137)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at
> org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:134)
> at
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:117)
> at
> org.apache.spark.sql.execution.InputAdapter.upstreams(WholeStageCodegen.scala:236)
> at org.apache.spark.sql.execution.Sort.upstreams(Sort.scala:104)
> at
> org.apache.spark.sql.execution.WholeStageCodegen.doExecute(WholeStageCodegen.scala:351)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:137)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at
> org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:134)
> at
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:117)
> at
> org.apache.spark.sql.execution.InputAdapter.doExecute(WholeStageCodegen.scala:228)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:118)
> at
> org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:137)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at
> org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:134)
> at
> org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:117)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]