harishchanderramesh commented on issue #2089:
URL: https://github.com/apache/hudi/issues/2089#issuecomment-694364558


   Hi,
   Sorry for the delayed response.
   I tried with `s3://` as @umehrot2 suggested and got the error below.
   
   ```
   Traceback (most recent call last):
     File "test_emr_kafka_indigo_persist_hudi_endpoints.py", line 572, in 
<module>
       main()
     File "test_emr_kafka_indigo_persist_hudi_endpoints.py", line 568, in main
       startStream(resumeStream)
     File "test_emr_kafka_indigo_persist_hudi_endpoints.py", line 539, in 
startStream
       stream.awaitTermination()
     File 
"/mnt/yarn/usercache/hadoop/appcache/application_1592910106194_136013/container_1592910106194_136013_01_000001/pyspark.zip/pyspark/streaming/context.py",
 line 192, in awaitTermination
     File 
"/mnt/yarn/usercache/hadoop/appcache/application_1592910106194_136013/container_1592910106194_136013_01_000001/py4j-0.10.7-src.zip/py4j/java_gateway.py",
 line 1257, in __call__
     File 
"/mnt/yarn/usercache/hadoop/appcache/application_1592910106194_136013/container_1592910106194_136013_01_000001/pyspark.zip/pyspark/sql/utils.py",
 line 63, in deco
     File 
"/mnt/yarn/usercache/hadoop/appcache/application_1592910106194_136013/container_1592910106194_136013_01_000001/py4j-0.10.7-src.zip/py4j/protocol.py",
 line 328, in get_return_value
   py4j.protocol.Py4JJavaError: An error occurred while calling 
o118.awaitTermination.
   : org.apache.spark.SparkException: An exception was raised by Python:
   Traceback (most recent call last):
     File 
"/mnt/yarn/usercache/hadoop/appcache/application_1592910106194_136013/container_1592910106194_136013_01_000001/pyspark.zip/pyspark/streaming/util.py",
 line 68, in call
       r = self.func(t, *rdds)
     File 
"/mnt/yarn/usercache/hadoop/appcache/application_1592910106194_136013/container_1592910106194_136013_01_000001/pyspark.zip/pyspark/streaming/kafka.py",
 line 403, in <lambda>
       func = lambda r, rdd: old_func(rdd)
     File "test_emr_kafka_indigo_persist_hudi_endpoints.py", line 322, in 
f_process_stream_rdd
       raise e
   Py4JJavaError: An error occurred while calling o1433.save.
   : java.io.IOException: Cannot create S3 buffer directory: 
/mnt/s3,/mnt1/s3,/mnt2/s3,/mnt3/s3
        at 
org.apache.hadoop.fs.s3.S3OutputStream.newBackupFile(S3OutputStream.java:95)
        at org.apache.hadoop.fs.s3.S3OutputStream.<init>(S3OutputStream.java:85)
        at org.apache.hadoop.fs.s3.S3FileSystem.create(S3FileSystem.java:289)
        at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:932)
        at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:913)
        at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:810)
        at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:799)
        at 
org.apache.hudi.common.table.HoodieTableConfig.createHoodieProperties(HoodieTableConfig.java:116)
        at 
org.apache.hudi.common.table.HoodieTableMetaClient.initTableAndGetMetaClient(HoodieTableMetaClient.java:375)
        at 
org.apache.hudi.common.table.HoodieTableMetaClient.initTableType(HoodieTableMetaClient.java:332)
        at 
org.apache.hudi.common.table.HoodieTableMetaClient.initTableType(HoodieTableMetaClient.java:303)
        at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:130)
        at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:108)
        at 
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:173)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:169)
        at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:197)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at 
org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:194)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:114)
        at 
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:112)
        at 
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
        at 
org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
        at 
org.apache.spark.sql.execution.SQLExecution$.org$apache$spark$sql$execution$SQLExecution$$executeQuery$1(SQLExecution.scala:83)
        at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1$$anonfun$apply$1.apply(SQLExecution.scala:94)
        at 
org.apache.spark.sql.execution.QueryExecutionMetrics$.withMetrics(QueryExecutionMetrics.scala:141)
        at 
org.apache.spark.sql.execution.SQLExecution$.org$apache$spark$sql$execution$SQLExecution$$withMetrics(SQLExecution.scala:178)
        at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:93)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:200)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:92)
        at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
        at 
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:229)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
        at py4j.Gateway.invoke(Gateway.java:282)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:238)
        at java.lang.Thread.run(Thread.java:748)
   
   
        at 
org.apache.spark.streaming.api.python.TransformFunction.callPythonTransformFunction(PythonDStream.scala:95)
        at 
org.apache.spark.streaming.api.python.TransformFunction.apply(PythonDStream.scala:78)
        at 
org.apache.spark.streaming.api.python.PythonDStream$$anonfun$callForeachRDD$1.apply(PythonDStream.scala:179)
        at 
org.apache.spark.streaming.api.python.PythonDStream$$anonfun$callForeachRDD$1.apply(PythonDStream.scala:179)
        at 
org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
        at 
org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
        at 
org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
        at 
org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:416)
        at 
org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
        at 
org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
        at 
org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
        at scala.util.Try$.apply(Try.scala:192)
        at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
        at 
org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:257)
        at 
org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
        at 
org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
        at 
org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:256)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   
   ```
   
   Thanks,
   Harish


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to