mahesh2247 commented on issue #7688:
URL: https://github.com/apache/hudi/issues/7688#issuecomment-1386594913

   Resulting in the following error:
   ```
   23/01/18 06:44:20 ERROR ProcessLauncher: Error from Python:Traceback (most 
recent call last):
     File "/tmp/glue_job_script.py", line 77, in <module>
       glueContext.forEachBatch(frame = data_frame_DataSource0, batch_function 
= processBatch, options = {"windowSize": "10 seconds", "checkpointLocation":  
s3_path_spark})
     File "/opt/amazon/lib/python3.6/site-packages/awsglue/context.py", line 
678, in forEachBatch
       raise e
     File "/opt/amazon/lib/python3.6/site-packages/awsglue/context.py", line 
668, in forEachBatch
       query.start().awaitTermination()
     File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/streaming.py", 
line 101, in awaitTermination
       return self._jsq.awaitTermination()
     File 
"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 
1305, in __call__
       answer, self.gateway_client, self.target_id, self.name)
     File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 
117, in deco
       raise converted from None
   pyspark.sql.utils.StreamingQueryException: An exception was raised by the 
Python Proxy. Return Message: Traceback (most recent call last):
     File 
"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 
2442, in _call_proxy
       return_value = getattr(self.pool[obj_id], method)(*params)
     File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 
196, in call
       raise e
     File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 
193, in call
       self.func(DataFrame(jdf, self.sql_ctx), batch_id)
     File "/opt/amazon/lib/python3.6/site-packages/awsglue/context.py", line 
653, in batch_function_with_persist
       batch_function(data_frame, batchId)
     File "/tmp/glue_job_script.py", line 69, in processBatch
       inputDf = inputDf.filter("eventName !='REMOVE' AND id !='{}'".format(abc['id']))
     File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py", 
line 1717, in filter
       jdf = self._jdf.filter(condition)
     File 
"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 
1305, in __call__
       answer, self.gateway_client, self.target_id, self.name)
     File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 
117, in deco
       raise converted from None
   pyspark.sql.utils.ParseException: 
   extraneous input ''>'' expecting <EOF>(line 1, pos 41)
   
   == SQL ==
   eventName !='REMOVE' AND id !='Column<'id'>'
   -----------------------------------------^^^
   
   
   === Streaming Query ===
   
   
   Identifier: [id = 35151fa1-0206-4f5d-b966-d79ead941926, runId = 
e1e910b1-7a33-49e0-ac03-7fada17d6138]Current Committed Offsets: {}Current 
Available Offsets: {KinesisSource[hudi_demo_stream_424f5840]: 
{"metadata":{"streamName":"hudi_demo_stream_424f5840","batchId":"0"},"shardId-000000000000":{"iteratorType":"TRIM_HORIZON","iteratorPosition":""}}}Current
 State: ACTIVEThread State: RUNNABLELogical Plan:Project [cast(data#18 as 
string) AS $json$data_infer_schema$_temporary$#27]+- Project [UDF(data#5) AS 
data#18, streamName#6, partitionKey#7, sequenceNumber#8, 
approximateArrivalTimestamp#9]   +- StreamingExecutionRelation 
KinesisSource[hudi_demo_stream_424f5840], [data#5, streamName#6, 
partitionKey#7, sequenceNumber#8, approximateArrivalTimestamp#9] | 23/01/18 
06:44:20 ERROR ProcessLauncher: Error from Python:Traceback (most recent call 
last): File "/tmp/glue_job_script.py", line 77, in <module> 
glueContext.forEachBatch(frame = data_frame_DataSource0, batch_function = 
processBatch, options = {"windowSize": "10 seconds", "checkpointLocation": s3_path_spark}) File 
"/opt/amazon/lib/python3.6/site-packages/awsglue/context.py", line 678, in 
forEachBatch raise e File 
"/opt/amazon/lib/python3.6/site-packages/awsglue/context.py", line 668, in 
forEachBatch query.start().awaitTermination() File 
"/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/streaming.py", line 101, 
in awaitTermination return self._jsq.awaitTermination() File 
"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 
1305, in __call__ answer, self.gateway_client, self.target_id, self.name) File 
"/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 117, in 
deco raise converted from None pyspark.sql.utils.StreamingQueryException: An 
exception was raised by the Python Proxy. Return Message: Traceback (most 
recent call last): File 
"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 
2442, in _call_proxy return_value = getattr(self.pool[obj_id], method)(*params) File 
"/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 196, in 
call raise e File 
"/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 193, in 
call self.func(DataFrame(jdf, self.sql_ctx), batch_id) File 
"/opt/amazon/lib/python3.6/site-packages/awsglue/context.py", line 653, in 
batch_function_with_persist batch_function(data_frame, batchId) File 
"/tmp/glue_job_script.py", line 69, in processBatch inputDf = 
inputDf.filter("eventName !='REMOVE' AND id !='{}'".format(abc['id'])) File 
"/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py", line 1717, 
in filter jdf = self._jdf.filter(condition) File 
"/opt/amazon/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 
1305, in __call__ answer, self.gateway_client, self.target_id, self.name) File 
"/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 117, in 
deco raise converted from None pyspark.sql.utils.ParseException: extraneous 
input ''>'' expecting <EOF>(line 1, pos 41) == SQL == eventName !='REMOVE' AND id 
!='Column<'id'>' -----------------------------------------^^^ === Streaming 
Query === Identifier: [id = 35151fa1-0206-4f5d-b966-d79ead941926, runId = 
e1e910b1-7a33-49e0-ac03-7fada17d6138] Current Committed Offsets: {} Current 
Available Offsets: {KinesisSource[hudi_demo_stream_424f5840]: 
{"metadata":{"streamName":"hudi_demo_stream_424f5840","batchId":"0"},"shardId-000000000000":{"iteratorType":"TRIM_HORIZON","iteratorPosition":""}}}
 Current State: ACTIVE Thread State: RUNNABLE Logical Plan: Project 
[cast(data#18 as string) AS $json$data_infer_schema$_temporary$#27] +- Project 
[UDF(data#5) AS data#18, streamName#6, partitionKey#7, sequenceNumber#8, 
approximateArrivalTimestamp#9] +- StreamingExecutionRelation 
KinesisSource[hudi_demo_stream_424f5840], [data#5, streamName#6, 
partitionKey#7, sequenceNumber#8, approximateArrivalTimestamp#9]
   -- | --
   
   
   
   
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to