[GitHub] [hudi] lshg opened a new issue #2490: spark read hudi data from hive

GitBox Mon, 25 Jan 2021 19:39:11 -0800


lshg opened a new issue #2490:
URL: https://github.com/apache/hudi/issues/2490



   package com.gjr.recommend
   
   import org.apache.spark.sql.hive.HiveContext
   import org.apache.spark.sql.{Row, SparkSession}
   import org.apache.spark.{SparkConf, SparkContext}
   
   object DWDTenderLog {
     def main(args: Array[String]): Unit = {
   
   
   
       val conf = new 
SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[2]").set("spark.executor.memory",
 "512m")
       val sc: SparkContext = new SparkContext(conf)
   
       val spark: SparkSession = 
SparkSession.builder().config(conf).getOrCreate()
   
   
       val hc = new HiveContext(sc)
       hc.setConf("spark.sql.crossJoin.enabled","true");
   
   
       val tenderLog: Array[Row] = hc.sql(
         """
           |  SELECT
           |projectid,
           |provinceid,
           |typeId,
           |tender_tag
           |FROM
           |(
           |SELECT
           |projectid,
           |provinceid,
           |typeId,
           |antistop
           |FROM
           |app.dwd_recommend_tender_ds
           |WHERE
           |createTime >= 1608280608479 AND createTime <= 1611628847000
           |AND antistop != ''
           |GROUP BY
           |projectid,
           |provinceid,
           |typeId,
           |antistop
           |) AS a lateral VIEW explode (split(antistop, "#")) table_tmp AS 
tender_tag
           """.stripMargin).collect()
   
   
       println(tenderLog.toBuffer)
   
   
       sc.stop()
   
     }
   }
   
   0    [main] INFO  org.apache.spark.SparkContext  - Running Spark version 
2.4.7
   346  [main] INFO  org.apache.spark.SparkContext  - Submitted application: 
DWDTenderLog$
   390  [main] INFO  org.apache.spark.SecurityManager  - Changing view acls to: 
lsh
   390  [main] INFO  org.apache.spark.SecurityManager  - Changing modify acls 
to: lsh
   390  [main] INFO  org.apache.spark.SecurityManager  - Changing view acls 
groups to: 
   390  [main] INFO  org.apache.spark.SecurityManager  - Changing modify acls 
groups to: 
   391  [main] INFO  org.apache.spark.SecurityManager  - SecurityManager: 
authentication disabled; ui acls disabled; users  with view permissions: 
Set(lsh); groups with view permissions: Set(); users  with modify permissions: 
Set(lsh); groups with modify permissions: Set()
   2533 [main] INFO  org.apache.spark.util.Utils  - Successfully started 
service 'sparkDriver' on port 54347.
   2575 [main] INFO  org.apache.spark.SparkEnv  - Registering MapOutputTracker
   2588 [main] INFO  org.apache.spark.SparkEnv  - Registering BlockManagerMaster
   2589 [main] INFO  org.apache.spark.storage.BlockManagerMasterEndpoint  - 
Using org.apache.spark.storage.DefaultTopologyMapper for getting topology 
information
   2590 [main] INFO  org.apache.spark.storage.BlockManagerMasterEndpoint  - 
BlockManagerMasterEndpoint up
   2596 [main] INFO  org.apache.spark.storage.DiskBlockManager  - Created local 
directory at 
C:\Users\lsh\AppData\Local\Temp\blockmgr-d134fb11-0552-4b4b-8f20-ea7e04fd086d
   2609 [main] INFO  org.apache.spark.storage.memory.MemoryStore  - MemoryStore 
started with capacity 1979.1 MB
   2619 [main] INFO  org.apache.spark.SparkEnv  - Registering 
OutputCommitCoordinator
   2675 [main] INFO  org.spark_project.jetty.util.log  - Logging initialized 
@23630ms
   2720 [main] INFO  org.spark_project.jetty.server.Server  - 
jetty-9.3.z-SNAPSHOT, build timestamp: 2019-02-16T00:53:49+08:00, git hash: 
eb70b240169fcf1abbd86af36482d1c49826fa0b
   2731 [main] INFO  org.spark_project.jetty.server.Server  - Started @23687ms
   2747 [main] INFO  org.spark_project.jetty.server.AbstractConnector  - 
Started ServerConnector@4d63b624{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
   2747 [main] INFO  org.apache.spark.util.Utils  - Successfully started 
service 'SparkUI' on port 4040.
   2767 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@27eb3298{/jobs,null,AVAILABLE,@Spark}
   2768 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@1b58ff9e{/jobs/json,null,AVAILABLE,@Spark}
   2768 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@2f66e802{/jobs/job,null,AVAILABLE,@Spark}
   2769 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@76318a7d{/jobs/job/json,null,AVAILABLE,@Spark}
   2770 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@2a492f2a{/stages,null,AVAILABLE,@Spark}
   2770 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@3277e499{/stages/json,null,AVAILABLE,@Spark}
   2771 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@585811a4{/stages/stage,null,AVAILABLE,@Spark}
   2772 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@4c4d362a{/stages/stage/json,null,AVAILABLE,@Spark}
   2773 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@5400db36{/stages/pool,null,AVAILABLE,@Spark}
   2773 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@76b74e9c{/stages/pool/json,null,AVAILABLE,@Spark}
   2774 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@2d72f75e{/storage,null,AVAILABLE,@Spark}
   2775 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@8ab78bc{/storage/json,null,AVAILABLE,@Spark}
   2776 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@5aa0dbf4{/storage/rdd,null,AVAILABLE,@Spark}
   2776 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@16afbd92{/storage/rdd/json,null,AVAILABLE,@Spark}
   2777 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@2c5d601e{/environment,null,AVAILABLE,@Spark}
   2777 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@7fe083b1{/environment/json,null,AVAILABLE,@Spark}
   2777 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@23c388c2{/executors,null,AVAILABLE,@Spark}
   2778 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@486be205{/executors/json,null,AVAILABLE,@Spark}
   2778 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@f713686{/executors/threadDump,null,AVAILABLE,@Spark}
   2778 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@74f7d1d2{/executors/threadDump/json,null,AVAILABLE,@Spark}
   2783 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@4b4dd216{/static,null,AVAILABLE,@Spark}
   2784 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@54afd745{/,null,AVAILABLE,@Spark}
   2786 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@677dbd89{/api,null,AVAILABLE,@Spark}
   2787 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@2ca47471{/jobs/job/kill,null,AVAILABLE,@Spark}
   2788 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@5a021cb9{/stages/stage/kill,null,AVAILABLE,@Spark}
   2790 [main] INFO  org.apache.spark.ui.SparkUI  - Bound SparkUI to 0.0.0.0, 
and started at http://DESKTOP-E6TA5L3:4040
   2873 [main] INFO  org.apache.spark.executor.Executor  - Starting executor ID 
driver on host localhost
   2921 [main] INFO  org.apache.spark.util.Utils  - Successfully started 
service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 
54366.
   2922 [main] INFO  org.apache.spark.network.netty.NettyBlockTransferService  
- Server created on DESKTOP-E6TA5L3:54366
   2923 [main] INFO  org.apache.spark.storage.BlockManager  - Using 
org.apache.spark.storage.RandomBlockReplicationPolicy for block replication 
policy
   2939 [main] INFO  org.apache.spark.storage.BlockManagerMaster  - Registering 
BlockManager BlockManagerId(driver, DESKTOP-E6TA5L3, 54366, None)
   2942 [dispatcher-event-loop-0] INFO  
org.apache.spark.storage.BlockManagerMasterEndpoint  - Registering block 
manager DESKTOP-E6TA5L3:54366 with 1979.1 MB RAM, BlockManagerId(driver, 
DESKTOP-E6TA5L3, 54366, None)
   2945 [main] INFO  org.apache.spark.storage.BlockManagerMaster  - Registered 
BlockManager BlockManagerId(driver, DESKTOP-E6TA5L3, 54366, None)
   2945 [main] INFO  org.apache.spark.storage.BlockManager  - Initialized 
BlockManager: BlockManagerId(driver, DESKTOP-E6TA5L3, 54366, None)
   3086 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@466d49f0{/metrics/json,null,AVAILABLE,@Spark}
   3109 [main] WARN  org.apache.spark.SparkContext  - Using an existing 
SparkContext; some configuration may not take effect.
   3244 [main] INFO  org.apache.spark.sql.internal.SharedState  - loading hive 
config file: 
file:/D:/GJR_PROJECT/tt/tenderRecommend/target/classes/hive-site.xml
   3279 [main] INFO  org.apache.spark.sql.internal.SharedState  - 
spark.sql.warehouse.dir is not set, but hive.metastore.warehouse.dir is set. 
Setting spark.sql.warehouse.dir to the value of hive.metastore.warehouse.dir 
('/user/hive/warehouse').
   3279 [main] INFO  org.apache.spark.sql.internal.SharedState  - Warehouse 
path is '/user/hive/warehouse'.
   3285 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@796065aa{/SQL,null,AVAILABLE,@Spark}
   3286 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started o.s.j.s.ServletContextHandler@28a6301f{/SQL/json,null,AVAILABLE,@Spark}
   3286 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@1436a7ab{/SQL/execution,null,AVAILABLE,@Spark}
   3286 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@3b7b05a8{/SQL/execution/json,null,AVAILABLE,@Spark}
   3287 [main] INFO  org.spark_project.jetty.server.handler.ContextHandler  - 
Started 
o.s.j.s.ServletContextHandler@336365bc{/static/sql,null,AVAILABLE,@Spark}
   3707 [main] INFO  
org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef  - 
Registered StateStoreCoordinator endpoint
   4072 [main] INFO  org.apache.spark.sql.hive.HiveUtils  - Initializing 
HiveMetastoreConnection version 1.2.1 using Spark classes.
   4479 [main] WARN  org.apache.hadoop.hive.conf.HiveConf  - HiveConf of name 
hive.server2.webui.port does not exist
   4479 [main] WARN  org.apache.hadoop.hive.conf.HiveConf  - HiveConf of name 
hive.server2.webui.host does not exist
   4584 [main] INFO  hive.metastore  - Trying to connect to metastore with URI 
thrift://t1:9083
   4759 [main] INFO  hive.metastore  - Connected to metastore.
   8437 [main] INFO  org.apache.hadoop.hive.ql.session.SessionState  - Created 
local directory: 
C:/Users/lsh/AppData/Local/Temp/99f90c54-0932-45d6-924a-b4cdd357db61_resources
   8477 [main] INFO  org.apache.hadoop.hive.ql.session.SessionState  - Created 
HDFS directory: /user/hive/tmp/lsh/99f90c54-0932-45d6-924a-b4cdd357db61
   8496 [main] INFO  org.apache.hadoop.hive.ql.session.SessionState  - Created 
local directory: 
C:/Users/lsh/AppData/Local/Temp/lsh/99f90c54-0932-45d6-924a-b4cdd357db61
   8534 [main] INFO  org.apache.hadoop.hive.ql.session.SessionState  - Created 
HDFS directory: 
/user/hive/tmp/lsh/99f90c54-0932-45d6-924a-b4cdd357db61/_tmp_space.db
   8560 [main] INFO  org.apache.spark.sql.hive.client.HiveClientImpl  - 
Warehouse location for Hive client (version 1.2.2) is /user/hive/warehouse
   10106 [main] INFO  
org.apache.spark.sql.execution.datasources.FileSourceStrategy  - Pruning 
directories with: 
   10108 [main] INFO  
org.apache.spark.sql.execution.datasources.FileSourceStrategy  - Post-Scan 
Filters: isnotnull(createTime#25L),isnotnull(antistop#8),(createTime#25L >= 
1608280608479),(createTime#25L <= 1611628847000),NOT (antistop#8 = )
   10110 [main] INFO  
org.apache.spark.sql.execution.datasources.FileSourceStrategy  - Output Data 
Schema: struct<projectId: int, antistop: string, provinceId: int, typeId: int, 
createTime: bigint ... 3 more fields>
   10118 [main] INFO  org.apache.spark.sql.execution.FileSourceScanExec  - 
Pushed Filters: 
IsNotNull(createTime),IsNotNull(antistop),GreaterThanOrEqual(createTime,1608280608479),LessThanOrEqual(createTime,1611628847000),Not(EqualTo(antistop,))
   10165 [main] WARN  org.apache.spark.util.Utils  - Truncated the string 
representation of a plan since it was too large. This behavior can be adjusted 
by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
   11262 [main] INFO  
org.apache.spark.sql.execution.datasources.PrunedInMemoryFileIndex  - It took 
738 ms to list leaf files for 10 paths.
   11568 [Spark Context Cleaner] INFO  org.apache.spark.ContextCleaner  - 
Cleaned accumulator 0
   11593 [main] INFO  
org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator  - Code 
generated in 223.244 ms
   11751 [main] INFO  
org.apache.spark.sql.execution.aggregate.HashAggregateExec  - 
spark.sql.codegen.aggregate.map.twolevel.enabled is set to true, but current 
version of codegened fast hashmap does not support this aggregate.
   11842 [main] INFO  
org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator  - Code 
generated in 52.0291 ms
   11843 [main] INFO  
org.apache.spark.sql.execution.aggregate.HashAggregateExec  - 
spark.sql.codegen.aggregate.map.twolevel.enabled is set to true, but current 
version of codegened fast hashmap does not support this aggregate.
   11906 [main] INFO  
org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator  - Code 
generated in 38.8988 ms
   11968 [main] INFO  org.apache.spark.storage.memory.MemoryStore  - Block 
broadcast_0 stored as values in memory (estimated size 242.1 KB, free 1978.9 MB)
   12016 [main] INFO  org.apache.spark.storage.memory.MemoryStore  - Block 
broadcast_0_piece0 stored as bytes in memory (estimated size 23.6 KB, free 
1978.8 MB)
   12018 [dispatcher-event-loop-1] INFO  
org.apache.spark.storage.BlockManagerInfo  - Added broadcast_0_piece0 in memory 
on DESKTOP-E6TA5L3:54366 (size: 23.6 KB, free: 1979.1 MB)
   12020 [main] INFO  org.apache.spark.SparkContext  - Created broadcast 0 from 
collect at DWDTenderLog.scala:54
   12024 [main] INFO  org.apache.spark.sql.execution.FileSourceScanExec  - 
Planning scan with bin packing, max size: 49910044 bytes, open cost is 
considered as scanning 4194304 bytes.
   12175 [main] INFO  org.apache.spark.SparkContext  - Starting job: collect at 
DWDTenderLog.scala:54
   12189 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - Registering RDD 2 (collect at 
DWDTenderLog.scala:54) as input to shuffle 0
   12191 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - Got job 0 (collect at 
DWDTenderLog.scala:54) with 200 output partitions
   12191 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - Final stage: ResultStage 1 (collect 
at DWDTenderLog.scala:54)
   12192 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - Parents of final stage: 
List(ShuffleMapStage 0)
   12194 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - Missing parents: 
List(ShuffleMapStage 0)
   12198 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - Submitting ShuffleMapStage 0 
(MapPartitionsRDD[2] at collect at DWDTenderLog.scala:54), which has no missing 
parents
   12214 [dag-scheduler-event-loop] INFO  
org.apache.spark.storage.memory.MemoryStore  - Block broadcast_1 stored as 
values in memory (estimated size 25.4 KB, free 1978.8 MB)
   12217 [dag-scheduler-event-loop] INFO  
org.apache.spark.storage.memory.MemoryStore  - Block broadcast_1_piece0 stored 
as bytes in memory (estimated size 10.8 KB, free 1978.8 MB)
   12218 [dispatcher-event-loop-0] INFO  
org.apache.spark.storage.BlockManagerInfo  - Added broadcast_1_piece0 in memory 
on DESKTOP-E6TA5L3:54366 (size: 10.8 KB, free: 1979.1 MB)
   12218 [dag-scheduler-event-loop] INFO  org.apache.spark.SparkContext  - 
Created broadcast 1 from broadcast at DAGScheduler.scala:1184
   12227 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - Submitting 2 missing tasks from 
ShuffleMapStage 0 (MapPartitionsRDD[2] at collect at DWDTenderLog.scala:54) 
(first 15 tasks are for partitions Vector(0, 1))
   12228 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.TaskSchedulerImpl  - Adding task set 0.0 with 2 tasks
   12253 [dispatcher-event-loop-1] INFO  
org.apache.spark.scheduler.TaskSetManager  - Starting task 0.0 in stage 0.0 
(TID 0, localhost, executor driver, partition 0, ANY, 10221 bytes)
   12254 [dispatcher-event-loop-1] INFO  
org.apache.spark.scheduler.TaskSetManager  - Starting task 1.0 in stage 0.0 
(TID 1, localhost, executor driver, partition 1, ANY, 10288 bytes)
   12260 [Executor task launch worker for task 1] INFO  
org.apache.spark.executor.Executor  - Running task 1.0 in stage 0.0 (TID 1)
   12260 [Executor task launch worker for task 0] INFO  
org.apache.spark.executor.Executor  - Running task 0.0 in stage 0.0 (TID 0)
   12341 [Executor task launch worker for task 1] INFO  
org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator  - Code 
generated in 20.998 ms
   12361 [Executor task launch worker for task 0] INFO  
org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator  - Code 
generated in 7.7712 ms
   12375 [Executor task launch worker for task 0] INFO  
org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator  - Code 
generated in 10.5504 ms
   12388 [Executor task launch worker for task 1] INFO  
org.apache.spark.sql.execution.datasources.FileScanRDD  - Reading File path: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536800-8517888_20210126111108.parquet,
 range: 0-791226, partition values: [2021-01-26]
   12388 [Executor task launch worker for task 0] INFO  
org.apache.spark.sql.execution.datasources.FileScanRDD  - Reading File path: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/23/bf784cc0-1918-4ddd-8145-9de3f840c558-0_0-1553263-3646025_20210124000006.parquet,
 range: 0-1673011, partition values: [2021-01-23]
   14219 [Executor task launch worker for task 1] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   14219 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   15102 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   15262 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   15594 [Executor task launch worker for task 0] INFO  
org.apache.hadoop.io.compress.zlib.ZlibFactory  - Successfully loaded & 
initialized native-zlib library
   15594 [Executor task launch worker for task 0] INFO  
org.apache.hadoop.io.compress.CodecPool  - Got brand-new decompressor [.gz]
   16218 [Executor task launch worker for task 1] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   16439 [Executor task launch worker for task 1] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   16943 [Executor task launch worker for task 1] INFO  
org.apache.hadoop.io.compress.CodecPool  - Got brand-new decompressor [.gz]
   18239 [Executor task launch worker for task 0] INFO  
org.apache.spark.sql.execution.datasources.FileScanRDD  - Reading File path: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/24/7a98c96a-fdeb-47a3-98a3-e2c4b4d6ec81-0_0-2634501-6263163_20210125000002.parquet,
 range: 0-1538381, partition values: [2021-01-24]
   18679 [Executor task launch worker for task 1] INFO  
org.apache.spark.sql.execution.datasources.FileScanRDD  - Reading File path: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536707-8517665_20210126111054.parquet,
 range: 0-791122, partition values: [2021-01-26]
   20217 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   20400 [Executor task launch worker for task 1] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   21685 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   21988 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   22055 [Executor task launch worker for task 1] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   22148 [Executor task launch worker for task 1] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   24301 [Executor task launch worker for task 1] INFO  
org.apache.spark.sql.execution.datasources.FileScanRDD  - Reading File path: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet,
 range: 0-791045, partition values: [2021-01-26]
   24639 [Executor task launch worker for task 0] INFO  
org.apache.spark.sql.execution.datasources.FileScanRDD  - Reading File path: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/25/3baf7087-95d2-4836-9a4c-3f5b4ced568c-0_0-3188990-7636378_20210125235941.parquet,
 range: 0-994567, partition values: [2021-01-25]
   25837 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   26843 [Executor task launch worker for task 1] ERROR 
org.apache.spark.executor.Executor  - Exception in task 1.0 in stage 0.0 (TID 1)
   java.io.FileNotFoundException: File does not exist: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet
   It is possible the underlying files have been updated. You can explicitly 
invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in 
SQL or by recreating the Dataset/DataFrame involved.
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
        at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
        at org.apache.spark.scheduler.Task.run(Task.scala:123)
        at 
org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   26859 [task-result-getter-0] WARN  org.apache.spark.scheduler.TaskSetManager 
 - Lost task 1.0 in stage 0.0 (TID 1, localhost, executor driver): 
java.io.FileNotFoundException: File does not exist: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet
   It is possible the underlying files have been updated. You can explicitly 
invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in 
SQL or by recreating the Dataset/DataFrame involved.
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
        at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
        at org.apache.spark.scheduler.Task.run(Task.scala:123)
        at 
org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   
   26861 [task-result-getter-0] ERROR org.apache.spark.scheduler.TaskSetManager 
 - Task 1 in stage 0.0 failed 1 times; aborting job
   26866 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.TaskSchedulerImpl  - Cancelling stage 0
   26867 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.TaskSchedulerImpl  - Killing all running tasks in 
stage 0: Stage cancelled
   26869 [dispatcher-event-loop-0] INFO  org.apache.spark.executor.Executor  - 
Executor is trying to kill task 0.0 in stage 0.0 (TID 0), reason: Stage 
cancelled
   26869 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.TaskSchedulerImpl  - Stage 0 was cancelled
   26870 [dag-scheduler-event-loop] INFO  
org.apache.spark.scheduler.DAGScheduler  - ShuffleMapStage 0 (collect at 
DWDTenderLog.scala:54) failed in 14.660 s due to Job aborted due to stage 
failure: Task 1 in stage 0.0 failed 1 times, most recent failure: Lost task 1.0 
in stage 0.0 (TID 1, localhost, executor driver): 
java.io.FileNotFoundException: File does not exist: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet
   It is possible the underlying files have been updated. You can explicitly 
invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in 
SQL or by recreating the Dataset/DataFrame involved.
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
        at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
        at org.apache.spark.scheduler.Task.run(Task.scala:123)
        at 
org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   
   Driver stacktrace:
   26873 [main] INFO  org.apache.spark.scheduler.DAGScheduler  - Job 0 failed: 
collect at DWDTenderLog.scala:54, took 14.697048 s
   26879 [Thread-1] INFO  org.apache.spark.SparkContext  - Invoking stop() from 
shutdown hook
   26885 [Thread-1] INFO  org.spark_project.jetty.server.AbstractConnector  - 
Stopped Spark@4d63b624{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
   26887 [Thread-1] INFO  org.apache.spark.ui.SparkUI  - Stopped Spark web UI 
at http://DESKTOP-E6TA5L3:4040
   26896 [dispatcher-event-loop-1] INFO  
org.apache.spark.MapOutputTrackerMasterEndpoint  - 
MapOutputTrackerMasterEndpoint stopped!
   Exception in thread "main" org.apache.spark.SparkException: Job aborted due 
to stage failure: Task 1 in stage 0.0 failed 1 times, most recent failure: Lost 
task 1.0 in stage 0.0 (TID 1, localhost, executor driver): 
java.io.FileNotFoundException: File does not exist: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet
   It is possible the underlying files have been updated. You can explicitly 
invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in 
SQL or by recreating the Dataset/DataFrame involved.
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
        at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
        at org.apache.spark.scheduler.Task.run(Task.scala:123)
        at 
org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   
   Driver stacktrace:
        at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1913)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1912)
        at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
        at 
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1912)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948)
        at scala.Option.foreach(Option.scala:257)
        at 
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
        at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2146)
        at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2095)
        at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2084)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
        at 
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
        at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
        at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
        at 
org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
        at 
org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3389)
        at 
org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2788)
        at 
org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2788)
        at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
        at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
        at 
org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3369)
        at org.apache.spark.sql.Dataset.collect(Dataset.scala:2788)
        at com.gjr.recommend.DWDTenderLog$.main(DWDTenderLog.scala:54)
        at com.gjr.recommend.DWDTenderLog.main(DWDTenderLog.scala)
   Caused by: java.io.FileNotFoundException: File does not exist: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet
   It is possible the underlying files have been updated. You can explicitly 
invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in 
SQL or by recreating the Dataset/DataFrame involved.
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
        at 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown
 Source)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown
 Source)
        at 
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at 
org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
        at 
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
        at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
        at org.apache.spark.scheduler.Task.run(Task.scala:123)
        at 
org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
   26908 [Thread-1] INFO  org.apache.spark.storage.memory.MemoryStore  - 
MemoryStore cleared
   26908 [Thread-1] INFO  org.apache.spark.storage.BlockManager  - BlockManager 
stopped
   26910 [Thread-1] INFO  org.apache.spark.storage.BlockManagerMaster  - 
BlockManagerMaster stopped
   26911 [dispatcher-event-loop-0] INFO  
org.apache.spark.scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint
  - OutputCommitCoordinator stopped!
   26915 [Thread-1] INFO  org.apache.spark.SparkContext  - Successfully stopped 
SparkContext
   26916 [Thread-1] INFO  org.apache.spark.util.ShutdownHookManager  - Shutdown 
hook called
   26916 [Thread-1] INFO  org.apache.spark.util.ShutdownHookManager  - Deleting 
directory 
C:\Users\lsh\AppData\Local\Temp\spark-b9c30c48-0f2d-46ff-adb2-44e702a4dc5a
   26919 [Executor task launch worker for task 0] INFO  
org.apache.parquet.filter2.compat.FilterCompat  - Filtering using predicate: 
and(and(and(and(noteq(createTime, null), noteq(antistop, null)), 
gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), 
noteq(antistop, Binary{""}))
   
   Process finished with exit code 1
   
   
   hdfs data~~~~~~~~~~~~~~~~~~~~~~~~~
   
   [root@t1 ~]# hdfs dfs -ls 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/
   SLF4J: Class path contains multiple SLF4J bindings.
   SLF4J: Found binding in 
[jar:file:/usr/local/modules/hadoop-2.8.5/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
   SLF4J: Found binding in 
[jar:file:/usr/local/software/apache-tez-0.9.2-bin/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
   SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an 
explanation.
   SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
   Found 11 items
   -rw-r--r--   3 root supergroup         93 2021-01-26 00:00 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/.hoodie_partition_metadata
   -rw-r--r--   3 root supergroup     781714 2021-01-26 10:53 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3526941-8492659_20210126105334.parquet
   -rw-r--r--   3 root supergroup     781786 2021-01-26 10:53 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527031-8492879_20210126105340.parquet
   -rw-r--r--   3 root supergroup     781872 2021-01-26 10:53 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527113-8493091_20210126105354.parquet
   -rw-r--r--   3 root supergroup     781938 2021-01-26 10:54 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527261-8493515_20210126105409.parquet
   -rw-r--r--   3 root supergroup     782011 2021-01-26 10:54 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527346-8493730_20210126105414.parquet
   -rw-r--r--   3 root supergroup     782106 2021-01-26 10:54 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527439-8493953_20210126105420.parquet
   -rw-r--r--   3 root supergroup     782214 2021-01-26 10:54 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527532-8494176_20210126105434.parquet
   -rw-r--r--   3 root supergroup     782287 2021-01-26 10:54 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527617-8494391_20210126105444.parquet
   -rw-r--r--   3 root supergroup     782368 2021-01-26 10:54 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527710-8494614_20210126105454.parquet
   -rw-r--r--   3 root supergroup     782465 2021-01-26 10:55 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527795-8494829_20210126105500.parquet
   
   
   
   
   
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [hudi] lshg opened a new issue #2490: spark read hudi data from hive

Reply via email to