您好:
我有两张表,数据量都是 1000 多万条,需要对这两张表做 join。
提交任务后,发现 join 十分缓慢,请问有什么调优的思路?
需要调整managed memory吗?
目前每个 TaskManager 申请的总内存是 2 GB,每个 TaskManager 上有 4 个 slot。TaskManager 的 metrics 如下:
{
"id":"container_e40_1555496777286_675191_01_000107",
"path":"akka.tcp://flink@hnode9:33156/user/taskmanager_0",
"dataPort":39423,
"timeSinceLastHeartbeat":1584697728127,
"slotsNumber":4,
"freeSlots":3,
"hardware":{
"cpuCores":32,
"physicalMemory":135355260928,
"freeMemory":749731840,
"managedMemory":732828804
},
"metrics":{
"heapUsed":261623760,
"heapCommitted":781189120,
"heapMax":781189120,
"nonHeapUsed":100441328,
"nonHeapCommitted":102957056,
"nonHeapMax":1426063360,
"directCount":5662,
"directUsed":191911352,
"directMax":191911350,
"mappedCount":0,
"mappedUsed":0,
"mappedMax":0,
"memorySegmentsAvailable":5582,
"memorySegmentsTotal":5591,
"garbageCollectors":[
{
"name":"PS_Scavenge",
"count":5734,
"time":19767
},
{
"name":"PS_MarkSweep",
"count":7,
"time":893
}
]
}
}