Peter Bacsko created YUNIKORN-2303:
--------------------------------------
Summary: Data race during queue sorting
Key: YUNIKORN-2303
URL: https://issues.apache.org/jira/browse/YUNIKORN-2303
Project: Apache YuniKorn
Issue Type: Bug
Components: core - scheduler
Reporter: Peter Bacsko
The following data race has been uncovered while running a performance test:
{noformat}
==================
WARNING: DATA RACE
Read at 0x00c02179b5a8 by goroutine 51:
github.com/apache/yunikorn-core/pkg/common/resources.(*Resource).Clone()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:142
+0x304
github.com/apache/yunikorn-core/pkg/common/resources.Sub()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:349
+0xe0
github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueue.sortQueuesByPriorityAndFairness.func1()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:71
+0x17a
sort.insertionSort_func()
/snap/go/current/src/sort/zsortfunc.go:12 +0xc6
sort.stable_func()
/snap/go/current/src/sort/zsortfunc.go:343 +0x7e
sort.SliceStable()
/snap/go/current/src/sort/slice.go:38 +0x1b9
github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueuesByPriorityAndFairness()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:57
+0x104
github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueue()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:34
+0x7f
github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).sortQueues()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:1174
+0x2e7
github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).TryAllocate()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:1377
+0xdd
github.com/apache/yunikorn-core/pkg/scheduler.(*PartitionContext).tryAllocate()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/partition.go:825
+0x1f6
github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).schedule()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:140
+0x1a8
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).internalSchedule()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:75
+0xf6
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func2()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:66
+0x33
Previous write at 0x00c02179b5a8 by goroutine 48:
github.com/apache/yunikorn-core/pkg/common/resources.(*Resource).Clone()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:143
+0x353
github.com/apache/yunikorn-core/pkg/common/resources.Add()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:329
+0xe0
github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).incPendingResource()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:688
+0x11c
github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Application).AddAllocationAsk()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/application.go:658
+0x9ca
github.com/apache/yunikorn-core/pkg/scheduler.(*PartitionContext).addAllocationAsk()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/partition.go:1439
+0x91
github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).processAsks()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:826
+0x132
github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).handleRMUpdateAllocationEvent()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:743
+0x9d
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).handleRMEvent()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:111
+0x1fa
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func1()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:55
+0x33
Goroutine 51 (running) created at:
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:66
+0x447
github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:92
+0x35b
github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:43
+0x59
github.com/apache/yunikorn-k8shim/pkg/shim.(*MockScheduler).init()
/home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_mock_test.go:64
+0xad
github.com/apache/yunikorn-k8shim/pkg/shim.BenchmarkSchedulingThroughPut()
/home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_perf_test.go:82
+0x1c4
testing.(*B).runN()
/snap/go/current/src/testing/benchmark.go:193 +0x232
testing.(*B).run1.func1()
/snap/go/current/src/testing/benchmark.go:233 +0x7b
Goroutine 48 (running) created at:
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:55
+0x153
github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:92
+0x35b
github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
/home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:43
+0x59
github.com/apache/yunikorn-k8shim/pkg/shim.(*MockScheduler).init()
/home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_mock_test.go:64
+0xad
github.com/apache/yunikorn-k8shim/pkg/shim.BenchmarkSchedulingThroughPut()
/home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_perf_test.go:82
+0x1c4
testing.(*B).runN()
/snap/go/current/src/testing/benchmark.go:193 +0x232
testing.(*B).run1.func1()
/snap/go/current/src/testing/benchmark.go:233 +0x7b
==================
{noformat}
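We must not access {{Queue.pending}} directly in the queue sort comparator ({{sortQueuesByPriorityAndFairness}}): as the trace above shows, it is read there while {{incPendingResource()}} concurrently updates it from another goroutine: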
{noformat}
if comp == 0 {
    return resources.StrictlyGreaterThan(resources.Sub(l.pending, r.pending), resources.Zero)
}
...
if lPriority < rPriority {
    return false
}
return resources.StrictlyGreaterThan(resources.Sub(l.pending, r.pending), resources.Zero)
{noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]