[
https://issues.apache.org/jira/browse/YUNIKORN-2303?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Peter Bacsko resolved YUNIKORN-2303.
------------------------------------
Fix Version/s: 1.5.0
Resolution: Fixed
> Data race during queue sorting
> ------------------------------
>
> Key: YUNIKORN-2303
> URL: https://issues.apache.org/jira/browse/YUNIKORN-2303
> Project: Apache YuniKorn
> Issue Type: Bug
> Components: core - scheduler
> Reporter: Peter Bacsko
> Assignee: Peter Bacsko
> Priority: Major
> Labels: pull-request-available
> Fix For: 1.5.0
>
>
> The following data race has been uncovered while running a performance test:
> {noformat}
> ==================
> WARNING: DATA RACE
> Read at 0x00c02179b5a8 by goroutine 51:
> github.com/apache/yunikorn-core/pkg/common/resources.(*Resource).Clone()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:142
> +0x304
> github.com/apache/yunikorn-core/pkg/common/resources.Sub()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:349
> +0xe0
>
> github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueue.sortQueuesByPriorityAndFairness.func1()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:71
> +0x17a
> sort.insertionSort_func()
> /snap/go/current/src/sort/zsortfunc.go:12 +0xc6
> sort.stable_func()
> /snap/go/current/src/sort/zsortfunc.go:343 +0x7e
> sort.SliceStable()
> /snap/go/current/src/sort/slice.go:38 +0x1b9
>
> github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueuesByPriorityAndFairness()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:57
> +0x104
> github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueue()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:34
> +0x7f
> github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).sortQueues()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:1174
> +0x2e7
> github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).TryAllocate()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:1377
> +0xdd
>
> github.com/apache/yunikorn-core/pkg/scheduler.(*PartitionContext).tryAllocate()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/partition.go:825
> +0x1f6
> github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).schedule()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:140
> +0x1a8
>
> github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).internalSchedule()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:75
> +0xf6
>
> github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func2()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:66
> +0x33
> Previous write at 0x00c02179b5a8 by goroutine 48:
> github.com/apache/yunikorn-core/pkg/common/resources.(*Resource).Clone()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:143
> +0x353
> github.com/apache/yunikorn-core/pkg/common/resources.Add()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:329
> +0xe0
>
> github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).incPendingResource()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:688
> +0x11c
>
> github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Application).AddAllocationAsk()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/application.go:658
> +0x9ca
>
> github.com/apache/yunikorn-core/pkg/scheduler.(*PartitionContext).addAllocationAsk()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/partition.go:1439
> +0x91
>
> github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).processAsks()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:826
> +0x132
>
> github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).handleRMUpdateAllocationEvent()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:743
> +0x9d
> github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).handleRMEvent()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:111
> +0x1fa
>
> github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func1()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:55
> +0x33
> Goroutine 51 (running) created at:
> github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:66
> +0x447
>
> github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:92
> +0x35b
> github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:43
> +0x59
> github.com/apache/yunikorn-k8shim/pkg/shim.(*MockScheduler).init()
>
> /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_mock_test.go:64
> +0xad
> github.com/apache/yunikorn-k8shim/pkg/shim.BenchmarkSchedulingThroughPut()
>
> /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_perf_test.go:82
> +0x1c4
> testing.(*B).runN()
> /snap/go/current/src/testing/benchmark.go:193 +0x232
> testing.(*B).run1.func1()
> /snap/go/current/src/testing/benchmark.go:233 +0x7b
> Goroutine 48 (running) created at:
> github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:55
> +0x153
>
> github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:92
> +0x35b
> github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
>
> /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:43
> +0x59
> github.com/apache/yunikorn-k8shim/pkg/shim.(*MockScheduler).init()
>
> /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_mock_test.go:64
> +0xad
> github.com/apache/yunikorn-k8shim/pkg/shim.BenchmarkSchedulingThroughPut()
>
> /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_perf_test.go:82
> +0x1c4
> testing.(*B).runN()
> /snap/go/current/src/testing/benchmark.go:193 +0x232
> testing.(*B).run1.func1()
> /snap/go/current/src/testing/benchmark.go:233 +0x7b
> ==================
> {noformat}
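> We must not access {{Queue.pending}} directly in the queue sort comparator, because {{incPendingResource()}} can modify it concurrently from another goroutine: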
> {noformat}
> return resources.StrictlyGreaterThan(resources.Sub(l.pending, r.pending),
> resources.Zero)
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@yunikorn.apache.org
For additional commands, e-mail: issues-help@yunikorn.apache.org