Peter Bacsko created YUNIKORN-2303:
--------------------------------------

             Summary: Data race during queue sorting
                 Key: YUNIKORN-2303
                 URL: https://issues.apache.org/jira/browse/YUNIKORN-2303
             Project: Apache YuniKorn
          Issue Type: Bug
          Components: core - scheduler
            Reporter: Peter Bacsko


The following data race has been uncovered while running a performance test:

{noformat}
==================
WARNING: DATA RACE
Read at 0x00c02179b5a8 by goroutine 51:
  github.com/apache/yunikorn-core/pkg/common/resources.(*Resource).Clone()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:142 +0x304
  github.com/apache/yunikorn-core/pkg/common/resources.Sub()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:349 +0xe0
  github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueue.sortQueuesByPriorityAndFairness.func1()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:71 +0x17a
  sort.insertionSort_func()
      /snap/go/current/src/sort/zsortfunc.go:12 +0xc6
  sort.stable_func()
      /snap/go/current/src/sort/zsortfunc.go:343 +0x7e
  sort.SliceStable()
      /snap/go/current/src/sort/slice.go:38 +0x1b9
  github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueuesByPriorityAndFairness()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:57 +0x104
  github.com/apache/yunikorn-core/pkg/scheduler/objects.sortQueue()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/sorters.go:34 +0x7f
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).sortQueues()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:1174 +0x2e7
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).TryAllocate()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:1377 +0xdd
  github.com/apache/yunikorn-core/pkg/scheduler.(*PartitionContext).tryAllocate()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/partition.go:825 +0x1f6
  github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).schedule()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:140 +0x1a8
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).internalSchedule()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:75 +0xf6
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func2()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:66 +0x33

Previous write at 0x00c02179b5a8 by goroutine 48:
  github.com/apache/yunikorn-core/pkg/common/resources.(*Resource).Clone()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:143 +0x353
  github.com/apache/yunikorn-core/pkg/common/resources.Add()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/common/resources/resources.go:329 +0xe0
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).incPendingResource()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/queue.go:688 +0x11c
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Application).AddAllocationAsk()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/objects/application.go:658 +0x9ca
  github.com/apache/yunikorn-core/pkg/scheduler.(*PartitionContext).addAllocationAsk()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/partition.go:1439 +0x91
  github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).processAsks()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:826 +0x132
  github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).handleRMUpdateAllocationEvent()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/context.go:743 +0x9d
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).handleRMEvent()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:111 +0x1fa
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func1()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:55 +0x33

Goroutine 51 (running) created at:
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:66 +0x447
  github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:92 +0x35b
  github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:43 +0x59
  github.com/apache/yunikorn-k8shim/pkg/shim.(*MockScheduler).init()
      /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_mock_test.go:64 +0xad
  github.com/apache/yunikorn-k8shim/pkg/shim.BenchmarkSchedulingThroughPut()
      /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_perf_test.go:82 +0x1c4
  testing.(*B).runN()
      /snap/go/current/src/testing/benchmark.go:193 +0x232
  testing.(*B).run1.func1()
      /snap/go/current/src/testing/benchmark.go:233 +0x7b

Goroutine 48 (running) created at:
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/scheduler/scheduler.go:55 +0x153
  github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:92 +0x35b
  github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
      /home/bacskop/go/pkg/mod/github.com/apache/[email protected]/pkg/entrypoint/entrypoint.go:43 +0x59
  github.com/apache/yunikorn-k8shim/pkg/shim.(*MockScheduler).init()
      /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_mock_test.go:64 +0xad
  github.com/apache/yunikorn-k8shim/pkg/shim.BenchmarkSchedulingThroughPut()
      /home/bacskop/repos/incubator-yunikorn-k8shim/pkg/shim/scheduler_perf_test.go:82 +0x1c4
  testing.(*B).runN()
      /snap/go/current/src/testing/benchmark.go:193 +0x232
  testing.(*B).run1.func1()
      /snap/go/current/src/testing/benchmark.go:233 +0x7b
==================
{noformat}

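The comparator in {{sortQueuesByPriorityAndFairness}} reads {{Queue.pending}} while {{incPendingResource}} updates it from another goroutine, so we must not access {{Queue.pending}} directly: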

{noformat}
                if comp == 0 {
                        return resources.StrictlyGreaterThan(resources.Sub(l.pending, r.pending), resources.Zero)
                }

...

                        if lPriority < rPriority {
                                return false
                        }
                        return resources.StrictlyGreaterThan(resources.Sub(l.pending, r.pending), resources.Zero)
{noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to