Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation

2024-04-23 Thread Alex Deucher
On Tue, Apr 23, 2024 at 9:58 AM Christian König
 wrote:
>
> Am 23.04.24 um 15:18 schrieb Alex Deucher:
> > On Tue, Apr 23, 2024 at 2:57 AM Christian König
> >  wrote:
> >> Am 22.04.24 um 16:37 schrieb Alex Deucher:
> >>> As we use wb slots more dynamically, we need to lock
> >>> access to avoid racing on allocation or free.
> >> Wait a second. Why are we using the wb slots dynamically?
> >>
> > See patch 2.  I needed a way to allocate small GPU accessible memory
> > locations on the fly.  Using WB seems like a good solution.
>
> That's probably better done with the seq64 allocator. At least the
> original idea was that it is self containing and can be used by many
> threads at the same time.

Sure, but that is mapped into the user VMs and has sequence numbers
handling which we don't really need for handling the MES.  It also
makes seq64 a requirement for this fix which makes it harder to port
back to older kernels.

Alex

>
> Apart from that I really think we need to talk with the MES guys about
> changing that behavior ASAP. This is really a bug we need to fix and not
> work around like that.
>
> Christian.
>
> >
> > Alex
> >
> >> The number of slots made available is statically calculated, when this
> >> is suddenly used dynamically we have quite a bug here.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> Signed-off-by: Alex Deucher 
> >>> ---
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
> >>>2 files changed, 11 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index cac0ca64367b..f87d53e183c3 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -502,6 +502,7 @@ struct amdgpu_wb {
> >>>uint64_tgpu_addr;
> >>>u32 num_wb; /* Number of wb slots actually 
> >>> reserved for amdgpu. */
> >>>unsigned long   used[DIV_ROUND_UP(AMDGPU_MAX_WB, 
> >>> BITS_PER_LONG)];
> >>> + spinlock_t  lock;
> >>>};
> >>>
> >>>int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> index f8a34db5d9e3..869256394136 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> @@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct 
> >>> amdgpu_device *adev)
> >>> */
> >>>int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
> >>>{
> >>> - unsigned long offset = find_first_zero_bit(adev->wb.used, 
> >>> adev->wb.num_wb);
> >>> + unsigned long flags, offset;
> >>>
> >>> + spin_lock_irqsave(&adev->wb.lock, flags);
> >>> + offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
> >>>    if (offset < adev->wb.num_wb) {
> >>>    __set_bit(offset, adev->wb.used);
> >>> + spin_unlock_irqrestore(&adev->wb.lock, flags);
> >>>    *wb = offset << 3; /* convert to dw offset */
> >>>    return 0;
> >>>    } else {
> >>> + spin_unlock_irqrestore(&adev->wb.lock, flags);
> >>>return -EINVAL;
> >>>}
> >>>}
> >>> @@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device 
> >>> *adev, u32 *wb)
> >>> */
> >>>void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
> >>>{
> >>> + unsigned long flags;
> >>> +
> >>>wb >>= 3;
> >>> + spin_lock_irqsave(&adev->wb.lock, flags);
> >>>    if (wb < adev->wb.num_wb)
> >>>    __clear_bit(wb, adev->wb.used);
> >>> + spin_unlock_irqrestore(&adev->wb.lock, flags);
> >>>}
> >>>
> >>>/**
> >>> @@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >>>    spin_lock_init(&adev->se_cac_idx_lock);
> >>>    spin_lock_init(&adev->audio_endpt_idx_lock);
> >>>    spin_lock_init(&adev->mm_stats.lock);
> >>> + spin_lock_init(&adev->wb.lock);
> >>>
> >>>    INIT_LIST_HEAD(&adev->shadow_list);
> >>>    mutex_init(&adev->shadow_list_lock);
>


Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation

2024-04-23 Thread Christian König

Am 23.04.24 um 15:18 schrieb Alex Deucher:

On Tue, Apr 23, 2024 at 2:57 AM Christian König
 wrote:

Am 22.04.24 um 16:37 schrieb Alex Deucher:

As we use wb slots more dynamically, we need to lock
access to avoid racing on allocation or free.

Wait a second. Why are we using the wb slots dynamically?


See patch 2.  I needed a way to allocate small GPU accessible memory
locations on the fly.  Using WB seems like a good solution.


That's probably better done with the seq64 allocator. At least the 
original idea was that it is self containing and can be used by many 
threads at the same time.


Apart from that I really think we need to talk with the MES guys about 
changing that behavior ASAP. This is really a bug we need to fix and not 
work around like that.


Christian.



Alex


The number of slots made available is statically calculated, when this
is suddenly used dynamically we have quite a bug here.

Regards,
Christian.


Signed-off-by: Alex Deucher 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
   2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b..f87d53e183c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -502,6 +502,7 @@ struct amdgpu_wb {
   uint64_tgpu_addr;
   u32 num_wb; /* Number of wb slots actually reserved 
for amdgpu. */
   unsigned long   used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+ spinlock_t  lock;
   };

   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f8a34db5d9e3..869256394136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device 
*adev)
*/
   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
   {
- unsigned long offset = find_first_zero_bit(adev->wb.used, 
adev->wb.num_wb);
+ unsigned long flags, offset;

+ spin_lock_irqsave(&adev->wb.lock, flags);
+ offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
   if (offset < adev->wb.num_wb) {
   __set_bit(offset, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
   *wb = offset << 3; /* convert to dw offset */
   return 0;
   } else {
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
   return -EINVAL;
   }
   }
@@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 
*wb)
*/
   void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
   {
+ unsigned long flags;
+
   wb >>= 3;
+ spin_lock_irqsave(&adev->wb.lock, flags);
   if (wb < adev->wb.num_wb)
   __clear_bit(wb, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
   }

   /**
@@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
   spin_lock_init(&adev->se_cac_idx_lock);
   spin_lock_init(&adev->audio_endpt_idx_lock);
   spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->wb.lock);

   INIT_LIST_HEAD(&adev->shadow_list);
   mutex_init(&adev->shadow_list_lock);




Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation

2024-04-23 Thread Alex Deucher
On Tue, Apr 23, 2024 at 2:57 AM Christian König
 wrote:
>
> Am 22.04.24 um 16:37 schrieb Alex Deucher:
> > As we use wb slots more dynamically, we need to lock
> > access to avoid racing on allocation or free.
>
> Wait a second. Why are we using the wb slots dynamically?
>

See patch 2.  I needed a way to allocate small GPU accessible memory
locations on the fly.  Using WB seems like a good solution.

Alex

> The number of slots made available is statically calculated, when this
> is suddenly used dynamically we have quite a bug here.
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
> >   2 files changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index cac0ca64367b..f87d53e183c3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -502,6 +502,7 @@ struct amdgpu_wb {
> >   uint64_tgpu_addr;
> >   u32 num_wb; /* Number of wb slots actually 
> > reserved for amdgpu. */
> >   unsigned long   used[DIV_ROUND_UP(AMDGPU_MAX_WB, 
> > BITS_PER_LONG)];
> > + spinlock_t  lock;
> >   };
> >
> >   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index f8a34db5d9e3..869256394136 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct 
> > amdgpu_device *adev)
> >*/
> >   int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
> >   {
> > - unsigned long offset = find_first_zero_bit(adev->wb.used, 
> > adev->wb.num_wb);
> > + unsigned long flags, offset;
> >
> > + spin_lock_irqsave(&adev->wb.lock, flags);
> > + offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
> >   if (offset < adev->wb.num_wb) {
> >   __set_bit(offset, adev->wb.used);
> > + spin_unlock_irqrestore(&adev->wb.lock, flags);
> >   *wb = offset << 3; /* convert to dw offset */
> >   return 0;
> >   } else {
> > + spin_unlock_irqrestore(&adev->wb.lock, flags);
> >   return -EINVAL;
> >   }
> >   }
> > @@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, 
> > u32 *wb)
> >*/
> >   void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
> >   {
> > + unsigned long flags;
> > +
> >   wb >>= 3;
> > + spin_lock_irqsave(&adev->wb.lock, flags);
> >   if (wb < adev->wb.num_wb)
> >   __clear_bit(wb, adev->wb.used);
> > + spin_unlock_irqrestore(&adev->wb.lock, flags);
> >   }
> >
> >   /**
> > @@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> >   spin_lock_init(&adev->se_cac_idx_lock);
> >   spin_lock_init(&adev->audio_endpt_idx_lock);
> >   spin_lock_init(&adev->mm_stats.lock);
> > + spin_lock_init(&adev->wb.lock);
> >
> >   INIT_LIST_HEAD(&adev->shadow_list);
> >   mutex_init(&adev->shadow_list_lock);
>


Re: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation

2024-04-23 Thread Christian König

Am 22.04.24 um 16:37 schrieb Alex Deucher:

As we use wb slots more dynamically, we need to lock
access to avoid racing on allocation or free.


Wait a second. Why are we using the wb slots dynamically?

The number of slots made available is statically calculated, when this 
is suddenly used dynamically we have quite a bug here.


Regards,
Christian.



Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
  2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b..f87d53e183c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -502,6 +502,7 @@ struct amdgpu_wb {
uint64_tgpu_addr;
u32 num_wb; /* Number of wb slots actually reserved 
for amdgpu. */
unsigned long   used[DIV_ROUND_UP(AMDGPU_MAX_WB, 
BITS_PER_LONG)];
+   spinlock_t  lock;
  };
  
  int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f8a34db5d9e3..869256394136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device 
*adev)
   */
  int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
  {
-   unsigned long offset = find_first_zero_bit(adev->wb.used, 
adev->wb.num_wb);
+   unsigned long flags, offset;
  
+	spin_lock_irqsave(&adev->wb.lock, flags);

+   offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
if (offset < adev->wb.num_wb) {
__set_bit(offset, adev->wb.used);
+   spin_unlock_irqrestore(&adev->wb.lock, flags);
*wb = offset << 3; /* convert to dw offset */
return 0;
} else {
+   spin_unlock_irqrestore(&adev->wb.lock, flags);
return -EINVAL;
}
  }
@@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 
*wb)
   */
  void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
  {
+   unsigned long flags;
+
wb >>= 3;
+   spin_lock_irqsave(&adev->wb.lock, flags);
if (wb < adev->wb.num_wb)
__clear_bit(wb, adev->wb.used);
+   spin_unlock_irqrestore(&adev->wb.lock, flags);
  }
  
  /**

@@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+   spin_lock_init(&adev->wb.lock);

	INIT_LIST_HEAD(&adev->shadow_list);

mutex_init(&adev->shadow_list_lock);




RE: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation

2024-04-22 Thread Liu, Shaoyun
[AMD Official Use Only - General]

These two patches Looks good to me .

Reviewed by Shaoyun.liu 

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Monday, April 22, 2024 10:38 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander 
Subject: [PATCH 1/2] drm/amdgpu: add a spinlock to wb allocation

As we use wb slots more dynamically, we need to lock access to avoid racing on 
allocation or free.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b..f87d53e183c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -502,6 +502,7 @@ struct amdgpu_wb {
uint64_tgpu_addr;
u32 num_wb; /* Number of wb slots actually reserved 
for amdgpu. */
unsigned long   used[DIV_ROUND_UP(AMDGPU_MAX_WB, 
BITS_PER_LONG)];
+   spinlock_t  lock;
 };

 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f8a34db5d9e3..869256394136 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1482,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device 
*adev)
  */
 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)  {
-   unsigned long offset = find_first_zero_bit(adev->wb.used, 
adev->wb.num_wb);
+   unsigned long flags, offset;

+   spin_lock_irqsave(&adev->wb.lock, flags);
+   offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
if (offset < adev->wb.num_wb) {
__set_bit(offset, adev->wb.used);
+   spin_unlock_irqrestore(&adev->wb.lock, flags);
*wb = offset << 3; /* convert to dw offset */
return 0;
} else {
+   spin_unlock_irqrestore(&adev->wb.lock, flags);
return -EINVAL;
}
 }
@@ -1503,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 
*wb)
  */
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)  {
+   unsigned long flags;
+
wb >>= 3;
+   spin_lock_irqsave(&adev->wb.lock, flags);
if (wb < adev->wb.num_wb)
__clear_bit(wb, adev->wb.used);
+   spin_unlock_irqrestore(&adev->wb.lock, flags);
 }

 /**
@@ -4061,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+   spin_lock_init(&adev->wb.lock);

INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
--
2.44.0