[PATCH RFC 3/3] x86/sgx: Implement EAUG population with MAP_POPULATE

2022-03-05 Thread Jarkko Sakkinen
With SGX1 an enclave needs to be created with its maximum memory demands
pre-allocated. Pages cannot be added to an enclave after it is initialized.
SGX2 introduces a new function, ENCLS[EAUG] for adding pages to an
initialized enclave.

Add support for dynamically adding pages to an initialized enclave with
mmap() by populating pages with EAUG. Use the f_ops->populate() callback
to achieve this behaviour.
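
For illustration, a user would request population by passing MAP_POPULATE
at mmap() time. The device path, the flags, and the requirement that the
enclave is already initialized are assumptions based on the existing SGX
driver interface, not part of this patch:

#include <fcntl.h>
#include <stddef.h>
#include <sys/mman.h>
#include <unistd.h>

/* Sketch only: map a range of an already-initialized enclave and ask the
 * kernel to EAUG every page up front via MAP_POPULATE. */
static void *map_populated(void *addr, size_t size)
{
	int fd = open("/dev/sgx_enclave", O_RDWR);
	void *ptr;

	if (fd < 0)
		return MAP_FAILED;

	ptr = mmap(addr, size, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_FIXED | MAP_POPULATE, fd, 0);

	close(fd);	/* the mapping keeps its own reference */
	return ptr;
}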

Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/cpu/sgx/driver.c | 129 +++
 1 file changed, 129 insertions(+)

diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index aa9b8b868867..0e97e7476076 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -9,6 +9,7 @@
 #include <asm/traps.h>
 #include "driver.h"
 #include "encl.h"
+#include "encls.h"
 
 u64 sgx_attributes_reserved_mask;
 u64 sgx_xfrm_reserved_mask = ~0x3;
@@ -101,6 +102,133 @@ static int sgx_mmap(struct file *file, struct 
vm_area_struct *vma)
return 0;
 }
 
+static int sgx_encl_augment_page(struct sgx_encl *encl, unsigned long offset)
+{
+   struct sgx_pageinfo pginfo = {0};
+   struct sgx_encl_page *encl_page;
+   struct sgx_epc_page *epc_page;
+   struct sgx_va_page *va_page;
+   u64 secinfo_flags;
+   int ret;
+
+   /*
+* Ignore internal permission checking for dynamically added pages.
+* They matter only for data added during the pre-initialization phase.
+* The enclave decides the permissions by means of EACCEPT,
+* EACCEPTCOPY and EMODPE.
+*/
+   secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
+   encl_page = sgx_encl_page_alloc(encl, offset, secinfo_flags);
+   if (IS_ERR(encl_page))
+   return PTR_ERR(encl_page);
+
+   epc_page = sgx_alloc_epc_page(encl_page, true);
+   if (IS_ERR(epc_page)) {
+   ret = PTR_ERR(epc_page);
+   goto err_alloc_epc_page;
+   }
+
+   va_page = sgx_encl_grow(encl);
+   if (IS_ERR(va_page)) {
+   ret = PTR_ERR(va_page);
+   goto err_grow;
+   }
+
+   mutex_lock(&encl->lock);
+
+   /*
+* Adding to encl->va_pages must be done under encl->lock.  Ditto for
+* deleting (via sgx_encl_shrink()) in the error path.
+*/
+   if (va_page)
+   list_add(&va_page->list, &encl->va_pages);
+
+   /*
+* Insert into the xarray prior to EAUG so that an OOM failure can
+* still be unwound gracefully; failure on EAUG itself is limited to
+* userspace errors (or kernel/hardware bugs).
+*/
+   ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
+   encl_page, GFP_KERNEL);
+
+   /*
+* If ret == -EBUSY, the page was already created by another flow
+* while we were not holding encl->lock.
+*/
+   if (ret)
+   goto err_xa_insert;
+
+   pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
+   pginfo.addr = encl_page->desc & PAGE_MASK;
+   pginfo.metadata = 0;
+
+   ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
+   if (ret) {
+   ret = -EFAULT;
+   goto err_eaug;
+   }
+
+   encl_page->encl = encl;
+   encl_page->epc_page = epc_page;
+   encl_page->type = SGX_PAGE_TYPE_REG;
+   encl->secs_child_cnt++;
+
+   sgx_mark_page_reclaimable(encl_page->epc_page);
+
+   mutex_unlock(&encl->lock);
+
+   return 0;
+
+err_eaug:
+   xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
+
+err_xa_insert:
+   sgx_encl_shrink(encl, va_page);
+   mutex_unlock(&encl->lock);
+
+err_grow:
+   sgx_encl_free_epc_page(epc_page);
+
+err_alloc_epc_page:
+   kfree(encl_page);
+
+   return ret;
+}
+
+/*
+ * Add new pages to the enclave sequentially with ENCLS[EAUG]. Note that
+ * sgx_mmap() validates that the given VMA is within the enclave range. Calling
+ * sgx_encl_may_map() here a second time would be too time consuming.
+ */
+static int sgx_populate(struct file *file, struct vm_area_struct *vma)
+{
+   unsigned long length = vma->vm_end - vma->vm_start;
+   struct sgx_encl *encl = file->private_data;
+   unsigned long start = vma->vm_start - encl->base;
+   unsigned long pos;
+   int ret = 0;
+
+   /* EAUG works only for initialized enclaves. */
+   if (!test_bit(SGX_ENCL_INITIALIZED, >flags))
+   return -EINVAL;
+
+   for (pos = 0; pos < length; pos += PAGE_SIZE) {
+   if (signal_pending(current)) {
+   if (!pos)
+   ret = -ERESTARTSYS;
+
+   break;
+   }
+
+   if (need_resched())
+   cond_resched();
+
+   ret = sgx_encl_augment_page(encl, start + pos);
+   if (ret)
+   break;
+   }
+
+   return ret;
+}
+
 static unsigned long sgx_get_unmapped_area(struct file *file,
 

[PATCH RFC 2/3] x86/sgx: Export sgx_encl_page_alloc()

2022-03-05 Thread Jarkko Sakkinen
Move sgx_encl_page_alloc() to encl.c and export it so that it can be
used in the implementation of MAP_POPULATE, which requires allocating
new enclave pages.

Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/cpu/sgx/encl.c  | 38 +
 arch/x86/kernel/cpu/sgx/encl.h  |  3 +++
 arch/x86/kernel/cpu/sgx/ioctl.c | 38 -
 3 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 89aeed798ffb..79e39bd99c09 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -914,6 +914,44 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
return ret;
 }
 
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags)
+{
+   struct sgx_encl_page *encl_page;
+   unsigned long prot;
+
+   encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
+   if (!encl_page)
+   return ERR_PTR(-ENOMEM);
+
+   encl_page->desc = encl->base + offset;
+   encl_page->encl = encl;
+
+   prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
+  _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
+  _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
+
+   /*
+* TCS pages must always have RW set for CPU access while the SECINFO
+* permissions are *always* zero - the CPU ignores the user provided
+* values and silently overwrites them with zero permissions.
+*/
+   if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
+   prot |= PROT_READ | PROT_WRITE;
+
+   /* Calculate maximum of the VM flags for the page. */
+   encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
+
+   /*
+* At time of allocation, the runtime protection bits are the same
+* as the maximum protection bits.
+*/
+   encl_page->vm_run_prot_bits = encl_page->vm_max_prot_bits;
+
+   return encl_page;
+}
+
 /**
  * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
  * @encl: the enclave
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 1b6ce1da7c92..3df0d3faf3a1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -113,6 +113,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned 
long page_index,
 void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
 int sgx_encl_test_and_clear_young(struct mm_struct *mm,
  struct sgx_encl_page *page);
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags);
 void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr);
 struct sgx_epc_page *sgx_alloc_va_page(void);
 unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index d8c3c07badb3..3e3ca27a6f72 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -169,44 +169,6 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, 
void __user *arg)
return ret;
 }
 
-static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
-unsigned long offset,
-u64 secinfo_flags)
-{
-   struct sgx_encl_page *encl_page;
-   unsigned long prot;
-
-   encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
-   if (!encl_page)
-   return ERR_PTR(-ENOMEM);
-
-   encl_page->desc = encl->base + offset;
-   encl_page->encl = encl;
-
-   prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
-  _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
-  _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
-
-   /*
-* TCS pages must always have RW set for CPU access while the SECINFO
-* permissions are *always* zero - the CPU ignores the user provided
-* values and silently overwrites them with zero permissions.
-*/
-   if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
-   prot |= PROT_READ | PROT_WRITE;
-
-   /* Calculate maximum of the VM flags for the page. */
-   encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
-
-   /*
-* At time of allocation, the runtime protection bits are the same
-* as the maximum protection bits.
-*/
-   encl_page->vm_run_prot_bits = encl_page->vm_max_prot_bits;
-
-   return encl_page;
-}
-
 static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
 {
u64 perm = secinfo->flags & 

[PATCH RFC 1/3] mm: Add f_ops->populate()

2022-03-05 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. The SGX driver can use
this to request more memory by issuing the ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.
---
Signed-off-by: Jarkko Sakkinen 
v3:
-   if (!ret && do_populate && file->f_op->populate)
+   if (!ret && do_populate && file->f_op->populate &&
+   !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
(reported by Matthew Wilcox)
v2:
-   if (!ret && do_populate)
+   if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
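
For illustration, a driver opts in by implementing the new hook next to
its .mmap; the mydrv_* names below are hypothetical:

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical hook: materialize every page of the VMA so that
 * MAP_POPULATE has an effect even for VM_IO | VM_PFNMAP mappings,
 * which the generic __mm_populate() skips. */
static int mydrv_populate(struct file *file, struct vm_area_struct *vma)
{
	unsigned long addr;
	int ret;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		ret = mydrv_fill_page(file->private_data, addr); /* hypothetical */
		if (ret)
			return ret;
	}

	return 0;
}

static const struct file_operations mydrv_fops = {
	.mmap		= mydrv_mmap,		/* hypothetical */
	.populate	= mydrv_populate,
};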
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 12 ++--
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
spin_unlock(&cii->c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..2909e2d14af8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1993,6 +1994,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2076,15 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate && file->f_op->populate &&
+   !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ 

[PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-05 Thread Jarkko Sakkinen
For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
it to be used for initializing the device memory by providing a new callback,
f_ops->populate(), for the purpose.

SGX patches are provided to show the callback in context.

An obvious alternative is an ioctl, but it is less elegant and requires
two syscalls (mmap + ioctl) per memory range, instead of just one
(mmap).

Jarkko Sakkinen (3):
  mm: Add f_ops->populate()
  x86/sgx: Export sgx_encl_page_alloc()
  x86/sgx: Implement EAUG population with MAP_POPULATE

 arch/mips/kernel/vdso.c|   2 +-
 arch/x86/kernel/cpu/sgx/driver.c   | 129 +
 arch/x86/kernel/cpu/sgx/encl.c |  38 ++
 arch/x86/kernel/cpu/sgx/encl.h |   3 +
 arch/x86/kernel/cpu/sgx/ioctl.c|  38 --
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |   2 +-
 fs/coda/file.c |   2 +-
 fs/overlayfs/file.c|   2 +-
 include/linux/fs.h |  12 +-
 include/linux/mm.h |   2 +-
 ipc/shm.c  |   2 +-
 mm/mmap.c  |  10 +-
 mm/nommu.c |   4 +-
 13 files changed, 193 insertions(+), 53 deletions(-)

-- 
2.35.1



Re: [PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Matthew Wilcox
On Sun, Mar 06, 2022 at 06:25:52AM +0200, Jarkko Sakkinen wrote:
> > Are you deliberately avoiding the question?  I'm not asking about
> > implementation.  I'm asking about the semantics of MAP_POPULATE with
> > your driver.
> 
> No. I just noticed a bug in the guard from your comment that I wanted
> to point out.
> 
> With the next version I post the corresponding change to the driver,
> in order to see this in context.

Oh, good grief.  Don't bother.  NAK.


Re: [PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 04:19:26AM +, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 06:11:21AM +0200, Jarkko Sakkinen wrote:
> > On Sun, Mar 06, 2022 at 03:52:12AM +, Matthew Wilcox wrote:
> > > On Sun, Mar 06, 2022 at 05:21:11AM +0200, Jarkko Sakkinen wrote:
> > > > On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> > > > > On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > > > > > Sometimes you might want to use MAP_POPULATE to ask a device driver 
> > > > > > to
> > > > > > initialize the device memory in some specific manner. SGX driver 
> > > > > > can use
> > > > > > this to request more memory by issuing ENCLS[EAUG] x86 opcode for 
> > > > > > each
> > > > > > page in the address range.
> > > > > > 
> > > > > > Add f_ops->populate() with the same parameters as f_ops->mmap() and 
> > > > > > make
> > > > > > it conditionally called inside call_mmap(). Update call sites
> > > > > > accordingly.
> > > > > 
> > > > > Your device driver has a ->mmap operation.  Why does it need another
> > > > > one?  More explanation required here.
> > > > 
> > > > f_ops->mmap() would require an additional parameter, which results
> > > > in heavy refactoring.
> > > > 
> > > > struct file_operations has 1125 references in the kernel tree, so I
> > > > decided to check this way around first. 
> > > 
> > > Are you saying that your device driver behaves differently if
> > > MAP_POPULATE is set versus if it isn't?  That seems hideously broken.
> > 
> > MAP_POPULATE does not do anything (according to __mm_populate in mm/gup.c)
> > with VMAs that have some sort of device/IO memory, i.e. vm_flags
> > intersecting with VM_PFNMAP | VM_IO.
> > 
> > I can extend the guard obviously to:
> > 
> > if (!ret && do_populate && file->f_op->populate &&
> > !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> > file->f_op->populate(file, vma);
> 
> Are you deliberately avoiding the question?  I'm not asking about
> implementation.  I'm asking about the semantics of MAP_POPULATE with
> your driver.

No. I just noticed a bug in the guard from your comment that I wanted
to point out.

With the next version I post the corresponding change to the driver,
in order to see this in context.

BR, Jarkko


Re: [PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Matthew Wilcox
On Sun, Mar 06, 2022 at 06:11:21AM +0200, Jarkko Sakkinen wrote:
> On Sun, Mar 06, 2022 at 03:52:12AM +, Matthew Wilcox wrote:
> > On Sun, Mar 06, 2022 at 05:21:11AM +0200, Jarkko Sakkinen wrote:
> > > On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> > > > On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > > > > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > > > > initialize the device memory in some specific manner. SGX driver can 
> > > > > use
> > > > > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > > > > page in the address range.
> > > > > 
> > > > > Add f_ops->populate() with the same parameters as f_ops->mmap() and 
> > > > > make
> > > > > it conditionally called inside call_mmap(). Update call sites
> > > > > accordingly.
> > > > 
> > > > Your device driver has a ->mmap operation.  Why does it need another
> > > > one?  More explanation required here.
> > > 
> > > f_ops->mmap() would require an additional parameter, which results
> > > in heavy refactoring.
> > > 
> > > struct file_operations has 1125 references in the kernel tree, so I
> > > decided to check this way around first. 
> > 
> > Are you saying that your device driver behaves differently if
> > MAP_POPULATE is set versus if it isn't?  That seems hideously broken.
> 
> MAP_POPULATE does not do anything (according to __mm_populate in mm/gup.c)
> with VMAs that have some sort of device/IO memory, i.e. vm_flags
> intersecting with VM_PFNMAP | VM_IO.
> 
> I can extend the guard obviously to:
> 
> if (!ret && do_populate && file->f_op->populate &&
> !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> file->f_op->populate(file, vma);

Are you deliberately avoiding the question?  I'm not asking about
implementation.  I'm asking about the semantics of MAP_POPULATE with
your driver.


Re: [PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 03:52:12AM +, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 05:21:11AM +0200, Jarkko Sakkinen wrote:
> > On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> > > On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > > > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > > > initialize the device memory in some specific manner. SGX driver can use
> > > > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > > > page in the address range.
> > > > 
> > > > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > > > it conditionally called inside call_mmap(). Update call sites
> > > > accordingly.
> > > 
> > > Your device driver has a ->mmap operation.  Why does it need another
> > > one?  More explanation required here.
> > 
> > f_ops->mmap() would require an additional parameter, which results
> > in heavy refactoring.
> > 
> > struct file_operations has 1125 references in the kernel tree, so I
> > decided to check this way around first. 
> 
> Are you saying that your device driver behaves differently if
> MAP_POPULATE is set versus if it isn't?  That seems hideously broken.

MAP_POPULATE does not do anything (according to __mm_populate in mm/gup.c)
with VMAs that have some sort of device/IO memory, i.e. vm_flags
intersecting with VM_PFNMAP | VM_IO.

I can extend the guard obviously to:

if (!ret && do_populate && file->f_op->populate &&
!!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
file->f_op->populate(file, vma);

BR, Jarkko


Re: [PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Matthew Wilcox
On Sun, Mar 06, 2022 at 05:21:11AM +0200, Jarkko Sakkinen wrote:
> On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> > On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > > initialize the device memory in some specific manner. SGX driver can use
> > > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > > page in the address range.
> > > 
> > > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > > it conditionally called inside call_mmap(). Update call sites
> > > accordingly.
> > 
> > Your device driver has a ->mmap operation.  Why does it need another
> > one?  More explanation required here.
> 
> f_ops->mmap() would require an additional parameter, which results
> in heavy refactoring.
> 
> struct file_operations has 1125 references in the kernel tree, so I
> decided to check this way around first. 

Are you saying that your device driver behaves differently if
MAP_POPULATE is set versus if it isn't?  That seems hideously broken.


Re: Report 2 in ext4 and journal based on v5.17-rc1

2022-03-05 Thread Theodore Ts'o
On Sat, Mar 05, 2022 at 11:55:34PM +0900, Byungchul Park wrote:
> > that is why some of the DEPT reports were completely incomprehensible
> 
> It's because you are blindly blaming it without understanding how
> Dept works at all. I will fix those that must be fixed. Don't worry.

Users of DEPT must not have to understand how DEPT works in order to
understand and use DEPT reports.  If you think I don't understand how
DEPT works, I'm going to gently suggest that this means DEPT reports
are not clear enough, and/or DEPT documentation needs to be
*substantially* improved, or both --- and these need to happen before
DEPT is ready to be merged.

> > So if DEPT is issuing lots of reports about apparently circular
> > dependencies, please try to be open to the thought that the fault is
> 
> No one was convinced that Dept doesn't have a fault. I think your
> worries are too much.

In that case, may I ask that you add back an RFC to the subject prefix
(e.g., [PATCH RFC -v5])?  Or maybe even add the subject prefix NOT YET
READY?  I have seen cases where, after a patch series gets to PATCH -v22,
people assume that it *must* be ready, as opposed to what it
really means, which is "the author is just persistently reposting and
rebasing the patch series over and over again".  It would be helpful
if you directly acknowledge, in each patch submission, that it is not
yet ready for prime time.

After all, right now, DEPT has generated two reports in ext4, both of
which were false positives, and both of which have required a lot of
maintainer time to prove to you that they were in fact false
positives.  So are we all agreed that DEPT is not ready for prime
time?

> No one argued that their code must be buggy, either. So I don't think
> you have to worry about what's never happened.

Well, you kept on insisting that ext4 must have a circular dependency,
and that depending on a "rescue wakeup" is bad programming practice,
but you'll reluctantly agree to make DEPT accept "rescue wakeups" if
that is the will of the developers.  My concern here is the
fundamental concept of "rescue wakeups" is wrong; I don't see how you
can distinguish between a valid wakeup and one that you and DEPT are
going to somehow characterize as dodgy.

Consider: a process can first subscribe to multiple wait queues, and
arrange to be woken up by a timeout, and then call schedule() to go to
sleep.  So it is not waiting on a single wait channel, but potentially
*multiple* wakeup sources.  If you are going to prove that the kernel is
not going to make forward progress, you need to prove that *none* of the
ways that the process might be woken up can happen, for whatever reason.
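
For illustration, a minimal sketch of such a multi-source wait (queue_a,
queue_b and the cond_*() helpers are hypothetical names):

	DEFINE_WAIT(wait_a);
	DEFINE_WAIT(wait_b);

	prepare_to_wait(&queue_a, &wait_a, TASK_INTERRUPTIBLE);
	prepare_to_wait(&queue_b, &wait_b, TASK_INTERRUPTIBLE);

	if (!cond_a() && !cond_b())
		schedule_timeout(HZ);	/* woken by queue_a, queue_b, or the timer */

	finish_wait(&queue_a, &wait_a);
	finish_wait(&queue_b, &wait_b);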

Just because one wakeup source seems to form a circular dependency
proves nothing, since another wakeup source might be the designed and
architected way that code makes forward progress.

You seem to be assuming that one wakeup source is somehow the
"correct" one, that the other ways the process could be woken up are
"rescue wakeup sources", and you seem to believe that relying on a
"rescue wakeup source" is bad.  But in the case of a process which has
called prepare-to-wait on more than one wait queue, how is DEPT going
to distinguish between your "morally correct" wakeup source and the
"rescue wakeup source"?

> No doubt. I already think so. But it doesn't mean that I have to keep
> quiet without discussing how to improve Dept. I will keep improving Dept
> in a reasonable way.

Well, I don't want to be in a position of having to prove that every
single DEPT report in my code that doesn't make sense to me is
nonsense, or else DEPT will get merged.

So maybe we need to reverse the burden of proof.

Instead of just sending a DEPT report, and then asking the maintainers
to explain why it is a false positive, how about if *you* use the DEPT
report to examinie the subsystem code, and then explain plain English,
how you think this could trigger in real life, or cause a performance
problem in real life or perhaps provide a script or C reproducer that
triggers the supposed deadlock?

Yes, that means you will need to understand the code in question, but
hopefully the DEPT reports should be clear enough that someone who
isn't a deep expert in the code should be able to spot the bug.  If
not, and if only a few deep experts of the code in question will be able
to decipher the DEPT report and figure out a fix, that's really not
ideal.

If DEPT can find a real bug and you can show that Lockdep wouldn't
have been able to find it, then that would be proof that it is making
a real contribution.  That would be a real benefit.  At the same time,
DEPT will hopefully be able to demonstrate a false positive rate which
is low enough that the benefits clearly outweigh the costs.

At the moment, I believe the scoreboard for DEPT with respect to ext4
is zero real bugs found, and two false positives, both of which have
required significant rounds of e-mail before the subsystem maintainers
were able to prove to you that it 

[PATCH RFC v2] mm: Add f_ops->populate()

2022-03-05 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. The SGX driver can use
this to request more memory by issuing the ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.

Signed-off-by: Jarkko Sakkinen 
---
v2:
-   if (!ret && do_populate)
+   if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 10 --
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
spin_unlock(&cii->c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..4c6a3339373d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate && file->f_op->populate)
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-   struct list_head *uf);
+   struct list_head *uf, bool do_populate);
 

Re: [PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > initialize the device memory in some specific manner. SGX driver can use
> > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > page in the address range.
> > 
> > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > it conditionally called inside call_mmap(). Update call sites
> > accordingly.
> 
> Your device driver has a ->mmap operation.  Why does it need another
> one?  More explanation required here.

f_ops->mmap() would require an additional parameter, which results
in heavy refactoring.

struct file_operations has 1125 references in the kernel tree, so I
decided to check this way around first. 

BR, Jarkko


Re: [PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Matthew Wilcox
On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> initialize the device memory in some specific manner. SGX driver can use
> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> page in the address range.
> 
> Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> it conditionally called inside call_mmap(). Update call sites
> accordingly.

Your device driver has a ->mmap operation.  Why does it need another
one?  More explanation required here.


[PATCH RFC] mm: Add f_ops->populate()

2022-03-05 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. The SGX driver can use
this to request more memory by issuing the ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.

Signed-off-by: Jarkko Sakkinen 
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 10 --
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
spin_unlock(&cii->c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..fb90284e1c82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate)
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-   struct list_head *uf);
+   struct list_head *uf, bool do_populate);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,

[CI 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-03-05 Thread Vivek Kasireddy
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called, which unbinds and
evicts it. This misplaced vma gets subsequently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map, thereby preventing unnecessary unbinds.
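
In outline, the check amounts to the following simplified sketch
(alignment handling, error paths, and the interruptible locking added in
v9 are elided; see the patch below for the real thing):

	struct drm_mm_node *hole;
	unsigned int count = 0;

	mutex_lock(&ggtt->vm.mutex);
	drm_mm_for_each_suitable_hole(hole, &ggtt->vm.mm, 0,
				      ggtt->mappable_end, fence_size,
				      DRM_MM_INSERT_LOW) {
		count += hole->hole_size / fence_size;
		if (count >= 2)
			break;	/* two such objects fit; mappable is fine */
	}
	mutex_unlock(&ggtt->vm.mutex);

	return count < 2 ? -ENOSPC : 0;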

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24, suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e10187cd0a0..4bef9eaa8b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -879,6 +880,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(&obj->vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+   int err = 0;
+
+   /*
+* If the 

[CI 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-03-05 Thread Vivek Kasireddy
The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and would not
cause any functional changes. The second patch is a i915 driver
specific patch that also uses the iterator but solves a different
problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

v4: (Tvrtko)
- Dropped the patch added in v2 as it was deemed unnecessary.

v5: (Tvrtko)
- Fixed yet another typo in the drm core patch: should have
  passed caller_mode instead of mode to the iterator.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v5)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  32 
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 145 insertions(+), 51 deletions(-)

-- 
2.35.1



[CI 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v5)

2022-03-05 Thread Vivek Kasireddy
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.
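
A minimal usage sketch with a hypothetical caller:

	struct drm_mm_node *hole;

	drm_mm_for_each_suitable_hole(hole, mm, start, end, size,
				      DRM_MM_INSERT_BEST) {
		u64 hole_start = drm_mm_hole_node_start(hole);

		/* inspect or claim [hole_start, hole_start + hole->hole_size) */
	}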

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

v5: (Tvrtko)
- Fixed another typo: should pass caller_mode instead of mode to
  the iterator in drm_mm_insert_node_in_range().

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..6ff98a0e4df3 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return &node->hole_stack == &mm->hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
 * drm_mm_reserve_node - insert a pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, caller_mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..dff6db627807 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**
+ * drm_mm_for_each_suitable_hole - iterator to optimally walk over all
+ * 

Re: [PATCH v2 2/2] dt-bindings: gpu: Convert aspeed-gfx bindings to yaml

2022-03-05 Thread Krzysztof Kozlowski
On 04/03/2022 01:03, Joel Stanley wrote:
> Convert the bindings to yaml and add the ast2600 compatible string.
> 
> The legacy mfd description was put in place before the gfx bindings
> existed, to document the compatible that is used in the pinctrl
> bindings.
> 
> Signed-off-by: Joel Stanley 
> ---
>  .../devicetree/bindings/gpu/aspeed,gfx.yaml   | 69 +++
>  .../devicetree/bindings/gpu/aspeed-gfx.txt| 41 ---
>  .../devicetree/bindings/mfd/aspeed-gfx.txt| 17 -
>  3 files changed, 69 insertions(+), 58 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/gpu/aspeed,gfx.yaml
>  delete mode 100644 Documentation/devicetree/bindings/gpu/aspeed-gfx.txt
>  delete mode 100644 Documentation/devicetree/bindings/mfd/aspeed-gfx.txt
> 
> diff --git a/Documentation/devicetree/bindings/gpu/aspeed,gfx.yaml 
> b/Documentation/devicetree/bindings/gpu/aspeed,gfx.yaml
> new file mode 100644
> index ..8ddc4fa6e8e4
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/gpu/aspeed,gfx.yaml
> @@ -0,0 +1,69 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/gpu/aspeed,gfx.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: ASPEED GFX display device
> +
> +maintainers:
> +  - Joel Stanley 
> +
> +properties:
> +  compatible:
> +items:
> +  - enum:
> +  - aspeed,ast2400-gfx
> +  - aspeed,ast2500-gfx
> +  - aspeed,ast2600-gfx

That's different from the original bindings - it's new. It's not currently
supported, so adding it is more than just updating the bindings to the
current state.

> +  - const: syscon
> +
> +  reg:
> +minItems: 1

maxItems?

> +
> +  interrupts:
> +maxItems: 1
> +
> +  clocks:
> +maxItems: 1
> +
> +  resets:
> +maxItems: 1
> +
> +  memory-region: true
> +
> +  syscon: true

You need at least a description. I see old bindings did not mention it
(except that it is required)... I think you also need a type, because it
is not a standard property.

> +
> +  reg-io-width: true

Some constraints? Can it be anything from syscon schema?

Best regards,
Krzysztof


[PATCH] drm/msm/a6xx: Fix missing ARRAY_SIZE() check

2022-03-05 Thread Rob Clark
From: Rob Clark 

Fixes: f6d62d091cfd ("drm/msm/a6xx: add support for Adreno 660 GPU")
Signed-off-by: Rob Clark 
Reviewed-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 02b47977b5c3..83c31b2ad865 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -683,19 +683,23 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
const u32 *regs = a6xx_protect;
-   unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32;
-
-   BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
-   BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
+   unsigned i, count, count_max;
 
if (adreno_is_a650(adreno_gpu)) {
regs = a650_protect;
count = ARRAY_SIZE(a650_protect);
count_max = 48;
+   BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
} else if (adreno_is_a660_family(adreno_gpu)) {
regs = a660_protect;
count = ARRAY_SIZE(a660_protect);
count_max = 48;
+   BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
+   } else {
+   regs = a6xx_protect;
+   count = ARRAY_SIZE(a6xx_protect);
+   count_max = 32;
+   BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
}
 
/*
-- 
2.35.1



Re: [PATCH v3 3/5] drm/print: RFC add choice to use dynamic debug in drm-debug

2022-03-05 Thread Jim Cromie
From: Daniel Vetter 


Hi Daniel, everyone,

I've substantially updated this patchset,
and I thought it useful to reply here.

I'm noting the biggest changes in context below, trimming heavily otherwise.

Also, I've lost the message in my Gmail cloud, so this lacks
the usual "Daniel said: " attribution, and the "> " marks.
I've prefixed "< " to the 1st line of my replies where it helps.

The current patchset is here:

https://patchwork.freedesktop.org/series/100290/
https://lore.kernel.org/lkml/20220217034829.64395-1-jim.cro...@gmail.com/

It's "failing" patchwork CI because of a macro warning in dyndbg, which
would be nice for CI to "pass" because it's out of the DRM tree and
subject to a separate review process, and in the meantime, it would be
nice to see it under test.


Going item by item:

On Wed, Jul 14, 2021 at 11:51:36AM -0600, Jim Cromie wrote:
> drm's debug system uses distinct categories of debug messages, encoded
> in an enum (DRM_UT_*),

  enum is now renumbered to 0-15 (fits in 4 bits)
  drm_debug_enabled() does BIT(cat)
  15 is unclassed/reserved

> Dynamic debug has no concept of category, but we can map the DRM_UT_*

now it has "class" keyword, and class_id:4 field.

   #> echo module drm class 1 +p > control # 1=DRM_UT_KMS iirc

This interface is as yet unused, and DRM is its A-1 customer - are you shopping?
Since it's unused, it cannot break anything not using it :-)

> CONFIG_DRM_USE_DYNAMIC_DEBUG enables this, and is available if

still true.

> The indirection/switchover is layered into the macro scheme:

Heres where the biggest DRM centric changes (vs old) are:

The cDRM_UT_* => prefix-string mapping is gone; while it worked, it
made the literal format strings config dependent, and was more
complicated to explain.  Fitting category into class_id is much
cleaner.

old way replaced drm*dbg with pr_debug (at surface level)
new way adapts drm*dbg to reuse the class-Factory macro under pr_debug.

This loses pr_debug's log-decorating feature, but DRM has its own
conventions, and extra decorations are unlikely to really add anything.
OTOH, drm could re-use those flags to optionalize some of its conventions.

> 0. A new callback on drm.debug which calls dynamic_debug_exec_queries

now in dyndbg, where it belonged.  now just uses class query inside.


This is really awesome. For merging I think we need to discuss with dyn
debug folks whether they're all ok with this, but it's exported already
should should be fine.

< Your (fresh) endorsement should help :-)
Particularly, the new dyndbg features need a user.

> 
> 1. A "converted" or "classy" DRM_UT_* map
>

repeating, 2nd map is gone, DRM_UT_* is merely renumbered.

>DRM_UT_* are unchanged, since theyre used in drm_debug_enabled()
>and elsewhere.

I think for the production version of these we need to retire/deprecate
them, at least for drm core. Otherwise you have an annoying mismatch
between drm.debug module option and dyn debug.

< I think this isn't relevant now, since symbols are preserved.

Also, the __drm_debug var is used directly by the CLASSBITS macro,
(and therefore the callbacks), so /sys/module/drm/parameters/debug is
preserved at an ABI-ish level. (__drm_debug now ulong, to work with BIT)

> 
> 2. drm_dev_dbg & drm_debug are renamed (prefixed with '_')
> 
>old names are now macros, calling either:
>  legacy:  -> to renamed fn
>  enabled: -> dev_dbg & pr_debug, with cDRM-prefix # format.
>

For merging, can we pull out the renames and reorgs from this patch, and
then maybe also the reorder the next patch in your series here to be
before the dyn debug stuff?

< the above adaptation is re-adapted to use dyndbg's new class-Factory
macro, other stuff is gone.


> Signed-off-by: Jim Cromie 
> ---
>  drivers/gpu/drm/Kconfig |  13 +
>  drivers/gpu/drm/drm_print.c |  75 --
>  include/drm/drm_print.h | 102 ++--
>  3 files changed, 158 insertions(+), 32 deletions(-)

update - drm parts are slightly smaller now :-)

[jimc@frodo wk-next]$ git diff --stat master
 Documentation/admin-guide/dynamic-debug-howto.rst |   7 +
 drivers/gpu/drm/Kconfig   |  12 
 drivers/gpu/drm/Makefile  |   2 ++
 drivers/gpu/drm/drm_print.c   |  56 
++
 include/drm/drm_print.h   |  80 
+
 include/linux/dynamic_debug.h | 113 
+
 lib/dynamic_debug.c   | 140 
+
 7 files changed, 323 insertions(+), 87 deletions(-)


I really like this, I think you can drop the RFC. A few more things that I
think we need:

- An overview kerneldoc section which explains the interfaces and how it
  all works together. Essentially your commit message with some light
  markup to make it look good.

< not sure anything worth documenting has survived.

- I think it would 

Re: Report 2 in ext4 and journal based on v5.17-rc1

2022-03-05 Thread Byungchul Park
On Fri, Mar 04, 2022 at 10:40:35PM -0500, Theodore Ts'o wrote:
> On Fri, Mar 04, 2022 at 12:20:02PM +0900, Byungchul Park wrote:
> > 
> > I found a point that the two wait channels don't lead a deadlock in
> > some cases thanks to Jan Kara. I will fix it so that Dept won't
> > complain it.
> 
> I sent my last (admittedly cranky) message before you sent this.  I'm
> glad you finally understood Jan's explanation.  I was trying to tell

Not finally. I've understood him whenever he tried to tell me something.

> you the same thing, but apparently I failed to communicate in a

I don't think so. Your point and Jan's point are different. All he has
said makes sense. But yours does not.

> sufficiently clear manner.  In any case, what Jan described is a
> fundamental part of how wait queues work, and I'm kind of amazed that
> you were able to implement DEPT without understanding it.  (But maybe

Of course it was possible, because all Dept needs to know for its basic
operation is waits and events. Subtle things like what Jan told me help
make Dept better. For instance, the basic shape Dept tracks is no more
than the sketch below.
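
A minimal illustrative sketch with generic wait-queue APIs; the names
are made up, not taken from any report:

#include <linux/wait.h>
#include <linux/types.h>

static DECLARE_WAIT_QUEUE_HEAD(wq);
static bool event_done;

static void waiter(void)
{
	/* Dept records this context as waiting for the event. */
	wait_event(wq, event_done);
}

static void event_source(void)
{
	/* Dept records this context as the one producing the event. */
	event_done = true;
	wake_up(&wq);
}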

> that is why some of the DEPT reports were completely incomprehensible

That's because you are set on blaming it without understanding how Dept
works at all. I will fix the reports that need fixing. Don't worry.

> to me; I couldn't interpret why in the world DEPT was saying there was
> a problem.)

I can explain why, if you really want to understand. But I can't if you
stay like this.

> In any case, the thing I would ask is a little humility.  We regularly
> use lockdep, and we run a huge number of stress tests, throughout each
> development cycle.

Sure.

> So if DEPT is issuing lots of reports about apparently circular
> dependencies, please try to be open to the thought that the fault is

No one claimed that Dept is free of faults. I think you are worrying
too much.

> in DEPT, and don't try to argue with maintainers that their code MUST
> be buggy --- but since you don't understand our code, and DEPT must be

No one argued that their code must be buggy, either. So I don't think
you need to worry about something that has never happened.

> theoretically perfect, that it is up to the Maintainers to prove to
> you that their code is correct.
> 
> I am going to gently suggest that it is at least as likely, if not
> more likely, that the failure is in DEPT or your understanding of what

No doubt. I already think so. But that doesn't mean I have to keep
quiet instead of discussing how to improve Dept. I will keep improving
Dept in a reasonable way.

> how kernel wait channels and locking works.  After all, why would it
> be that we haven't found these problems via our other QA practices?

Let's talk more once you understand at least 10% of how Dept works.
Otherwise I don't think we can have a productive discussion.



Re: Report 2 in ext4 and journal based on v5.17-rc1

2022-03-05 Thread Byungchul Park
On Fri, Mar 04, 2022 at 10:26:23PM -0500, Theodore Ts'o wrote:
> On Fri, Mar 04, 2022 at 09:42:37AM +0900, Byungchul Park wrote:
> > 
> > All contexts waiting for any of the events in the circular dependency
> > chain will definitely be stuck if there is a circular dependency, as I
> > explained. So we need another wakeup source to break the circle. In the
> > ext4 code, you might have the wakeup source for breaking the circle.
> > 
> > What I agreed with is:
> > 
> >The case where 1) the circular dependency is unavoidable, 2) there is
> >another wakeup source for breaking the circle, and 3) the duration
> >of the sleep is short enough, should be acceptable.
> > 
> > Sounds good?
> 
> These dependencies are part of every single ext4 metadata update,
> and if there were any unnecessary sleeps, it would be a major
> performance problem; this is a very well studied part of ext4.
> 
> There are some places where we sleep, sure.  In some cases
> start_this_handle() needs to wait for a commit to complete, and the
> commit thread might need to sleep for I/O to complete.  But the moment
> the thing that we're waiting for is complete, we wake up all of the
> processes on the wait queue.  But in the case where we wait for I/O to
> complete, that wakeup is coming from the device driver, when it
> receives the I/O completion interrupt from the hard drive.  Is
> that considered an "external source"?  Maybe DEPT doesn't recognize
> that this is certain to happen just as day follows night?  (Well,
> maybe the I/O completion interrupt might not happen if the disk drive
> bursts into flames --- but then, you've got bigger problems. :-)

Almost everything you've been blaming Dept for is total nonsense. Based
on what you're saying, I'm convinced that you don't understand even 1%
of how Dept works. You don't even try to understand it before assigning
blame.

You don't have to understand or support it. But I can't respond to you
if you keep saying silly things that way.

> In any case, if DEPT is going to report these "circular dependencies
> as bugs that MUST be fixed", it's going to be pure noise and I will
> ignore all DEPT reports, and will push back on having Lockdep replaced

Dept is going to be improved so that the cases you are concerned about
won't be reported.

> by DEPT --- because Lockdep give us actionable reports, and if DEPT

Right. Dept should give actionable reports, too.

> can't tell the difference between a valid programming pattern and a
> bug, then it's worse than useless.

Needless to say.



Re: [PATCH] drm/msm/a6xx: Fix missing ARRAY_SIZE() check

2022-03-05 Thread Dmitry Baryshkov
On Sat, 5 Mar 2022 at 00:57, Rob Clark  wrote:
>
> On Fri, Mar 4, 2022 at 1:47 PM Dmitry Baryshkov
>  wrote:
> >
> > On Fri, 4 Mar 2022 at 23:23, Rob Clark  wrote:
> > >
> > > From: Rob Clark 
> > >
> > > Fixes: f6d62d091cfd ("drm/msm/a6xx: add support for Adreno 660 GPU")
> > > Signed-off-by: Rob Clark 
> >
> > Reviewed-by: Dmitry Baryshkov 
> > However see the comment below.
> >
> > > ---
> > >  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 1 +
> > >  1 file changed, 1 insertion(+)
> > >
> > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
> > > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > > index 02b47977b5c3..6406d8c3411a 100644
> > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> > > @@ -687,6 +687,7 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
> > >
> > > BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
> > > BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
> > > +   BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
> >
> > The magic numbers 32 and 48 are repeated throughout this code. I'd
> > suggest defining them and using the defined names.
> > This could come as a separate commit.
> >
>
> Or perhaps instead:

IMO this is much better.
Reviewed-by: Dmitry Baryshkov 

> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 6406d8c3411a..58c371930fb4 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -683,20 +683,23 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
>  {
> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> const u32 *regs = a6xx_protect;
> -   unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32;
> -
> -   BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
> -   BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
> -   BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
> +   unsigned i, count, count_max;
>
> if (adreno_is_a650(adreno_gpu)) {
> regs = a650_protect;
> count = ARRAY_SIZE(a650_protect);
> count_max = 48;
> +   BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
> } else if (adreno_is_a660_family(adreno_gpu)) {
> regs = a660_protect;
> count = ARRAY_SIZE(a660_protect);
> count_max = 48;
> +   BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
> +   } else {
> +   regs = a6xx_protect;
> +   count = ARRAY_SIZE(a6xx_protect);
> +   count_max = 32;
> +   BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
> }
>
> /*
> 
>
> that moves the two uses of each constant together.  Adding three
> #defines, each used only twice, seems a bit silly, IMHO.
>
> BR,
> -R
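
For reference, the #define variant under discussion would look roughly
like this (an untested sketch with hypothetical macro names):

#include <linux/build_bug.h>

/* Name the repeated magic limits once instead of scattering them. */
#define A6XX_PROTECT_MAX	32
#define A650_PROTECT_MAX	48	/* also covers the a660 family */

	/* Inside a6xx_set_cp_protect(): */
	BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > A6XX_PROTECT_MAX);
	BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > A650_PROTECT_MAX);
	BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > A650_PROTECT_MAX);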



-- 
With best wishes
Dmitry


Re: [PATCH] drm/nouveau/bios: Rename prom_init() and friends functions

2022-03-05 Thread Christophe Leroy




Le 05/03/2022 à 08:38, Christophe Leroy a écrit :



Le 04/03/2022 à 21:24, Lyude Paul a écrit :
This mostly looks good to me. Just one question (and one comment down
below that needs addressing). Is this with ppc32? (I ask because ppc64le
doesn't seem to hit this compilation error.)


That's with PPC64, see 
http://kisskb.ellerman.id.au/kisskb/branch/chleroy/head/252ba609bea83234d2e35841c19ae84c67b43ec7/ 



But that's not (yet) with the mainline tree. That's work I'm doing to
clean up our asm/asm-prototypes.h header.


Since commit 4efca4ed05cb ("kbuild: modversions for EXPORT_SYMBOL() for
asm") that file is dedicated to prototypes of functions defined in
assembly. Therefore I'm trying to move C function prototypes into other
headers. I wanted to move the prom_init() prototype into asm/prom.h,
and then I hit the problem.


In the beginning I was thinking about just changing the name of the
function in powerpc, but since M68K, MIPS and SPARC also have a
prom_init() function, I thought it would be better to change the name
in shadowrom.c to avoid any future conflict like the one I hit while
reworking the headers.




@@ -57,8 +57,8 @@ prom_init(struct nvkm_bios *bios, const char *name)
 const struct nvbios_source
 nvbios_rom = {
 	.name = "PROM",
-	.init = prom_init,
-	.fini = prom_fini,
-	.read = prom_read,
+	.init = nvbios_rom_init,
+	.fini = nvbios_rom_fini,
+	.read = nvbios_rom_read,


Seeing as the source name is prom, I think using the naming convention
nvbios_prom_* would be better than nvbios_rom_*.



Yes, I wasn't sure about the best naming, as the file name is
shadowrom.c and not shadowprom.c.


I will send v2 using nvbios_prom_* as a name.


While preparing v2 I remembered that, in fact, I called the functions
nvbios_rom_* because the name of the nvbios_source struct is nvbios_rom,
so to me it made sense to use the struct's name as the prefix for the
functions.

So I'm OK with changing it to nvbios_prom_*, but it looks less logical
to me.

Please confirm that you still prefer nvbios_prom_* as the prefix for the
function names.

Christophe