[PATCH v2 2/2] ocxl: use pci_find_next_dvsec_capability() to simplify the code
The PCI core now provides pci_find_next_dvsec_capability() to query the next DVSEC. Use that core API to simplify the code. Also remove the now-unused macros. Signed-off-by: Xiongfeng Wang Reviewed-by: Andrew Donnellan --- arch/powerpc/platforms/powernv/ocxl.c | 20 ++-- drivers/misc/ocxl/config.c| 21 ++--- include/misc/ocxl-config.h| 4 3 files changed, 8 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 629067781cec..8dbc1a9535fc 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -71,29 +71,13 @@ static DEFINE_MUTEX(links_list_lock); * the AFUs, by pro-rating if needed. */ -static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) -{ - int vsec = pos; - u16 vendor, id; - - while ((vsec = pci_find_next_ext_capability(dev, vsec, - OCXL_EXT_CAP_ID_DVSEC))) { - pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, - ); - pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, ); - if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) - return vsec; - } - return 0; -} - static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) { int vsec = 0; u8 idx; - while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, - vsec))) { + while ((vsec = pci_find_next_dvsec_capability(dev, vsec, + PCI_VENDOR_ID_IBM, OCXL_DVSEC_AFU_CTRL_ID))) { pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, ); if (idx == afu_idx) diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c index 92ab49705f64..6c0fca32e6db 100644 --- a/drivers/misc/ocxl/config.c +++ b/drivers/misc/ocxl/config.c @@ -39,23 +39,14 @@ static int find_dvsec(struct pci_dev *dev, int dvsec_id) static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) { int vsec = 0; - u16 vendor, id; u8 idx; - while ((vsec = pci_find_next_ext_capability(dev, vsec, - OCXL_EXT_CAP_ID_DVSEC))) { - pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, - ); - 
pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, ); - - if (vendor == PCI_VENDOR_ID_IBM && - id == OCXL_DVSEC_AFU_CTRL_ID) { - pci_read_config_byte(dev, - vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, - ); - if (idx == afu_idx) - return vsec; - } + while ((vsec = pci_find_next_dvsec_capability(dev, vsec, + PCI_VENDOR_ID_IBM, OCXL_DVSEC_AFU_CTRL_ID))) { + pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, +); + if (idx == afu_idx) + return vsec; } return 0; } diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h index ccfd3b463517..40cf1b143170 100644 --- a/include/misc/ocxl-config.h +++ b/include/misc/ocxl-config.h @@ -10,10 +10,6 @@ * It follows the specification for opencapi 3.0 */ -#define OCXL_EXT_CAP_ID_DVSEC 0x23 - -#define OCXL_DVSEC_VENDOR_OFFSET 0x4 -#define OCXL_DVSEC_ID_OFFSET 0x8 #define OCXL_DVSEC_TL_ID 0xF000 #define OCXL_DVSEC_TL_BACKOFF_TIMERS 0x10 #define OCXL_DVSEC_TL_RECV_CAP0x18 -- 2.20.1
[PATCH v2 1/2] PCI: Add pci_find_next_dvsec_capability to find next Designated VSEC
Some devices may have several DVSEC (Designated Vendor-Specific Extended Capability) entries with the same DVSEC ID. Add pci_find_next_dvsec_capability() to find them all. Signed-off-by: Xiongfeng Wang Reviewed-by: Andrew Donnellan Acked-by: Bjorn Helgaas --- drivers/pci/pci.c | 39 ++- include/linux/pci.h | 2 ++ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 60230da957e0..2ff5b1ce0eec 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -749,35 +749,48 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap) EXPORT_SYMBOL_GPL(pci_find_vsec_capability); /** - * pci_find_dvsec_capability - Find DVSEC for vendor + * pci_find_next_dvsec_capability - Find next DVSEC for vendor * @dev: PCI device to query + * @start: Address at which to start looking (0 to start at beginning of list) * @vendor: Vendor ID to match for the DVSEC - * @dvsec: Designated Vendor-specific capability ID + * @dvsec: Vendor-defined DVSEC ID * - * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability - * offset in config space; otherwise return 0. + * Returns the address of the next DVSEC if the DVSEC has Vendor ID @vendor and + * DVSEC ID @dvsec; otherwise return 0. DVSEC can occur several times with the + * same DVSEC ID for some devices, and this provides a way to find them all. 
*/ -u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec) +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, u16 vendor, + u16 dvsec) { - int pos; + u16 pos = start; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC); - if (!pos) - return 0; - - while (pos) { + while ((pos = pci_find_next_ext_capability(dev, pos, + PCI_EXT_CAP_ID_DVSEC))) { u16 v, id; pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, ); pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, ); if (vendor == v && dvsec == id) return pos; - - pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC); } return 0; } +EXPORT_SYMBOL_GPL(pci_find_next_dvsec_capability); + +/** + * pci_find_dvsec_capability - Find DVSEC for vendor + * @dev: PCI device to query + * @vendor: Vendor ID to match for the DVSEC + * @dvsec: Vendor-defined DVSEC ID + * + * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability + * offset in config space; otherwise return 0. + */ +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec) +{ + return pci_find_next_dvsec_capability(dev, 0, vendor, dvsec); +} EXPORT_SYMBOL_GPL(pci_find_dvsec_capability); /** diff --git a/include/linux/pci.h b/include/linux/pci.h index c69a2cc1f412..82bb905daf72 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1168,6 +1168,8 @@ u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap); u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec); +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, u16 vendor, + u16 dvsec); u64 pci_get_dsn(struct pci_dev *dev); -- 2.20.1
[PATCH v2 0/2] Introduce pci_find_next_dvsec_capability() to simplify the code
Some devices may have several DVSEC (Designated Vendor-Specific Extended Capability) entries with the same DVSEC ID. Introduce pci_find_next_dvsec_capability() to find them all, and use it to simplify the ocxl code. ChangeLog: v1->v2: Add Reviewed-by and Acked-by tags; slightly revise the commit message and documentation of the first patch. Xiongfeng Wang (2): PCI: Add pci_find_next_dvsec_capability to find next Designated VSEC ocxl: use pci_find_next_dvsec_capability() to simplify the code arch/powerpc/platforms/powernv/ocxl.c | 20 ++ drivers/misc/ocxl/config.c| 21 +-- drivers/pci/pci.c | 39 ++- include/linux/pci.h | 2 ++ include/misc/ocxl-config.h| 4 --- 5 files changed, 36 insertions(+), 50 deletions(-) -- 2.20.1
[PATCH -next] ASoC: imx-audio-rpmsg: Remove redundant initialization owner in imx_audio_rpmsg_driver
module_rpmsg_driver() already sets driver.owner to THIS_MODULE when it registers an rpmsg_driver, so initializing driver.owner in the driver definition is redundant. Remove it to clean up the code. Signed-off-by: Li Zetao --- sound/soc/fsl/imx-audio-rpmsg.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/fsl/imx-audio-rpmsg.c b/sound/soc/fsl/imx-audio-rpmsg.c index d5234ac4b09b..289e47c03d40 100644 --- a/sound/soc/fsl/imx-audio-rpmsg.c +++ b/sound/soc/fsl/imx-audio-rpmsg.c @@ -116,7 +116,6 @@ static struct rpmsg_device_id imx_audio_rpmsg_id_table[] = { static struct rpmsg_driver imx_audio_rpmsg_driver = { .drv.name = "imx_audio_rpmsg", - .drv.owner = THIS_MODULE, .id_table = imx_audio_rpmsg_id_table, .probe = imx_audio_rpmsg_probe, .callback = imx_audio_rpmsg_cb, -- 2.34.1
Re: [RFC PATCH v11 28/29] KVM: selftests: Add basic selftest for guest_memfd()
Sean Christopherson writes: > Add a selftest to verify the basic functionality of guest_memfd(): > > Here's one more test: >From 72dc6836f01bdd613d64d4c6a4f2af8f2b777ba2 Mon Sep 17 00:00:00 2001 From: Ackerley Tng Date: Tue, 1 Aug 2023 18:02:50 + Subject: [PATCH] KVM: selftests: Add tests - invalid inputs for KVM_CREATE_GUEST_MEMFD Test that invalid inputs for KVM_CREATE_GUEST_MEMFD, such as non-page-aligned page size and invalid flags, are rejected by the KVM_CREATE_GUEST_MEMFD with EINVAL Signed-off-by: Ackerley Tng --- tools/testing/selftests/kvm/guest_memfd_test.c | 17 + .../selftests/kvm/include/kvm_util_base.h | 11 +-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index eb93c608a7e0..ad20f11b2d2c 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -90,6 +90,21 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); } +static void test_create_guest_memfd_invalid(struct kvm_vm *vm, size_t page_size) +{ + int fd; + + /* Non-page-aligned page_size */ + fd = __vm_create_guest_memfd(vm, 1, 0); + ASSERT_EQ(fd, -1); + ASSERT_EQ(errno, EINVAL); + + /* Invalid flags */ + fd = __vm_create_guest_memfd(vm, page_size, 99); + ASSERT_EQ(fd, -1); + ASSERT_EQ(errno, EINVAL); +} + int main(int argc, char *argv[]) { @@ -103,6 +118,8 @@ int main(int argc, char *argv[]) vm = vm_create_barebones(); + test_create_guest_memfd_invalid(vm, page_size); + fd = vm_create_guest_memfd(vm, total_size, 0); test_file_read_write(fd); diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 39b38c75b99c..8bdfadd72349 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -474,7 +474,8 @@ static 
inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) } void vm_create_irqchip(struct kvm_vm *vm); -static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, uint64_t flags) { struct kvm_create_guest_memfd gmem = { @@ -482,7 +483,13 @@ static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, .flags = flags, }; - int fd = __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, ); + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, ); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); return fd; -- 2.41.0.640.ga95def55d0-goog
Re: [RFC PATCH v11 28/29] KVM: selftests: Add basic selftest for guest_memfd()
Sean Christopherson writes: > Add a selftest to verify the basic functionality of guest_memfd(): > > + file descriptor created with the guest_memfd() ioctl does not allow > read/write/mmap operations > + file size and block size as returned from fstat are as expected > + fallocate on the fd checks that offset/length on > fallocate(FALLOC_FL_PUNCH_HOLE) should be page aligned > > > + > +static void test_fallocate(int fd, size_t page_size, size_t total_size) > +{ > + int ret; > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size); > + TEST_ASSERT(!ret, "fallocate with aligned offset and size should > succeed"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, > + page_size - 1, page_size); > + TEST_ASSERT(ret, "fallocate with unaligned offset should fail"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size); > + TEST_ASSERT(ret, "fallocate beginning at total_size should fail"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, > page_size); > + TEST_ASSERT(ret, "fallocate beginning at total_size should fail"); This should be TEST_ASSERT(ret, "fallocate beginning after total_size should fail"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, > + total_size, page_size); > + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, > + total_size + page_size, page_size); > + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should > succeed"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, > + page_size, page_size - 1); > + TEST_ASSERT(ret, "fallocate with unaligned size should fail"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, > + page_size, page_size); > + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size > should succeed"); > + > + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size); > + 
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); > +} >
Re: [RFC PATCH v11 27/29] KVM: selftests: Expand set_memory_region_test to validate guest_memfd()
Sean Christopherson writes: > From: Chao Peng > > Expand set_memory_region_test to exercise various positive and negative > testcases for private memory. > > - Non-guest_memfd() file descriptor for private memory > - guest_memfd() from different VM > - Overlapping bindings > - Unaligned bindings > > Signed-off-by: Chao Peng > Co-developed-by: Ackerley Tng > Signed-off-by: Ackerley Tng > [sean: trim the testcases to remove duplicate coverage] > Signed-off-by: Sean Christopherson > --- > .../selftests/kvm/include/kvm_util_base.h | 10 ++ > .../selftests/kvm/set_memory_region_test.c| 99 +++ > 2 files changed, 109 insertions(+) > > diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h > b/tools/testing/selftests/kvm/include/kvm_util_base.h > index 334df27a6f43..39b38c75b99c 100644 > --- a/tools/testing/selftests/kvm/include/kvm_util_base.h > +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h > @@ -789,6 +789,16 @@ static inline struct kvm_vm *vm_create_barebones(void) > return vm_create(VM_SHAPE_DEFAULT); > } > > > + > +static void test_add_private_memory_region(void) > +{ > + struct kvm_vm *vm, *vm2; > + int memfd, i; > + > + pr_info("Testing ADD of KVM_MEM_PRIVATE memory regions\n"); > + > + vm = vm_create_barebones_protected_vm(); > + > + test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); > + test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); > + > + memfd = kvm_memfd_alloc(MEM_REGION_SIZE, false); > + test_invalid_guest_memfd(vm, vm->fd, 0, "Regular memfd() should fail"); This should be test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail"); > + close(memfd); > + > + vm2 = vm_create_barebones_protected_vm(); > + memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0); > + test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should > fail"); > + > + vm_set_user_memory_region2(vm2, MEM_REGION_SLOT, KVM_MEM_PRIVATE, > +MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, > 0); > + close(memfd); > + 
kvm_vm_free(vm2); > + > + memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0); > + for (i = 1; i < PAGE_SIZE; i++) > + test_invalid_guest_memfd(vm, memfd, i, "Unaligned offset should > fail"); > + > + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_PRIVATE, > +MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, > 0); > + close(memfd); > + > + kvm_vm_free(vm); > +} > + >
Re: [RFC PATCH v11 12/29] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory
Sean Christopherson writes: > > +static int kvm_gmem_release(struct inode *inode, struct file *file) > +{ > + struct kvm_gmem *gmem = file->private_data; > + struct kvm_memory_slot *slot; > + struct kvm *kvm = gmem->kvm; > + unsigned long index; > + > + filemap_invalidate_lock(inode->i_mapping); > + > + /* > + * Prevent concurrent attempts to *unbind* a memslot. This is the last > + * reference to the file and thus no new bindings can be created, but > + * dereferencing the slot for existing bindings needs to be protected > + * against memslot updates, specifically so that unbind doesn't race > + * and free the memslot (kvm_gmem_get_file() will return NULL). > + */ > + mutex_lock(>slots_lock); > + > + xa_for_each(>bindings, index, slot) > + rcu_assign_pointer(slot->gmem.file, NULL); > + > + synchronize_rcu(); > + > + /* > + * All in-flight operations are gone and new bindings can be created. > + * Zap all SPTEs pointed at by this file. Do not free the backing > + * memory, as its lifetime is associated with the inode, not the file. 
> + */ > + kvm_gmem_invalidate_begin(gmem, 0, -1ul); > + kvm_gmem_invalidate_end(gmem, 0, -1ul); > + > + mutex_unlock(>slots_lock); > + > + list_del(>entry); > + > + filemap_invalidate_unlock(inode->i_mapping); > + > + xa_destroy(>bindings); > + kfree(gmem); > + > + kvm_put_kvm(kvm); > + > + return 0; > +} > + > > + > +int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, > + unsigned int fd, loff_t offset) > +{ > + loff_t size = slot->npages << PAGE_SHIFT; > + unsigned long start, end, flags; > + struct kvm_gmem *gmem; > + struct inode *inode; > + struct file *file; > + > + BUILD_BUG_ON(sizeof(gfn_t) != sizeof(slot->gmem.pgoff)); > + > + file = fget(fd); > + if (!file) > + return -EINVAL; > + > + if (file->f_op != _gmem_fops) > + goto err; > + > + gmem = file->private_data; > + if (gmem->kvm != kvm) > + goto err; > + > + inode = file_inode(file); > + flags = (unsigned long)inode->i_private; > + > + /* > + * For simplicity, require the offset into the file and the size of the > + * memslot to be aligned to the largest possible page size used to back > + * the file (same as the size of the file itself). > + */ > + if (!kvm_gmem_is_valid_size(offset, flags) || > + !kvm_gmem_is_valid_size(size, flags)) > + goto err; > + > + if (offset + size > i_size_read(inode)) > + goto err; > + > + filemap_invalidate_lock(inode->i_mapping); > + > + start = offset >> PAGE_SHIFT; > + end = start + slot->npages; > + > + if (!xa_empty(>bindings) && > + xa_find(>bindings, , end - 1, XA_PRESENT)) { > + filemap_invalidate_unlock(inode->i_mapping); > + goto err; > + } > + > + /* > + * No synchronize_rcu() needed, any in-flight readers are guaranteed to > + * be see either a NULL file or this new file, no need for them to go > + * away. 
> + */ > + rcu_assign_pointer(slot->gmem.file, file); > + slot->gmem.pgoff = start; > + > + xa_store_range(>bindings, start, end - 1, slot, GFP_KERNEL); > + filemap_invalidate_unlock(inode->i_mapping); > + > + /* > + * Drop the reference to the file, even on success. The file pins KVM, > + * not the other way 'round. Active bindings are invalidated if the > + * file is closed before memslots are destroyed. > + */ > + fput(file); > + return 0; > + > +err: > + fput(file); > + return -EINVAL; > +} > + I’d like to propose an alternative to the refcounting approach between the gmem file and associated kvm, where we think of KVM’s memslots as users of the gmem file. Instead of having the gmem file pin the VM (i.e. take a refcount on kvm), we could let memslot take a refcount on the gmem file when the memslots are configured. Here’s a POC patch that flips the refcounting (and modified selftests in the next commit): https://github.com/googleprodkernel/linux-cc/commit/7f487b029b89b9f3e9b094a721bc0772f3c8c797 One side effect of having the gmem file pin the VM is that now the gmem file becomes sort of a false handle on the VM: + Closing the file destroys the file pointers in the VM and invalidates the pointers + Keeping the file open keeps the VM around in the kernel even though the VM fd may already be closed. I feel that memslots form a natural way of managing usage of the gmem file. When a memslot is created, it is using the file; hence we take a refcount on the gmem file, and as memslots are removed, we drop refcounts on the gmem file. The KVM pointer is shared among all the bindings in gmem’s xarray, and we can enforce that a gmem file is used only with one VM: + When binding a memslot to the file, if a kvm
[PATCH] powerpc/pseries: Rework lppaca_shared_proc() to avoid DEBUG_PREEMPT
lppaca_shared_proc() takes a pointer to the lppaca which is typically accessed through get_lppaca(). With DEBUG_PREEMPT enabled, this leads to checking if preemption is enabled, for example: BUG: using smp_processor_id() in preemptible [] code: grep/10693 caller is lparcfg_data+0x408/0x19a0 CPU: 4 PID: 10693 Comm: grep Not tainted 6.5.0-rc3 #2 Call Trace: dump_stack_lvl+0x154/0x200 (unreliable) check_preemption_disabled+0x214/0x220 lparcfg_data+0x408/0x19a0 ... This isn't actually a problem however, as it does not matter which lppaca is accessed, the shared proc state will be the same. vcpudispatch_stats_procfs_init() already works around this by disabling preemption, but the lparcfg code does not, erroring any time /proc/powerpc/lparcfg is accessed with DEBUG_PREEMPT enabled. Instead of disabling preemption on the caller side, rework lppaca_shared_proc() to not take a pointer and instead directly access the lppaca, bypassing any potential preemption checks. Fixes: f13c13a00512 ("powerpc: Stop using non-architected shared_proc field in lppaca") Signed-off-by: Russell Currey --- Fixes tag might be a bit overkill. --- arch/powerpc/include/asm/lppaca.h| 9 - arch/powerpc/include/asm/paca.h | 5 + arch/powerpc/platforms/pseries/lpar.c| 10 +- arch/powerpc/platforms/pseries/lparcfg.c | 4 ++-- arch/powerpc/platforms/pseries/setup.c | 2 +- drivers/cpuidle/cpuidle-pseries.c| 8 +--- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 34d44cb17c87..c12e1a6e3595 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -127,7 +127,14 @@ struct lppaca { */ #define LPPACA_OLD_SHARED_PROC 2 -static inline bool lppaca_shared_proc(struct lppaca *l) +/* + * All CPUs should have the same shared proc value, so directly access the PACA + * to avoid false positives from DEBUG_PREEMPT. + * + * local_paca can't be referenced directly from lppaca.h, hence the macro. 
+ */ +#define lppaca_shared_proc() (__lppaca_shared_proc(local_paca->lppaca_ptr)) +static inline bool __lppaca_shared_proc(struct lppaca *l) { if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return false; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index cb325938766a..f77337b92ccf 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -49,6 +49,11 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #ifdef CONFIG_PPC_PSERIES #define get_lppaca() (get_paca()->lppaca_ptr) +/* + * All CPUs should have the same shared proc value, so directly access the PACA + * to avoid false positives from DEBUG_PREEMPT. + */ +#define lppaca_shared_proc() (__lppaca_shared_proc(local_paca->lppaca_ptr)) #endif #define get_slb_shadow() (get_paca()->slb_shadow_ptr) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 2eab323f6970..cb2f1211f7eb 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -639,16 +639,8 @@ static const struct proc_ops vcpudispatch_stats_freq_proc_ops = { static int __init vcpudispatch_stats_procfs_init(void) { - /* -* Avoid smp_processor_id while preemptible. All CPUs should have -* the same value for lppaca_shared_proc. 
-*/ - preempt_disable(); - if (!lppaca_shared_proc(get_lppaca())) { - preempt_enable(); + if (!lppaca_shared_proc()) return 0; - } - preempt_enable(); if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL, _stats_proc_ops)) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 8acc70509520..1c151d77e74b 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -206,7 +206,7 @@ static void parse_ppp_data(struct seq_file *m) ppp_data.active_system_procs); /* pool related entries are appropriate for shared configs */ - if (lppaca_shared_proc(get_lppaca())) { + if (lppaca_shared_proc()) { unsigned long pool_idle_time, pool_procs; seq_printf(m, "pool=%d\n", ppp_data.pool_num); @@ -560,7 +560,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) partition_potential_processors); seq_printf(m, "shared_processor_mode=%d\n", - lppaca_shared_proc(get_lppaca())); + lppaca_shared_proc()); #ifdef CONFIG_PPC_64S_HASH_MMU if (!radix_enabled()) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index
[PATCH mm-unstable v9 31/31] mm: Remove pgtable_{pmd, pte}_page_{ctor, dtor}() wrappers
These functions are no longer necessary. Remove them and cleanup Documentation referencing them. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- Documentation/mm/split_page_table_lock.rst| 12 +-- .../zh_CN/mm/split_page_table_lock.rst| 14 ++--- include/linux/mm.h| 20 --- 3 files changed, 13 insertions(+), 33 deletions(-) diff --git a/Documentation/mm/split_page_table_lock.rst b/Documentation/mm/split_page_table_lock.rst index a834fad9de12..e4f6972eb6c0 100644 --- a/Documentation/mm/split_page_table_lock.rst +++ b/Documentation/mm/split_page_table_lock.rst @@ -58,7 +58,7 @@ Support of split page table lock by an architecture === There's no need in special enabling of PTE split page table lock: everything -required is done by pgtable_pte_page_ctor() and pgtable_pte_page_dtor(), which +required is done by pagetable_pte_ctor() and pagetable_pte_dtor(), which must be called on PTE table allocation / freeing. Make sure the architecture doesn't use slab allocator for page table @@ -68,8 +68,8 @@ This field shares storage with page->ptl. PMD split lock only makes sense if you have more than two page table levels. -PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table -allocation and pgtable_pmd_page_dtor() on freeing. +PMD split lock enabling requires pagetable_pmd_ctor() call on PMD table +allocation and pagetable_pmd_dtor() on freeing. Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing @@ -77,7 +77,7 @@ paths: i.e X86_PAE preallocate few PMDs on pgd_alloc(). With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. -NOTE: pgtable_pte_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must +NOTE: pagetable_pte_ctor() and pagetable_pmd_ctor() can fail -- it must be handled properly. 
page->ptl @@ -97,7 +97,7 @@ trick: split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs one more cache line for indirect access; -The spinlock_t allocated in pgtable_pte_page_ctor() for PTE table and in -pgtable_pmd_page_ctor() for PMD table. +The spinlock_t allocated in pagetable_pte_ctor() for PTE table and in +pagetable_pmd_ctor() for PMD table. Please, never access page->ptl directly -- use appropriate helper. diff --git a/Documentation/translations/zh_CN/mm/split_page_table_lock.rst b/Documentation/translations/zh_CN/mm/split_page_table_lock.rst index 4fb7aa666037..a2c288670a24 100644 --- a/Documentation/translations/zh_CN/mm/split_page_table_lock.rst +++ b/Documentation/translations/zh_CN/mm/split_page_table_lock.rst @@ -56,16 +56,16 @@ Hugetlb特定的辅助函数: 架构对分页表锁的支持 -没有必要特别启用PTE分页表锁:所有需要的东西都由pgtable_pte_page_ctor() -和pgtable_pte_page_dtor()完成,它们必须在PTE表分配/释放时被调用。 +没有必要特别启用PTE分页表锁:所有需要的东西都由pagetable_pte_ctor() +和pagetable_pte_dtor()完成,它们必须在PTE表分配/释放时被调用。 确保架构不使用slab分配器来分配页表:slab使用page->slab_cache来分配其页 面。这个区域与page->ptl共享存储。 PMD分页锁只有在你有两个以上的页表级别时才有意义。 -启用PMD分页锁需要在PMD表分配时调用pgtable_pmd_page_ctor(),在释放时调 -用pgtable_pmd_page_dtor()。 +启用PMD分页锁需要在PMD表分配时调用pagetable_pmd_ctor(),在释放时调 +用pagetable_pmd_dtor()。 分配通常发生在pmd_alloc_one()中,释放发生在pmd_free()和pmd_free_tlb() 中,但要确保覆盖所有的PMD表分配/释放路径:即X86_PAE在pgd_alloc()中预先 @@ -73,7 +73,7 @@ PMD分页锁只有在你有两个以上的页表级别时才有意义。 一切就绪后,你可以设置CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK。 -注意:pgtable_pte_page_ctor()和pgtable_pmd_page_ctor()可能失败--必 +注意:pagetable_pte_ctor()和pagetable_pmd_ctor()可能失败--必 须正确处理。 page->ptl @@ -90,7 +90,7 @@ page->ptl用于访问分割页表锁,其中'page'是包含该表的页面struc 的指针并动态分配它。这允许在启用DEBUG_SPINLOCK或DEBUG_LOCK_ALLOC的 情况下使用分页锁,但由于间接访问而多花了一个缓存行。 -PTE表的spinlock_t分配在pgtable_pte_page_ctor()中,PMD表的spinlock_t -分配在pgtable_pmd_page_ctor()中。 +PTE表的spinlock_t分配在pagetable_pte_ctor()中,PMD表的spinlock_t +分配在pagetable_pmd_ctor()中。 请不要直接访问page->ptl - -使用适当的辅助函数。 diff --git a/include/linux/mm.h b/include/linux/mm.h index 6310e0c59efe..6a95dfed4957 100644 --- 
a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2932,11 +2932,6 @@ static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) return true; } -static inline bool pgtable_pte_page_ctor(struct page *page) -{ - return pagetable_pte_ctor(page_ptdesc(page)); -} - static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); @@ -2946,11 +2941,6 @@ static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) lruvec_stat_sub_folio(folio, NR_PAGETABLE); } -static inline void pgtable_pte_page_dtor(struct page *page) -{ - pagetable_pte_dtor(page_ptdesc(page)); -} - pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr) { @@ -3057,11 +3047,6 @@ static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) return true; }
[PATCH mm-unstable v9 30/31] um: Convert {pmd, pte}_free_tlb() to use ptdescs
Part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents. Also cleans up some spacing issues. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/um/include/asm/pgalloc.h | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 8ec7cd46dd96..de5e31c64793 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -25,19 +25,19 @@ */ extern pgd_t *pgd_alloc(struct mm_struct *); -#define __pte_free_tlb(tlb,pte, address) \ -do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb),(pte)); \ +#define __pte_free_tlb(tlb, pte, address) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) #ifdef CONFIG_3_LEVEL_PGTABLES -#define __pmd_free_tlb(tlb, pmd, address) \ -do { \ - pgtable_pmd_page_dtor(virt_to_page(pmd)); \ - tlb_remove_page((tlb),virt_to_page(pmd)); \ -} while (0)\ +#define __pmd_free_tlb(tlb, pmd, address) \ +do { \ + pagetable_pmd_dtor(virt_to_ptdesc(pmd));\ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ +} while (0) #endif -- 2.40.1
[PATCH mm-unstable v9 29/31] sparc: Convert pgtable_pte_page_{ctor, dtor}() to ptdesc equivalents
Part of the conversions to replace pgtable pte constructor/destructors with ptdesc equivalents. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/sparc/mm/srmmu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 13f027afc875..8393faa3e596 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -355,7 +355,8 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) return NULL; page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT); spin_lock(>page_table_lock); - if (page_ref_inc_return(page) == 2 && !pgtable_pte_page_ctor(page)) { + if (page_ref_inc_return(page) == 2 && + !pagetable_pte_ctor(page_ptdesc(page))) { page_ref_dec(page); ptep = NULL; } @@ -371,7 +372,7 @@ void pte_free(struct mm_struct *mm, pgtable_t ptep) page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT); spin_lock(>page_table_lock); if (page_ref_dec_return(page) == 1) - pgtable_pte_page_dtor(page); + pagetable_pte_dtor(page_ptdesc(page)); spin_unlock(>page_table_lock); srmmu_free_nocache(ptep, SRMMU_PTE_TABLE_SIZE); -- 2.40.1
[PATCH mm-unstable v9 28/31] sparc64: Convert various functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/sparc/mm/init_64.c | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 680ef206565c..f83017992eaa 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2907,14 +2907,15 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm) pgtable_t pte_alloc_one(struct mm_struct *mm) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL | __GFP_ZERO, 0); + + if (!ptdesc) return NULL; - if (!pgtable_pte_page_ctor(page)) { - __free_page(page); + if (!pagetable_pte_ctor(ptdesc)) { + pagetable_free(ptdesc); return NULL; } - return (pte_t *) page_address(page); + return ptdesc_address(ptdesc); } void pte_free_kernel(struct mm_struct *mm, pte_t *pte) @@ -2924,10 +2925,10 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static void __pte_free(pgtable_t pte) { - struct page *page = virt_to_page(pte); + struct ptdesc *ptdesc = virt_to_ptdesc(pte); - pgtable_pte_page_dtor(page); - __free_page(page); + pagetable_pte_dtor(ptdesc); + pagetable_free(ptdesc); } void pte_free(struct mm_struct *mm, pgtable_t pte) -- 2.40.1
[PATCH mm-unstable v9 27/31] sh: Convert pte_free_tlb() to use ptdescs
Part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents. Also cleans up some spacing issues. Reviewed-by: Geert Uytterhoeven Acked-by: John Paul Adrian Glaubitz Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/sh/include/asm/pgalloc.h | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index a9e98233c4d4..5d8577ab1591 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -2,6 +2,7 @@ #ifndef __ASM_SH_PGALLOC_H #define __ASM_SH_PGALLOC_H +#include #include #define __HAVE_ARCH_PMD_ALLOC_ONE @@ -31,10 +32,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } -#define __pte_free_tlb(tlb,pte,addr) \ -do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), (pte)); \ +#define __pte_free_tlb(tlb, pte, addr) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) #endif /* __ASM_SH_PGALLOC_H */ -- 2.40.1
[PATCH mm-unstable v9 26/31] riscv: Convert alloc_{pmd, pte}_late() to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Acked-by: Palmer Dabbelt Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/riscv/include/asm/pgalloc.h | 8 arch/riscv/mm/init.c | 16 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 59dc12b5b7e8..d169a4f41a2e 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -153,10 +153,10 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #endif /* __PAGETABLE_PMD_FOLDED */ -#define __pte_free_tlb(tlb, pte, buf) \ -do {\ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), pte);\ +#define __pte_free_tlb(tlb, pte, buf) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ } while (0) #endif /* CONFIG_MMU */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9ce504737d18..430a3d05a841 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -353,12 +353,10 @@ static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va) static phys_addr_t __init alloc_pte_late(uintptr_t va) { - unsigned long vaddr; - - vaddr = __get_free_page(GFP_KERNEL); - BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page((void *)vaddr))); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); - return __pa(vaddr); + BUG_ON(!ptdesc || !pagetable_pte_ctor(ptdesc)); + return __pa((pte_t *)ptdesc_address(ptdesc)); } static void __init create_pte_mapping(pte_t *ptep, @@ -436,12 +434,10 @@ static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) static phys_addr_t __init alloc_pmd_late(uintptr_t va) { - unsigned long vaddr; - - vaddr = 
__get_free_page(GFP_KERNEL); - BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page((void *)vaddr))); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); - return __pa(vaddr); + BUG_ON(!ptdesc || !pagetable_pmd_ctor(ptdesc)); + return __pa((pmd_t *)ptdesc_address(ptdesc)); } static void __init create_pmd_mapping(pmd_t *pmdp, -- 2.40.1
[PATCH mm-unstable v9 25/31] openrisc: Convert __pte_free_tlb() to use ptdescs
Part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/openrisc/include/asm/pgalloc.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index b7b2b8d16fad..c6a73772a546 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -66,10 +66,10 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm) extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), (pte)); \ +#define __pte_free_tlb(tlb, pte, addr) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) #endif -- 2.40.1
[PATCH mm-unstable v9 24/31] nios2: Convert __pte_free_tlb() to use ptdescs
Part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents. Acked-by: Mike Rapoport (IBM) Acked-by: Dinh Nguyen Signed-off-by: Vishal Moola (Oracle) --- arch/nios2/include/asm/pgalloc.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index ecd1657bb2ce..ce6bb8e74271 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -28,10 +28,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, extern pgd_t *pgd_alloc(struct mm_struct *mm); -#define __pte_free_tlb(tlb, pte, addr) \ - do {\ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), (pte)); \ +#define __pte_free_tlb(tlb, pte, addr) \ + do {\ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) #endif /* _ASM_NIOS2_PGALLOC_H */ -- 2.40.1
[PATCH mm-unstable v9 23/31] mips: Convert various functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/mips/include/asm/pgalloc.h | 32 ++-- arch/mips/mm/pgtable.c | 8 +--- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index f72e737dda21..40e40a7eb94a 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -51,13 +51,13 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm); static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - free_pages((unsigned long)pgd, PGD_TABLE_ORDER); + pagetable_free(virt_to_ptdesc(pgd)); } -#define __pte_free_tlb(tlb,pte,address)\ -do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), pte);\ +#define __pte_free_tlb(tlb, pte, address) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ } while (0) #ifndef __PAGETABLE_PMD_FOLDED @@ -65,18 +65,18 @@ do { \ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - struct page *pg; + struct ptdesc *ptdesc; - pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_TABLE_ORDER); - if (!pg) + ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, PMD_TABLE_ORDER); + if (!ptdesc) return NULL; - if (!pgtable_pmd_page_ctor(pg)) { - __free_pages(pg, PMD_TABLE_ORDER); + if (!pagetable_pmd_ctor(ptdesc)) { + pagetable_free(ptdesc); return NULL; } - pmd = (pmd_t *)page_address(pg); + pmd = ptdesc_address(ptdesc); pmd_init(pmd); return pmd; } @@ -90,10 +90,14 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t 
*pud; + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, + PUD_TABLE_ORDER); - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_TABLE_ORDER); - if (pud) - pud_init(pud); + if (!ptdesc) + return NULL; + pud = ptdesc_address(ptdesc); + + pud_init(pud); return pud; } diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c index b13314be5d0e..1506e458040d 100644 --- a/arch/mips/mm/pgtable.c +++ b/arch/mips/mm/pgtable.c @@ -10,10 +10,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *ret, *init; + pgd_t *init, *ret = NULL; + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, + PGD_TABLE_ORDER); - ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER); - if (ret) { + if (ptdesc) { + ret = ptdesc_address(ptdesc); init = pgd_offset(_mm, 0UL); pgd_init(ret); memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, -- 2.40.1
[PATCH mm-unstable v9 22/31] m68k: Convert various functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Acked-by: Mike Rapoport (IBM) Acked-by: Geert Uytterhoeven Signed-off-by: Vishal Moola (Oracle) --- arch/m68k/include/asm/mcf_pgalloc.h | 47 ++-- arch/m68k/include/asm/sun3_pgalloc.h | 8 ++--- arch/m68k/mm/motorola.c | 4 +-- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index 5c2c0a864524..302c5bf67179 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -5,22 +5,22 @@ #include #include -extern inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - free_page((unsigned long) pte); + pagetable_free(virt_to_ptdesc(pte)); } extern const char bad_pmd_string[]; -extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - unsigned long page = __get_free_page(GFP_DMA); + struct ptdesc *ptdesc = pagetable_alloc((GFP_DMA | __GFP_ZERO) & + ~__GFP_HIGHMEM, 0); - if (!page) + if (!ptdesc) return NULL; - memset((void *)page, 0, PAGE_SIZE); - return (pte_t *) (page); + return ptdesc_address(ptdesc); } extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) @@ -35,36 +35,34 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pgtable, unsigned long address) { - struct page *page = virt_to_page(pgtable); + struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); - pgtable_pte_page_dtor(page); - __free_page(page); + pagetable_pte_dtor(ptdesc); + pagetable_free(ptdesc); } static inline 
pgtable_t pte_alloc_one(struct mm_struct *mm) { - struct page *page = alloc_pages(GFP_DMA, 0); + struct ptdesc *ptdesc = pagetable_alloc(GFP_DMA | __GFP_ZERO, 0); pte_t *pte; - if (!page) + if (!ptdesc) return NULL; - if (!pgtable_pte_page_ctor(page)) { - __free_page(page); + if (!pagetable_pte_ctor(ptdesc)) { + pagetable_free(ptdesc); return NULL; } - pte = page_address(page); - clear_page(pte); - + pte = ptdesc_address(ptdesc); return pte; } static inline void pte_free(struct mm_struct *mm, pgtable_t pgtable) { - struct page *page = virt_to_page(pgtable); + struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); - pgtable_pte_page_dtor(page); - __free_page(page); + pagetable_pte_dtor(ptdesc); + pagetable_free(ptdesc); } /* @@ -75,16 +73,19 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pgtable) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - free_page((unsigned long) pgd); + pagetable_free(virt_to_ptdesc(pgd)); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *new_pgd; + struct ptdesc *ptdesc = pagetable_alloc((GFP_DMA | __GFP_NOWARN) & + ~__GFP_HIGHMEM, 0); - new_pgd = (pgd_t *)__get_free_page(GFP_DMA | __GFP_NOWARN); - if (!new_pgd) + if (!ptdesc) return NULL; + new_pgd = ptdesc_address(ptdesc); + memcpy(new_pgd, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t)); memset(new_pgd, 0, PAGE_OFFSET >> PGDIR_SHIFT); return new_pgd; diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 198036aff519..ff48573db2c0 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -17,10 +17,10 @@ extern const char bad_pmd_string[]; -#define __pte_free_tlb(tlb,pte,addr) \ -do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), pte);\ +#define __pte_free_tlb(tlb, pte, addr) \ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ } while (0) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t 
*pmd, pte_t *pte) diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 8bca46e51e94..c1761d309fc6 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -161,7 +161,7 @@
[PATCH mm-unstable v9 21/31] loongarch: Convert various functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/loongarch/include/asm/pgalloc.h | 27 +++ arch/loongarch/mm/pgtable.c | 7 --- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index af1d1e4a6965..23f5b1107246 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -45,9 +45,9 @@ extern void pagetable_init(void); extern pgd_t *pgd_alloc(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, address) \ -do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page((tlb), pte);\ +do { \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\ } while (0) #ifndef __PAGETABLE_PMD_FOLDED @@ -55,18 +55,18 @@ do { \ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - struct page *pg; + struct ptdesc *ptdesc; - pg = alloc_page(GFP_KERNEL_ACCOUNT); - if (!pg) + ptdesc = pagetable_alloc(GFP_KERNEL_ACCOUNT, 0); + if (!ptdesc) return NULL; - if (!pgtable_pmd_page_ctor(pg)) { - __free_page(pg); + if (!pagetable_pmd_ctor(ptdesc)) { + pagetable_free(ptdesc); return NULL; } - pmd = (pmd_t *)page_address(pg); + pmd = ptdesc_address(ptdesc); pmd_init(pmd); return pmd; } @@ -80,10 +80,13 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); - pud = (pud_t *) __get_free_page(GFP_KERNEL); - if (pud) - pud_init(pud); + if (!ptdesc) + return NULL; + pud 
= ptdesc_address(ptdesc); + + pud_init(pud); return pud; } diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 1260cf30e3ee..b14343e211b6 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -11,10 +11,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *ret, *init; + pgd_t *init, *ret = NULL; + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); - ret = (pgd_t *) __get_free_page(GFP_KERNEL); - if (ret) { + if (ptdesc) { + ret = (pgd_t *)ptdesc_address(ptdesc); init = pgd_offset(&init_mm, 0UL); pgd_init(ret); memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, -- 2.40.1
[PATCH mm-unstable v9 20/31] hexagon: Convert __pte_free_tlb() to use ptdescs
Part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/hexagon/include/asm/pgalloc.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index f0c47e6a7427..55988625e6fb 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -87,10 +87,10 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, max_kernel_seg = pmdindex; } -#define __pte_free_tlb(tlb, pte, addr) \ -do { \ - pgtable_pte_page_dtor((pte)); \ - tlb_remove_page((tlb), (pte)); \ +#define __pte_free_tlb(tlb, pte, addr) \ +do { \ + pagetable_pte_dtor((page_ptdesc(pte))); \ + tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) #endif -- 2.40.1
[PATCH mm-unstable v9 19/31] csky: Convert __pte_free_tlb() to use ptdescs
Part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents. Acked-by: Guo Ren Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/csky/include/asm/pgalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index 7d57e5da0914..9c84c9012e53 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -63,8 +63,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #define __pte_free_tlb(tlb, pte, address) \ do { \ - pgtable_pte_page_dtor(pte); \ - tlb_remove_page(tlb, pte); \ + pagetable_pte_dtor(page_ptdesc(pte)); \ + tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ } while (0) extern void pagetable_init(void); -- 2.40.1
[PATCH mm-unstable v9 18/31] arm64: Convert various functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Acked-by: Mike Rapoport (IBM) Acked-by: Catalin Marinas Signed-off-by: Vishal Moola (Oracle) --- arch/arm64/include/asm/tlb.h | 14 -- arch/arm64/mm/mmu.c | 7 --- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index c995d1f4594f..2c29239d05c3 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -75,18 +75,20 @@ static inline void tlb_flush(struct mmu_gather *tlb) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - pgtable_pte_page_dtor(pte); - tlb_remove_table(tlb, pte); + struct ptdesc *ptdesc = page_ptdesc(pte); + + pagetable_pte_dtor(ptdesc); + tlb_remove_ptdesc(tlb, ptdesc); } #if CONFIG_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - struct page *page = virt_to_page(pmdp); + struct ptdesc *ptdesc = virt_to_ptdesc(pmdp); - pgtable_pmd_page_dtor(page); - tlb_remove_table(tlb, page); + pagetable_pmd_dtor(ptdesc); + tlb_remove_ptdesc(tlb, ptdesc); } #endif @@ -94,7 +96,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - tlb_remove_table(tlb, virt_to_page(pudp)); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pudp)); } #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 95d360805f8a..47781bec6171 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -426,6 +426,7 @@ static phys_addr_t __pgd_pgtable_alloc(int shift) static phys_addr_t pgd_pgtable_alloc(int shift) { phys_addr_t pa = __pgd_pgtable_alloc(shift); + struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); /* * Call proper page table ctor in case later we need to @@ -433,12 +434,12 @@ static phys_addr_t 
pgd_pgtable_alloc(int shift) * this pre-allocated page table. * * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is -* folded, and if so pgtable_pmd_page_ctor() becomes nop. +* folded, and if so pagetable_pmd_ctor() becomes nop. */ if (shift == PAGE_SHIFT) - BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa))); + BUG_ON(!pagetable_pte_ctor(ptdesc)); else if (shift == PMD_SHIFT) - BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa))); + BUG_ON(!pagetable_pmd_ctor(ptdesc)); return pa; } -- 2.40.1
[PATCH mm-unstable v9 17/31] arm: Convert various functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. late_alloc() also uses the __get_free_pages() helper function. Convert this to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/arm/include/asm/tlb.h | 12 +++- arch/arm/mm/mmu.c | 7 --- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index b8cbe03ad260..f40d06ad5d2a 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -39,7 +39,9 @@ static inline void __tlb_remove_table(void *_table) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - pgtable_pte_page_dtor(pte); + struct ptdesc *ptdesc = page_ptdesc(pte); + + pagetable_pte_dtor(ptdesc); #ifndef CONFIG_ARM_LPAE /* @@ -50,17 +52,17 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE); #endif - tlb_remove_table(tlb, pte); + tlb_remove_ptdesc(tlb, ptdesc); } static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { #ifdef CONFIG_ARM_LPAE - struct page *page = virt_to_page(pmdp); + struct ptdesc *ptdesc = virt_to_ptdesc(pmdp); - pgtable_pmd_page_dtor(page); - tlb_remove_table(tlb, page); + pagetable_pmd_dtor(ptdesc); + tlb_remove_ptdesc(tlb, ptdesc); #endif } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index c9981c23e8e9..674ed71573a8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -737,11 +737,12 @@ static void __init *early_alloc(unsigned long sz) static void *__init late_alloc(unsigned long sz) { - void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz)); + void *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM, + get_order(sz)); - if (!ptr || 
!pgtable_pte_page_ctor(virt_to_page(ptr))) + if (!ptdesc || !pagetable_pte_ctor(ptdesc)) BUG(); - return ptr; + return ptdesc_to_virt(ptdesc); } static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr, -- 2.40.1
[PATCH mm-unstable v9 16/31] pgalloc: Convert various functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Signed-off-by: Vishal Moola (Oracle) --- include/asm-generic/pgalloc.h | 88 +-- 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index a7cf825befae..c75d4a753849 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -8,7 +8,7 @@ #define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT) /** - * __pte_alloc_one_kernel - allocate a page for PTE-level kernel page table + * __pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table * @mm: the mm_struct of the current context * * This function is intended for architectures that need @@ -18,12 +18,17 @@ */ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) { - return (pte_t *)__get_free_page(GFP_PGTABLE_KERNEL); + struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & + ~__GFP_HIGHMEM, 0); + + if (!ptdesc) + return NULL; + return ptdesc_address(ptdesc); } #ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL /** - * pte_alloc_one_kernel - allocate a page for PTE-level kernel page table + * pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table * @mm: the mm_struct of the current context * * Return: pointer to the allocated memory or %NULL on error @@ -35,40 +40,40 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) #endif /** - * pte_free_kernel - free PTE-level kernel page table page + * pte_free_kernel - free PTE-level kernel page table memory * @mm: the mm_struct of the current context * @pte: pointer to the memory containing the page table */ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - free_page((unsigned long)pte); 
+ pagetable_free(virt_to_ptdesc(pte)); } /** - * __pte_alloc_one - allocate a page for PTE-level user page table + * __pte_alloc_one - allocate memory for a PTE-level user page table * @mm: the mm_struct of the current context * @gfp: GFP flags to use for the allocation * - * Allocates a page and runs the pgtable_pte_page_ctor(). + * Allocate memory for a page table and ptdesc and runs pagetable_pte_ctor(). * * This function is intended for architectures that need * anything beyond simple page allocation or must have custom GFP flags. * - * Return: `struct page` initialized as page table or %NULL on error + * Return: `struct page` referencing the ptdesc or %NULL on error */ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) { - struct page *pte; + struct ptdesc *ptdesc; - pte = alloc_page(gfp); - if (!pte) + ptdesc = pagetable_alloc(gfp, 0); + if (!ptdesc) return NULL; - if (!pgtable_pte_page_ctor(pte)) { - __free_page(pte); + if (!pagetable_pte_ctor(ptdesc)) { + pagetable_free(ptdesc); return NULL; } - return pte; + return ptdesc_page(ptdesc); } #ifndef __HAVE_ARCH_PTE_ALLOC_ONE @@ -76,9 +81,9 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) * pte_alloc_one - allocate a page for PTE-level user page table * @mm: the mm_struct of the current context * - * Allocates a page and runs the pgtable_pte_page_ctor(). + * Allocate memory for a page table and ptdesc and runs pagetable_pte_ctor(). 
* - * Return: `struct page` initialized as page table or %NULL on error + * Return: `struct page` referencing the ptdesc or %NULL on error */ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { @@ -92,14 +97,16 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) */ /** - * pte_free - free PTE-level user page table page + * pte_free - free PTE-level user page table memory * @mm: the mm_struct of the current context - * @pte_page: the `struct page` representing the page table + * @pte_page: the `struct page` referencing the ptdesc */ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) { - pgtable_pte_page_dtor(pte_page); - __free_page(pte_page); + struct ptdesc *ptdesc = page_ptdesc(pte_page); + + pagetable_pte_dtor(ptdesc); + pagetable_free(ptdesc); } @@ -107,10 +114,11 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) #ifndef __HAVE_ARCH_PMD_ALLOC_ONE /** - * pmd_alloc_one - allocate a page for PMD-level page table + * pmd_alloc_one - allocate memory for a PMD-level page table * @mm: the mm_struct of the current context * - * Allocates a page and runs the
[PATCH mm-unstable v9 15/31] mm: remove page table members from struct page
The page table members are now split out into their own ptdesc struct. Remove them from struct page. Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) --- include/linux/mm_types.h | 21 - 1 file changed, 21 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ea34b22b4cbf..f5ba5b0bc836 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -141,24 +141,6 @@ struct page { struct {/* Tail pages of compound page */ unsigned long compound_head;/* Bit zero is set */ }; - struct {/* Page table pages */ - unsigned long _pt_pad_1;/* compound_head */ - pgtable_t pmd_huge_pte; /* protected by page->ptl */ - /* -* A PTE page table page might be freed by use of -* rcu_head: which overlays those two fields above. -*/ - unsigned long _pt_pad_2;/* mapping */ - union { - struct mm_struct *pt_mm; /* x86 pgds only */ - atomic_t pt_frag_refcount; /* powerpc */ - }; -#if ALLOC_SPLIT_PTLOCKS - spinlock_t *ptl; -#else - spinlock_t ptl; -#endif - }; struct {/* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; @@ -454,10 +436,7 @@ struct ptdesc { TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); -TABLE_MATCH(pmd_huge_pte, pmd_huge_pte); TABLE_MATCH(mapping, __page_mapping); -TABLE_MATCH(pt_mm, pt_mm); -TABLE_MATCH(ptl, ptl); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); TABLE_MATCH(_refcount, _refcount); -- 2.40.1
[PATCH mm-unstable v9 14/31] s390: Convert various pgalloc functions to use ptdescs
As part of the conversions to replace pgtable constructor/destructors with ptdesc equivalents, convert various page table functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/s390/include/asm/pgalloc.h | 4 +- arch/s390/include/asm/tlb.h | 4 +- arch/s390/mm/pgalloc.c | 128 3 files changed, 69 insertions(+), 67 deletions(-) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 89a9d5ef94f8..376b4b23bdaa 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -86,7 +86,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) if (!table) return NULL; crst_table_init(table, _SEGMENT_ENTRY_EMPTY); - if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { crst_table_free(mm, table); return NULL; } @@ -97,7 +97,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { if (mm_pmd_folded(mm)) return; - pgtable_pmd_page_dtor(virt_to_page(pmd)); + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); crst_table_free(mm, (unsigned long *) pmd); } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index b91f4a9b044c..383b1f91442c 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -89,12 +89,12 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, { if (mm_pmd_folded(tlb->mm)) return; - pgtable_pmd_page_dtor(virt_to_page(pmd)); + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_puds = 1; - tlb_remove_table(tlb, pmd); + tlb_remove_ptdesc(tlb, pmd); } /* diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index d7374add7820..07fc660a24aa 100644 --- 
a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -43,17 +43,17 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { - struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); - if (!page) + if (!ptdesc) return NULL; - arch_set_page_dat(page, CRST_ALLOC_ORDER); - return (unsigned long *) page_to_virt(page); + arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER); + return (unsigned long *) ptdesc_to_virt(ptdesc); } void crst_table_free(struct mm_struct *mm, unsigned long *table) { - free_pages((unsigned long)table, CRST_ALLOC_ORDER); + pagetable_free(virt_to_ptdesc(table)); } static void __crst_table_upgrade(void *arg) @@ -140,21 +140,21 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) struct page *page_table_alloc_pgste(struct mm_struct *mm) { - struct page *page; + struct ptdesc *ptdesc; u64 *table; - page = alloc_page(GFP_KERNEL); - if (page) { - table = (u64 *)page_to_virt(page); + ptdesc = pagetable_alloc(GFP_KERNEL, 0); + if (ptdesc) { + table = (u64 *)ptdesc_to_virt(ptdesc); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } - return page; + return ptdesc_page(ptdesc); } void page_table_free_pgste(struct page *page) { - __free_page(page); + pagetable_free(page_ptdesc(page)); } #endif /* CONFIG_PGSTE */ @@ -242,7 +242,7 @@ void page_table_free_pgste(struct page *page) unsigned long *page_table_alloc(struct mm_struct *mm) { unsigned long *table; - struct page *page; + struct ptdesc *ptdesc; unsigned int mask, bit; /* Try to get a fragment of a 4K page as a 2K page table */ @@ -250,9 +250,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm) table = NULL; spin_lock_bh(>context.lock); if (!list_empty(>context.pgtable_list)) { - page = list_first_entry(>context.pgtable_list, - struct page, lru); - mask = atomic_read(>_refcount) >> 24; + ptdesc = 
list_first_entry(&mm->context.pgtable_list, + struct ptdesc, pt_list); + mask = atomic_read(&ptdesc->_refcount) >> 24; /* * The pending removal bits must also be checked.
[PATCH mm-unstable v9 13/31] x86: Convert various functions to use ptdescs
In order to split struct ptdesc from struct page, convert various functions to use ptdescs. Some of the functions use the *get*page*() helper functions. Convert these to use pagetable_alloc() and ptdesc_address() instead to help standardize page tables further. Signed-off-by: Vishal Moola (Oracle) --- arch/x86/mm/pgtable.c | 47 ++- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 15a8009a4480..d3a93e8766ee 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -52,7 +52,7 @@ early_param("userpte", setup_userpte); void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { - pgtable_pte_page_dtor(pte); + pagetable_pte_dtor(page_ptdesc(pte)); paravirt_release_pte(page_to_pfn(pte)); paravirt_tlb_remove_table(tlb, pte); } @@ -60,7 +60,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) #if CONFIG_PGTABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { - struct page *page = virt_to_page(pmd); + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! 
For PAE, any changes to the top page-directory-pointer-table @@ -69,8 +69,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - pgtable_pmd_page_dtor(page); - paravirt_tlb_remove_table(tlb, page); + pagetable_pmd_dtor(ptdesc); + paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc)); } #if CONFIG_PGTABLE_LEVELS > 3 @@ -92,16 +92,16 @@ void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) static inline void pgd_list_add(pgd_t *pgd) { - struct page *page = virt_to_page(pgd); + struct ptdesc *ptdesc = virt_to_ptdesc(pgd); - list_add(>lru, _list); + list_add(>pt_list, _list); } static inline void pgd_list_del(pgd_t *pgd) { - struct page *page = virt_to_page(pgd); + struct ptdesc *ptdesc = virt_to_ptdesc(pgd); - list_del(>lru); + list_del(>pt_list); } #define UNSHARED_PTRS_PER_PGD \ @@ -112,12 +112,12 @@ static inline void pgd_list_del(pgd_t *pgd) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { - virt_to_page(pgd)->pt_mm = mm; + virt_to_ptdesc(pgd)->pt_mm = mm; } struct mm_struct *pgd_page_get_mm(struct page *page) { - return page->pt_mm; + return page_ptdesc(page)->pt_mm; } static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) @@ -213,11 +213,14 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; + struct ptdesc *ptdesc; for (i = 0; i < count; i++) if (pmds[i]) { - pgtable_pmd_page_dtor(virt_to_page(pmds[i])); - free_page((unsigned long)pmds[i]); + ptdesc = virt_to_ptdesc(pmds[i]); + + pagetable_pmd_dtor(ptdesc); + pagetable_free(ptdesc); mm_dec_nr_pmds(mm); } } @@ -230,18 +233,24 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) if (mm == _mm) gfp &= ~__GFP_ACCOUNT; + gfp &= ~__GFP_HIGHMEM; for (i = 0; i < count; i++) { - pmd_t *pmd = (pmd_t *)__get_free_page(gfp); - if (!pmd) + pmd_t *pmd = NULL; + struct ptdesc *ptdesc = pagetable_alloc(gfp, 0); + + if (!ptdesc) failed 
= true; - if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) { - free_page((unsigned long)pmd); - pmd = NULL; + if (ptdesc && !pagetable_pmd_ctor(ptdesc)) { + pagetable_free(ptdesc); + ptdesc = NULL; failed = true; } - if (pmd) + if (ptdesc) { mm_inc_nr_pmds(mm); + pmd = ptdesc_address(ptdesc); + } + pmds[i] = pmd; } @@ -830,7 +839,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) free_page((unsigned long)pmd_sv); - pgtable_pmd_page_dtor(virt_to_page(pmd)); + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); free_page((unsigned long)pmd); return 1; -- 2.40.1
[PATCH mm-unstable v9 12/31] powerpc: Convert various functions to use ptdescs
In order to split struct ptdesc from struct page, convert various functions to use ptdescs. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/powerpc/mm/book3s64/mmu_context.c | 10 ++--- arch/powerpc/mm/book3s64/pgtable.c | 32 +++--- arch/powerpc/mm/pgtable-frag.c | 58 +- 3 files changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index c766e4c26e42..1715b07c630c 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -246,15 +246,15 @@ static void destroy_contexts(mm_context_t *ctx) static void pmd_frag_destroy(void *pmd_frag) { int count; - struct page *page; + struct ptdesc *ptdesc; - page = virt_to_page(pmd_frag); + ptdesc = virt_to_ptdesc(pmd_frag); /* drop all the pending references */ count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - if (atomic_sub_and_test(PMD_FRAG_NR - count, >pt_frag_refcount)) { - pgtable_pmd_page_dtor(page); - __free_page(page); + if (atomic_sub_and_test(PMD_FRAG_NR - count, >pt_frag_refcount)) { + pagetable_pmd_dtor(ptdesc); + pagetable_free(ptdesc); } } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 75b938268b04..1498ccd08367 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -384,22 +384,22 @@ static pmd_t *get_pmd_from_cache(struct mm_struct *mm) static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) { void *ret = NULL; - struct page *page; + struct ptdesc *ptdesc; gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO; if (mm == _mm) gfp &= ~__GFP_ACCOUNT; - page = alloc_page(gfp); - if (!page) + ptdesc = pagetable_alloc(gfp, 0); + if (!ptdesc) return NULL; - if (!pgtable_pmd_page_ctor(page)) { - __free_pages(page, 0); + if (!pagetable_pmd_ctor(ptdesc)) { + pagetable_free(ptdesc); return NULL; } - atomic_set(>pt_frag_refcount, 1); 
+ atomic_set(>pt_frag_refcount, 1); - ret = page_address(page); + ret = ptdesc_address(ptdesc); /* * if we support only one fragment just return the * allocated page. @@ -409,12 +409,12 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) spin_lock(>page_table_lock); /* -* If we find pgtable_page set, we return +* If we find ptdesc_page set, we return * the allocated page with single fragment * count. */ if (likely(!mm->context.pmd_frag)) { - atomic_set(>pt_frag_refcount, PMD_FRAG_NR); + atomic_set(>pt_frag_refcount, PMD_FRAG_NR); mm->context.pmd_frag = ret + PMD_FRAG_SIZE; } spin_unlock(>page_table_lock); @@ -435,15 +435,15 @@ pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr) void pmd_fragment_free(unsigned long *pmd) { - struct page *page = virt_to_page(pmd); + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); - if (PageReserved(page)) - return free_reserved_page(page); + if (pagetable_is_reserved(ptdesc)) + return free_reserved_ptdesc(ptdesc); - BUG_ON(atomic_read(>pt_frag_refcount) <= 0); - if (atomic_dec_and_test(>pt_frag_refcount)) { - pgtable_pmd_page_dtor(page); - __free_page(page); + BUG_ON(atomic_read(>pt_frag_refcount) <= 0); + if (atomic_dec_and_test(>pt_frag_refcount)) { + pagetable_pmd_dtor(ptdesc); + pagetable_free(ptdesc); } } diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c index 0c6b68130025..8c31802f97e8 100644 --- a/arch/powerpc/mm/pgtable-frag.c +++ b/arch/powerpc/mm/pgtable-frag.c @@ -18,15 +18,15 @@ void pte_frag_destroy(void *pte_frag) { int count; - struct page *page; + struct ptdesc *ptdesc; - page = virt_to_page(pte_frag); + ptdesc = virt_to_ptdesc(pte_frag); /* drop all the pending references */ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; /* We allow PTE_FRAG_NR fragments from a PTE page */ - if (atomic_sub_and_test(PTE_FRAG_NR - count, >pt_frag_refcount)) { - pgtable_pte_page_dtor(page); - __free_page(page); + if (atomic_sub_and_test(PTE_FRAG_NR - count, 
&ptdesc->pt_frag_refcount)) { + pagetable_pte_dtor(ptdesc); + pagetable_free(ptdesc); } } @@ -55,25 +55,25 @@ static pte_t *get_pte_from_cache(struct mm_struct *mm) static pte_t
[PATCH mm-unstable v9 11/31] mm: Create ptdesc equivalents for pgtable_{pte,pmd}_page_{ctor,dtor}
Create pagetable_pte_ctor(), pagetable_pmd_ctor(), pagetable_pte_dtor(), and pagetable_pmd_dtor() and make the original pgtable constructor/destructors wrappers. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/mm.h | 56 ++ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 94984d49ab01..6310e0c59efe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2921,20 +2921,34 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline bool pgtable_pte_page_ctor(struct page *page) +static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) { - if (!ptlock_init(page_ptdesc(page))) + struct folio *folio = ptdesc_folio(ptdesc); + + if (!ptlock_init(ptdesc)) return false; - __SetPageTable(page); - inc_lruvec_page_state(page, NR_PAGETABLE); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } +static inline bool pgtable_pte_page_ctor(struct page *page) +{ + return pagetable_pte_ctor(page_ptdesc(page)); +} + +static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + ptlock_free(ptdesc); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); +} + static inline void pgtable_pte_page_dtor(struct page *page) { - ptlock_free(page_ptdesc(page)); - __ClearPageTable(page); - dec_lruvec_page_state(page, NR_PAGETABLE); + pagetable_pte_dtor(page_ptdesc(page)); } pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); @@ -3032,20 +3046,34 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } -static inline bool pgtable_pmd_page_ctor(struct page *page) +static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) { - if (!pmd_ptlock_init(page_ptdesc(page))) + struct folio *folio = ptdesc_folio(ptdesc); + + 
if (!pmd_ptlock_init(ptdesc)) return false; - __SetPageTable(page); - inc_lruvec_page_state(page, NR_PAGETABLE); + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); return true; } +static inline bool pgtable_pmd_page_ctor(struct page *page) +{ + return pagetable_pmd_ctor(page_ptdesc(page)); +} + +static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + pmd_ptlock_free(ptdesc); + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); +} + static inline void pgtable_pmd_page_dtor(struct page *page) { - pmd_ptlock_free(page_ptdesc(page)); - __ClearPageTable(page); - dec_lruvec_page_state(page, NR_PAGETABLE); + pagetable_pmd_dtor(page_ptdesc(page)); } /* -- 2.40.1
[PATCH mm-unstable v9 10/31] mm: Convert ptlock_free() to use ptdescs
This removes some direct accesses to struct page, working towards splitting out struct ptdesc from struct page. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/mm.h | 10 +- mm/memory.c| 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index aa6f77c71453..94984d49ab01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2861,7 +2861,7 @@ static inline void pagetable_free(struct ptdesc *pt) #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); bool ptlock_alloc(struct ptdesc *ptdesc); -extern void ptlock_free(struct page *page); +void ptlock_free(struct ptdesc *ptdesc); static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { @@ -2877,7 +2877,7 @@ static inline bool ptlock_alloc(struct ptdesc *ptdesc) return true; } -static inline void ptlock_free(struct page *page) +static inline void ptlock_free(struct ptdesc *ptdesc) { } @@ -2918,7 +2918,7 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) } static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } -static inline void ptlock_free(struct page *page) {} +static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ static inline bool pgtable_pte_page_ctor(struct page *page) @@ -2932,7 +2932,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page) static inline void pgtable_pte_page_dtor(struct page *page) { - ptlock_free(page); + ptlock_free(page_ptdesc(page)); __ClearPageTable(page); dec_lruvec_page_state(page, NR_PAGETABLE); } @@ -3006,7 +3006,7 @@ static inline void pmd_ptlock_free(struct ptdesc *ptdesc) #ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); #endif - ptlock_free(ptdesc_page(ptdesc)); + ptlock_free(ptdesc); } #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) diff --git a/mm/memory.c b/mm/memory.c index 
3606ef72ba70..d003076b218d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6145,8 +6145,8 @@ bool ptlock_alloc(struct ptdesc *ptdesc) return true; } -void ptlock_free(struct page *page) +void ptlock_free(struct ptdesc *ptdesc) { - kmem_cache_free(page_ptl_cachep, page->ptl); + kmem_cache_free(page_ptl_cachep, ptdesc->ptl); } #endif -- 2.40.1
[PATCH mm-unstable v9 09/31] mm: Convert pmd_ptlock_free() to use ptdescs
This removes some direct accesses to struct page, working towards splitting out struct ptdesc from struct page. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/mm.h | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 13947b17f25e..aa6f77c71453 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3001,12 +3001,12 @@ static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) return ptlock_init(ptdesc); } -static inline void pmd_ptlock_free(struct page *page) +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON_PAGE(page->pmd_huge_pte, page); + VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); #endif - ptlock_free(page); + ptlock_free(ptdesc_page(ptdesc)); } #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) @@ -3019,7 +3019,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) } static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } -static inline void pmd_ptlock_free(struct page *page) {} +static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -3043,7 +3043,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) static inline void pgtable_pmd_page_dtor(struct page *page) { - pmd_ptlock_free(page); + pmd_ptlock_free(page_ptdesc(page)); __ClearPageTable(page); dec_lruvec_page_state(page, NR_PAGETABLE); } -- 2.40.1
[PATCH mm-unstable v9 08/31] mm: Convert ptlock_init() to use ptdescs
This removes some direct accesses to struct page, working towards splitting out struct ptdesc from struct page. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/mm.h | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 040982fe9063..13947b17f25e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2892,7 +2892,7 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } -static inline bool ptlock_init(struct page *page) +static inline bool ptlock_init(struct ptdesc *ptdesc) { /* * prep_new_page() initialize page->private (and therefore page->ptl) @@ -2901,10 +2901,10 @@ static inline bool ptlock_init(struct page *page) * It can happen if arch try to use slab for page table allocation: * slab code uses page->slab_cache, which share storage with page->ptl. */ - VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); - if (!ptlock_alloc(page_ptdesc(page))) + VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc)); + if (!ptlock_alloc(ptdesc)) return false; - spin_lock_init(ptlock_ptr(page_ptdesc(page))); + spin_lock_init(ptlock_ptr(ptdesc)); return true; } @@ -2917,13 +2917,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } static inline void ptlock_cache_init(void) {} -static inline bool ptlock_init(struct page *page) { return true; } +static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ static inline bool pgtable_pte_page_ctor(struct page *page) { - if (!ptlock_init(page)) + if (!ptlock_init(page_ptdesc(page))) return false; __SetPageTable(page); inc_lruvec_page_state(page, NR_PAGETABLE); @@ -2998,7 +2998,7 @@ static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) #ifdef CONFIG_TRANSPARENT_HUGEPAGE ptdesc->pmd_huge_pte = NULL; #endif - 
return ptlock_init(ptdesc_page(ptdesc)); + return ptlock_init(ptdesc); } static inline void pmd_ptlock_free(struct page *page) -- 2.40.1
[PATCH mm-unstable v9 07/31] mm: Convert pmd_ptlock_init() to use ptdescs
This removes some direct accesses to struct page, working towards splitting out struct ptdesc from struct page. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/mm.h | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index bc82a64e5f01..040982fe9063 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2993,12 +2993,12 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(pmd_ptdesc(pmd)); } -static inline bool pmd_ptlock_init(struct page *page) +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - page->pmd_huge_pte = NULL; + ptdesc->pmd_huge_pte = NULL; #endif - return ptlock_init(page); + return ptlock_init(ptdesc_page(ptdesc)); } static inline void pmd_ptlock_free(struct page *page) @@ -3018,7 +3018,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } -static inline bool pmd_ptlock_init(struct page *page) { return true; } +static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void pmd_ptlock_free(struct page *page) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -3034,7 +3034,7 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) static inline bool pgtable_pmd_page_ctor(struct page *page) { - if (!pmd_ptlock_init(page)) + if (!pmd_ptlock_init(page_ptdesc(page))) return false; __SetPageTable(page); inc_lruvec_page_state(page, NR_PAGETABLE); -- 2.40.1
[PATCH mm-unstable v9 06/31] mm: Convert ptlock_ptr() to use ptdescs
This removes some direct accesses to struct page, working towards splitting out struct ptdesc from struct page. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- arch/x86/xen/mmu_pv.c | 2 +- include/linux/mm.h| 14 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index e0a975165de7..8796ec310483 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -667,7 +667,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) spinlock_t *ptl = NULL; #if USE_SPLIT_PTE_PTLOCKS - ptl = ptlock_ptr(page); + ptl = ptlock_ptr(page_ptdesc(page)); spin_lock_nest_lock(ptl, &mm->page_table_lock); #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 6aea8fb671f1..bc82a64e5f01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2863,9 +2863,9 @@ void __init ptlock_cache_init(void); bool ptlock_alloc(struct ptdesc *ptdesc); extern void ptlock_free(struct page *page); -static inline spinlock_t *ptlock_ptr(struct page *page) +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { - return page->ptl; + return ptdesc->ptl; } #else /* ALLOC_SPLIT_PTLOCKS */ static inline void ptlock_cache_init(void) @@ -2881,15 +2881,15 @@ static inline void ptlock_free(struct page *page) { } -static inline spinlock_t *ptlock_ptr(struct page *page) +static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { - return &page->ptl; + return &ptdesc->ptl; } #endif /* ALLOC_SPLIT_PTLOCKS */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_page(*pmd)); + return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } static inline bool ptlock_init(struct page *page) @@ -2904,7 +2904,7 @@ static inline bool ptlock_init(struct page *page) VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); if (!ptlock_alloc(page_ptdesc(page))) return false; - spin_lock_init(ptlock_ptr(page)); + spin_lock_init(ptlock_ptr(page_ptdesc(page))); return true; } @@ -2990,7 +2990,7 @@ 
static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(ptdesc_page(pmd_ptdesc(pmd))); + return ptlock_ptr(pmd_ptdesc(pmd)); } static inline bool pmd_ptlock_init(struct page *page) -- 2.40.1
[PATCH mm-unstable v9 05/31] mm: Convert ptlock_alloc() to use ptdescs
This removes some direct accesses to struct page, working towards splitting out struct ptdesc from struct page. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/mm.h | 6 +++--- mm/memory.c| 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f6d14a5fe747..6aea8fb671f1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2860,7 +2860,7 @@ static inline void pagetable_free(struct ptdesc *pt) #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); -extern bool ptlock_alloc(struct page *page); +bool ptlock_alloc(struct ptdesc *ptdesc); extern void ptlock_free(struct page *page); static inline spinlock_t *ptlock_ptr(struct page *page) @@ -2872,7 +2872,7 @@ static inline void ptlock_cache_init(void) { } -static inline bool ptlock_alloc(struct page *page) +static inline bool ptlock_alloc(struct ptdesc *ptdesc) { return true; } @@ -2902,7 +2902,7 @@ static inline bool ptlock_init(struct page *page) * slab code uses page->slab_cache, which share storage with page->ptl. */ VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); - if (!ptlock_alloc(page)) + if (!ptlock_alloc(page_ptdesc(page))) return false; spin_lock_init(ptlock_ptr(page)); return true; diff --git a/mm/memory.c b/mm/memory.c index 956aad8aff34..3606ef72ba70 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6134,14 +6134,14 @@ void __init ptlock_cache_init(void) SLAB_PANIC, NULL); } -bool ptlock_alloc(struct page *page) +bool ptlock_alloc(struct ptdesc *ptdesc) { spinlock_t *ptl; ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); if (!ptl) return false; - page->ptl = ptl; + ptdesc->ptl = ptl; return true; } -- 2.40.1
[PATCH mm-unstable v9 04/31] mm: Convert pmd_pgtable_page() callers to use pmd_ptdesc()
Converts internal pmd_pgtable_page() callers to use pmd_ptdesc(). This removes some direct accesses to struct page, working towards splitting out struct ptdesc from struct page. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 54dc176b90ea..f6d14a5fe747 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2990,7 +2990,7 @@ static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_pgtable_page(pmd)); + return ptlock_ptr(ptdesc_page(pmd_ptdesc(pmd))); } static inline bool pmd_ptlock_init(struct page *page) @@ -3009,7 +3009,7 @@ static inline void pmd_ptlock_free(struct page *page) ptlock_free(page); } -#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte) +#define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) #else -- 2.40.1
[PATCH mm-unstable v9 03/31] mm: add utility functions for ptdesc
Introduce utility functions setting the foundation for ptdescs. These will also assist in the splitting out of ptdesc from struct page. Functions that focus on the descriptor are prefixed with ptdesc_* while functions that focus on the pagetable are prefixed with pagetable_*. pagetable_alloc() is defined to allocate new ptdesc pages as compound pages. This is to standardize ptdescs by allowing for one allocation and one free function, in contrast to 2 allocation and 2 free functions. Signed-off-by: Vishal Moola (Oracle) --- include/asm-generic/tlb.h | 11 +++ include/linux/mm.h| 61 +++ include/linux/mm_types.h | 12 3 files changed, 84 insertions(+) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index bc32a2284c56..129a3a759976 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -480,6 +480,17 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return tlb_remove_page_size(tlb, page, PAGE_SIZE); } +static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) +{ + tlb_remove_table(tlb, pt); +} + +/* Like tlb_remove_ptdesc, but for page-like page directories. 
*/ +static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) +{ + tlb_remove_page(tlb, ptdesc_page(pt)); +} + static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { diff --git a/include/linux/mm.h b/include/linux/mm.h index ec15ebc6def1..54dc176b90ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2806,6 +2806,57 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU */ +static inline struct ptdesc *virt_to_ptdesc(const void *x) +{ + return page_ptdesc(virt_to_page(x)); +} + +static inline void *ptdesc_to_virt(const struct ptdesc *pt) +{ + return page_to_virt(ptdesc_page(pt)); +} + +static inline void *ptdesc_address(const struct ptdesc *pt) +{ + return folio_address(ptdesc_folio(pt)); +} + +static inline bool pagetable_is_reserved(struct ptdesc *pt) +{ + return folio_test_reserved(ptdesc_folio(pt)); +} + +/** + * pagetable_alloc - Allocate pagetables + * @gfp:GFP flags + * @order: desired pagetable order + * + * pagetable_alloc allocates memory for page tables as well as a page table + * descriptor to describe that memory. + * + * Return: The ptdesc describing the allocated page tables. + */ +static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) +{ + struct page *page = alloc_pages(gfp | __GFP_COMP, order); + + return page_ptdesc(page); +} + +/** + * pagetable_free - Free pagetables + * @pt:The page table descriptor + * + * pagetable_free frees the memory of all page tables described by a page + * table descriptor and the memory for the descriptor itself. 
+ */ +static inline void pagetable_free(struct ptdesc *pt) +{ + struct page *page = ptdesc_page(pt); + + __free_pages(page, compound_order(page)); +} + #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); @@ -2932,6 +2983,11 @@ static inline struct page *pmd_pgtable_page(pmd_t *pmd) return virt_to_page((void *)((unsigned long) pmd & mask)); } +static inline struct ptdesc *pmd_ptdesc(pmd_t *pmd) +{ + return page_ptdesc(pmd_pgtable_page(pmd)); +} + static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { return ptlock_ptr(pmd_pgtable_page(pmd)); @@ -3044,6 +3100,11 @@ static inline void mark_page_reserved(struct page *page) adjust_managed_page_count(page, -1); } +static inline void free_reserved_ptdesc(struct ptdesc *pt) +{ + free_reserved_page(ptdesc_page(pt)); +} + /* * Default method to free all the __init memory into the buddy system. * The freed pages will be poisoned with pattern "poison" if it's within diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cb47438ae17f..ea34b22b4cbf 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -467,6 +467,18 @@ TABLE_MATCH(memcg_data, pt_memcg_data); #undef TABLE_MATCH static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); +#define ptdesc_page(pt)(_Generic((pt), \ + const struct ptdesc *: (const struct page *)(pt), \ + struct ptdesc *:(struct page *)(pt))) + +#define ptdesc_folio(pt) (_Generic((pt), \ + const struct ptdesc *: (const struct folio *)(pt), \ + struct ptdesc *:(struct folio *)(pt))) + +#define page_ptdesc(p) (_Generic((p), \ + const struct page *:(const struct ptdesc *)(p), \ + struct page *: (struct ptdesc *)(p))) + /* * Used for
[PATCH mm-unstable v9 02/31] pgtable: create struct ptdesc
Currently, page table information is stored within struct page. As part of simplifying struct page, create struct ptdesc for page table information. Signed-off-by: Vishal Moola (Oracle) Acked-by: Mike Rapoport (IBM) --- include/linux/mm_types.h | 70 1 file changed, 70 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 18c8c3d793b0..cb47438ae17f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -397,6 +397,76 @@ FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); #undef FOLIO_MATCH +/** + * struct ptdesc -Memory descriptor for page tables. + * @__page_flags: Same as page flags. Unused for page tables. + * @pt_rcu_head: For freeing page table pages. + * @pt_list: List of used page tables. Used for s390 and x86. + * @_pt_pad_1:Padding that aliases with page's compound head. + * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. + * @__page_mapping: Aliases with page->mapping. Unused for page tables. + * @pt_mm:Used for x86 pgds. + * @pt_frag_refcount: For fragmented page table tracking. Powerpc and s390 only. + * @_pt_pad_2:Padding to ensure proper alignment. + * @ptl: Lock for the page table. + * @__page_type: Same as page->page_type. Unused for page tables. + * @_refcount:Same as page refcount. Used for s390 page tables. + * @pt_memcg_data:Memcg data. Tracked for page tables here. + * + * This struct overlays struct page for now. Do not modify without a good + * understanding of the issues. 
+ */ +struct ptdesc { + unsigned long __page_flags; + + union { + struct rcu_head pt_rcu_head; + struct list_head pt_list; + struct { + unsigned long _pt_pad_1; + pgtable_t pmd_huge_pte; + }; + }; + unsigned long __page_mapping; + + union { + struct mm_struct *pt_mm; + atomic_t pt_frag_refcount; + }; + + union { + unsigned long _pt_pad_2; +#if ALLOC_SPLIT_PTLOCKS + spinlock_t *ptl; +#else + spinlock_t ptl; +#endif + }; + unsigned int __page_type; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long pt_memcg_data; +#endif +}; + +#define TABLE_MATCH(pg, pt)\ + static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) +TABLE_MATCH(flags, __page_flags); +TABLE_MATCH(compound_head, pt_list); +TABLE_MATCH(compound_head, _pt_pad_1); +TABLE_MATCH(pmd_huge_pte, pmd_huge_pte); +TABLE_MATCH(mapping, __page_mapping); +TABLE_MATCH(pt_mm, pt_mm); +TABLE_MATCH(ptl, ptl); +TABLE_MATCH(rcu_head, pt_rcu_head); +TABLE_MATCH(page_type, __page_type); +TABLE_MATCH(_refcount, _refcount); +#ifdef CONFIG_MEMCG +TABLE_MATCH(memcg_data, pt_memcg_data); +#endif +#undef TABLE_MATCH +static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); + /* * Used for sizing the vmemmap region on some architectures */ -- 2.40.1
[PATCH mm-unstable v9 01/31] mm: Add PAGE_TYPE_OP folio functions
No folio equivalents for page type operations have been defined, so define them for later folio conversions. Also changes the Page##uname macros to take in const struct page* since we only read the memory here. Acked-by: Mike Rapoport (IBM) Signed-off-by: Vishal Moola (Oracle) --- include/linux/page-flags.h | 30 +++--- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 92a2063a0a23..9218028caf33 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -908,6 +908,8 @@ static inline bool is_page_hwpoison(struct page *page) #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) +#define folio_test_type(folio, flag) \ + ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) static inline int page_type_has_type(unsigned int page_type) { @@ -919,27 +921,41 @@ static inline int page_has_type(struct page *page) return page_type_has_type(page->page_type); } -#define PAGE_TYPE_OPS(uname, lname)\ -static __always_inline int Page##uname(struct page *page) \ +#define PAGE_TYPE_OPS(uname, lname, fname) \ +static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ +static __always_inline int folio_test_##fname(const struct folio *folio)\ +{ \ + return folio_test_type(folio, PG_##lname); \ +} \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ +static __always_inline void __folio_set_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ + folio->page.page_type &= ~PG_##lname; \ +} \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ -} +} \ +static __always_inline void __folio_clear_##fname(struct folio *folio) \ +{ \ + 
VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ + folio->page.page_type |= PG_##lname;\ +} \ /* * PageBuddy() indicates that the page is free and in the buddy system * (see mm/page_alloc.c). */ -PAGE_TYPE_OPS(Buddy, buddy) +PAGE_TYPE_OPS(Buddy, buddy, buddy) /* * PageOffline() indicates that the page is logically offline although the @@ -963,7 +979,7 @@ PAGE_TYPE_OPS(Buddy, buddy) * pages should check PageOffline() and synchronize with such drivers using * page_offline_freeze()/page_offline_thaw(). */ -PAGE_TYPE_OPS(Offline, offline) +PAGE_TYPE_OPS(Offline, offline, offline) extern void page_offline_freeze(void); extern void page_offline_thaw(void); @@ -973,12 +989,12 @@ extern void page_offline_end(void); /* * Marks pages in use as page tables. */ -PAGE_TYPE_OPS(Table, table) +PAGE_TYPE_OPS(Table, table, pgtable) /* * Marks guardpages used with debug_pagealloc. */ -PAGE_TYPE_OPS(Guard, guard) +PAGE_TYPE_OPS(Guard, guard, guard) extern bool is_free_buddy_page(struct page *page); -- 2.40.1
[PATCH mm-unstable v9 00/31] Split ptdesc from struct page
The MM subsystem is trying to shrink struct page. This patchset introduces a memory descriptor for page table tracking - struct ptdesc. This patchset introduces ptdesc, splits ptdesc from struct page, and converts many callers of page table constructor/destructors to use ptdescs. Ptdesc is a foundation to further standardize page tables, and eventually allow for dynamic allocation of page tables independent of struct page. However, the use of pages for page table tracking is quite deeply ingrained and varied across architectures, so there is still a lot of work to be done before that can happen. This applies cleanly onto the current unstable after dropping v8 of this series. v9: Fix build errors for NOMMU configs - trying to define ptdesc before spinlock_t and struct page were defined. Moved definition of struct ptdesc to include/linux/mm_types.h instead of include/linux/pgtable.h Vishal Moola (Oracle) (31): mm: Add PAGE_TYPE_OP folio functions pgtable: create struct ptdesc mm: add utility functions for ptdesc mm: Convert pmd_pgtable_page() callers to use pmd_ptdesc() mm: Convert ptlock_alloc() to use ptdescs mm: Convert ptlock_ptr() to use ptdescs mm: Convert pmd_ptlock_init() to use ptdescs mm: Convert ptlock_init() to use ptdescs mm: Convert pmd_ptlock_free() to use ptdescs mm: Convert ptlock_free() to use ptdescs mm: Create ptdesc equivalents for pgtable_{pte,pmd}_page_{ctor,dtor} powerpc: Convert various functions to use ptdescs x86: Convert various functions to use ptdescs s390: Convert various pgalloc functions to use ptdescs mm: remove page table members from struct page pgalloc: Convert various functions to use ptdescs arm: Convert various functions to use ptdescs arm64: Convert various functions to use ptdescs csky: Convert __pte_free_tlb() to use ptdescs hexagon: Convert __pte_free_tlb() to use ptdescs loongarch: Convert various functions to use ptdescs m68k: Convert various functions to use ptdescs mips: Convert various functions to use ptdescs nios2: 
Convert __pte_free_tlb() to use ptdescs openrisc: Convert __pte_free_tlb() to use ptdescs riscv: Convert alloc_{pmd, pte}_late() to use ptdescs sh: Convert pte_free_tlb() to use ptdescs sparc64: Convert various functions to use ptdescs sparc: Convert pgtable_pte_page_{ctor, dtor}() to ptdesc equivalents um: Convert {pmd, pte}_free_tlb() to use ptdescs mm: Remove pgtable_{pmd, pte}_page_{ctor, dtor}() wrappers Documentation/mm/split_page_table_lock.rst| 12 +- .../zh_CN/mm/split_page_table_lock.rst| 14 +- arch/arm/include/asm/tlb.h| 12 +- arch/arm/mm/mmu.c | 7 +- arch/arm64/include/asm/tlb.h | 14 +- arch/arm64/mm/mmu.c | 7 +- arch/csky/include/asm/pgalloc.h | 4 +- arch/hexagon/include/asm/pgalloc.h| 8 +- arch/loongarch/include/asm/pgalloc.h | 27 ++-- arch/loongarch/mm/pgtable.c | 7 +- arch/m68k/include/asm/mcf_pgalloc.h | 47 +++--- arch/m68k/include/asm/sun3_pgalloc.h | 8 +- arch/m68k/mm/motorola.c | 4 +- arch/mips/include/asm/pgalloc.h | 32 ++-- arch/mips/mm/pgtable.c| 8 +- arch/nios2/include/asm/pgalloc.h | 8 +- arch/openrisc/include/asm/pgalloc.h | 8 +- arch/powerpc/mm/book3s64/mmu_context.c| 10 +- arch/powerpc/mm/book3s64/pgtable.c| 32 ++-- arch/powerpc/mm/pgtable-frag.c| 58 +++ arch/riscv/include/asm/pgalloc.h | 8 +- arch/riscv/mm/init.c | 16 +- arch/s390/include/asm/pgalloc.h | 4 +- arch/s390/include/asm/tlb.h | 4 +- arch/s390/mm/pgalloc.c| 128 +++ arch/sh/include/asm/pgalloc.h | 9 +- arch/sparc/mm/init_64.c | 17 +- arch/sparc/mm/srmmu.c | 5 +- arch/um/include/asm/pgalloc.h | 18 +-- arch/x86/mm/pgtable.c | 47 +++--- arch/x86/xen/mmu_pv.c | 2 +- include/asm-generic/pgalloc.h | 88 +- include/asm-generic/tlb.h | 11 ++ include/linux/mm.h| 151 +- include/linux/mm_types.h | 97 --- include/linux/page-flags.h| 30 +++- mm/memory.c | 8 +- 37 files changed, 585 insertions(+), 385 deletions(-) -- 2.40.1
Re: linux-next: Tree for Aug 7 (sound/soc/fsl/)
On 8/6/23 22:47, Stephen Rothwell wrote: > Hi all, > > Changes since 20230804: > on PPC32: WARNING: unmet direct dependencies detected for SND_SOC_MPC5200_AC97 Depends on [n]: SOUND [=y] && SND [=y] && SND_SOC [=y] && SND_POWERPC_SOC [=y] && PPC_MPC52xx [=y] && PPC_BESTCOMM [=n] Selected by [y]: - SND_MPC52xx_SOC_PCM030 [=y] && SOUND [=y] && SND [=y] && SND_SOC [=y] && SND_POWERPC_SOC [=y] && PPC_MPC5200_SIMPLE [=y] - SND_MPC52xx_SOC_EFIKA [=y] && SOUND [=y] && SND [=y] && SND_SOC [=y] && SND_POWERPC_SOC [=y] && PPC_EFIKA [=y] powerpc-linux-ld: sound/soc/fsl/mpc5200_psc_ac97.o: in function `psc_ac97_of_remove': mpc5200_psc_ac97.c:(.text.psc_ac97_of_remove+0x1c): undefined reference to `mpc5200_audio_dma_destroy' powerpc-linux-ld: sound/soc/fsl/mpc5200_psc_ac97.o: in function `psc_ac97_of_probe': mpc5200_psc_ac97.c:(.text.psc_ac97_of_probe+0x20): undefined reference to `mpc5200_audio_dma_create' Full randconfig file is attached. -- ~Randy config-r4919.gz Description: application/gzip
Re: [PATCH V2 2/2] tools/perf/tests: perf all metricgroups test fails when perf_event access is restricted
On 04/08/23 10:30 am, Athira Rajeev wrote: Perf all metricgroups test fails as below when perf_event access is restricted. ./perf test -v "perf all metricgroups test" Testing Memory_BW Error: Access to performance monitoring and observability operations is limited. Enforced MAC policy settings (SELinux) can limit access to performance access to performance monitoring and observability operations for processes without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability. test child finished with -1 end perf all metricgroups test: FAILED! Fix the testcase to skip those metric events which needs perf_event access explicitly. The exit code of the testcase is based on return code of the perf stat command ( enabled by set -e option ). Hence save the exit status in a variable and use that to decide success or fail for the testcase. Signed-off-by: Athira Rajeev With this patch applied(on power) perf metricgroups test works correctly when perf_event access is restricted. # ./perf test "perf all metricgroups test" 96: perf all metricgroups test : Ok Tested-by: Disha Goel --- Changelog: v1 -> v2: Changed the condition to use "echo" and "grep" so it works on Posix shell as well. tools/perf/tests/shell/stat_all_metricgroups.sh | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index cb35e488809a..eaa5e1172294 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -2,11 +2,19 @@ # perf all metricgroups test # SPDX-License-Identifier: GPL-2.0 -set -e - for m in $(perf list --raw-dump metricgroups); do echo "Testing $m" - perf stat -M "$m" -a true + result=$(perf stat -M "$m" -a true 2>&1) + rc=$? + # Skip if there is no access to perf_events monitoring + # Otherwise exit based on the return code of perf comamnd. 
+ if echo "$result" | grep -q "Access to performance monitoring and observability operations is limited"; + then + continue + else + [ $rc -ne 0 ] && exit $rc + fi + done exit 0
Re: [PATCH V2 2/2] tools/perf/tests: perf all metricgroups test fails when perf_event access is restricted
Em Mon, Aug 07, 2023 at 08:14:39PM +0530, Disha Goel escreveu: > On 04/08/23 10:30 am, Athira Rajeev wrote: > > Perf all metricgroups test fails as below when perf_event access > > is restricted. > > > > ./perf test -v "perf all metricgroups test" > > Testing Memory_BW > > Error: > > Access to performance monitoring and observability operations is > > limited. > > Enforced MAC policy settings (SELinux) can limit access to performance > > access to performance monitoring and observability operations for > > processes > > without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability. > > > > test child finished with -1 > > end > > perf all metricgroups test: FAILED! > > > > Fix the testcase to skip those metric events which needs perf_event access > > explicitly. The exit code of the testcase is based on return code of > > the perf stat command ( enabled by set -e option ). Hence save the > > exit status in a variable and use that to decide success or fail for the > > testcase. I wonder if we shouldn't somehow check if the credentials needed to performing a test shouldn't be checked before trying it. This way we would check if the check that the tool or the kernel is doing is the appropriate one. I.e. the kernel refusal for doing something may be an error. - Arnaldo > > Signed-off-by: Athira Rajeev > > With this patch applied(on power) perf metricgroups test works correctly when > perf_event access is restricted. > > # ./perf test "perf all metricgroups test" > 96: perf all metricgroups test : Ok > > Tested-by: Disha Goel > > > --- > > Changelog: > > v1 -> v2: > > Changed the condition to use "echo" and "grep" so it works on > > Posix shell as well. 
> > > > tools/perf/tests/shell/stat_all_metricgroups.sh | 14 +++--- > > 1 file changed, 11 insertions(+), 3 deletions(-) > > > > diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh > > b/tools/perf/tests/shell/stat_all_metricgroups.sh > > index cb35e488809a..eaa5e1172294 100755 > > --- a/tools/perf/tests/shell/stat_all_metricgroups.sh > > +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh > > @@ -2,11 +2,19 @@ > > # perf all metricgroups test > > # SPDX-License-Identifier: GPL-2.0 > > > > -set -e > > - > > for m in $(perf list --raw-dump metricgroups); do > > echo "Testing $m" > > - perf stat -M "$m" -a true > > + result=$(perf stat -M "$m" -a true 2>&1) > > + rc=$? > > + # Skip if there is no access to perf_events monitoring > > + # Otherwise exit based on the return code of perf comamnd. > > + if echo "$result" | grep -q "Access to performance monitoring and > > observability operations is limited"; > > + then > > + continue > > + else > > + [ $rc -ne 0 ] && exit $rc > > + fi > > + > > done > > > > exit 0 -- - Arnaldo
Re: [PATCH 1/2] PCI: Add pci_find_next_dvsec_capability to find next designated VSEC
[+cc David since drivers/platform/x86/intel/vsec.c does some similar things, although it seems to iterate over all Intel DVSEC IDs at once] In subject: PCI: Add pci_find_next_dvsec_capability() to find next Designated VSEC On Mon, Aug 07, 2023 at 11:18:45AM +0800, Xiongfeng Wang wrote: > Some devices may have several DVSEC(Designated Vendor-Specific Extended > Capability) entries with the same DVSEC ID. Add > pci_find_next_dvsec_capability() to find them all. Add space between "DVSEC" and "(Designated ...)". > Signed-off-by: Xiongfeng Wang Acked-by: Bjorn Helgaas so you can merge this along with the ocxl patch that uses it. > --- > drivers/pci/pci.c | 37 + > include/linux/pci.h | 2 ++ > 2 files changed, 27 insertions(+), 12 deletions(-) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index 60230da957e0..3455ca7306ae 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -749,35 +749,48 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 > vendor, int cap) > EXPORT_SYMBOL_GPL(pci_find_vsec_capability); > > /** > - * pci_find_dvsec_capability - Find DVSEC for vendor > + * pci_find_next_dvsec_capability - Find next DVSEC for vendor > * @dev: PCI device to query > + * @start: address at which to start looking (0 to start at beginning of > list) s/address/Address/ to match other parameters > * @vendor: Vendor ID to match for the DVSEC > * @dvsec: Designated Vendor-specific capability ID There are a lot of IDs floating around here, so to better match the spec language: @dvsec: Vendor-defined DVSEC ID > - * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability > - * offset in config space; otherwise return 0. > + * Returns the address of the next DVSEC if the DVSEC has Vendor ID @vendor > and > + * DVSEC ID @dvsec; otherwise return 0. DVSEC can occur several times with > the > + * same DVSEC ID for some devices, and this provides a way to find them all. 
> */ > -u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec) > +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, u16 > vendor, > +u16 dvsec) > { > - int pos; > + u16 pos = start; > > - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC); > - if (!pos) > - return 0; > - > - while (pos) { > + while ((pos = pci_find_next_ext_capability(dev, pos, > + PCI_EXT_CAP_ID_DVSEC))) { > u16 v, id; > > pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, ); > pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, ); > if (vendor == v && dvsec == id) > return pos; > - > - pos = pci_find_next_ext_capability(dev, pos, > PCI_EXT_CAP_ID_DVSEC); > } > > return 0; > } > +EXPORT_SYMBOL_GPL(pci_find_next_dvsec_capability); > + > +/** > + * pci_find_dvsec_capability - Find DVSEC for vendor > + * @dev: PCI device to query > + * @vendor: Vendor ID to match for the DVSEC > + * @dvsec: Designated Vendor-specific capability ID > + * > + * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability > + * offset in config space; otherwise return 0. > + */ > +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec) > +{ > + return pci_find_next_dvsec_capability(dev, 0, vendor, dvsec); > +} > EXPORT_SYMBOL_GPL(pci_find_dvsec_capability); > > /** > diff --git a/include/linux/pci.h b/include/linux/pci.h > index c69a2cc1f412..82bb905daf72 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -1168,6 +1168,8 @@ u16 pci_find_next_ext_capability(struct pci_dev *dev, > u16 pos, int cap); > struct pci_bus *pci_find_next_bus(const struct pci_bus *from); > u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap); > u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec); > +u16 pci_find_next_dvsec_capability(struct pci_dev *dev, u16 start, u16 > vendor, > +u16 dvsec); > > u64 pci_get_dsn(struct pci_dev *dev); > > -- > 2.20.1 >
Re: [PATCH v7 7/7] mm/memory_hotplug: Enable runtime update of memmap_on_memory parameter
On 07.08.23 14:41, David Hildenbrand wrote: On 07.08.23 14:27, Michal Hocko wrote: On Sat 05-08-23 19:54:23, Aneesh Kumar K V wrote: [...] Do you see a need for firmware-managed memory to be hotplugged in with different memory block sizes? In short. Yes. Slightly longer, a fixed size memory block semantic is just standing in the way and I would even argue it is actively harmful. Just have a look at ridicously small memory blocks on ppc. I do understand that it makes some sense to be aligned to the memory model (so sparsmem section aligned). In an ideal world, memory hotplug v2 interface (if we ever go that path) should be physical memory range based. Yes, we discussed that a couple of times already (and so far nobody cared to implement any of that). Small memory block sizes are very beneficial for use cases like PPC dlar, virtio-mem, hyperv-balloon, ... essentially in most virtual environments where you might want to add/remove memory in very small granularity. I don't see that changing any time soon. Rather the opposite. Small memory block sizes are suboptimal for large machines where you might never end up removing such memory (boot memory), or when dealing with devices that can only be removed in one piece (DIMM/kmem). We already have memory groups in place to model that. For the latter it might be beneficial to have memory blocks of larger size that correspond to the physical memory ranges. That might also make a memmap (re-)configuration easier. Not sure if that is standing in any way or is harmful, though. Just because I thought of something right now, I'll share it, maybe it makes sense. Assume when we get add_memory*(MHP_MEMMAP_ON_MEMORY) and it is enabled by the admin: 1) We create a single altmap at the beginning of the memory 2) We create the existing fixed-size memory block devices, but flag them to be part of a single "altmap" unit. 
3) Whenever we trigger offlining of a single such memory block, we offline *all* memory blocks belonging to that altmap, essentially using a single offline_pages() call and updating all memory block states accordingly. 4) Whenever we trigger onlining of a single such memory block, we online *all* memory blocks belonging to that altmap, using a single online_pages() call. 5) We fail remove_memory() if it doesn't cover the same (altmap) range. So we can avoid having a memory block v2 (and all that comes with that ...) for now and still get that altmap stuff sorted out. As that altmap behavior can be controlled by the admin, we should be fine for now. I think all memory notifiers should already be able to handle bigger granularity, but it would be easy to check. Some internal things might require a bit of tweaking. Just a thought. -- Cheers, David / dhildenb
Re: [PATCH v2 24/28] pinctrl: Add support for the Lantic PEF2256 pinmux
Hi Linus, Andrew, On Mon, 7 Aug 2023 15:17:11 +0200 Andrew Lunn wrote: > On Mon, Aug 07, 2023 at 03:06:42PM +0200, Linus Walleij wrote: > > On Mon, Aug 7, 2023 at 3:05 PM Linus Walleij > > wrote: > > > > > > Signed-off-by: Herve Codina > > > > > > So it is a bridge chip? Please use that terminology since Linux > > > DRM often talks about bridges. > > > > Replying to self: no it's not a bridge, it's a WAN thingy. > > > > So perhaps write that this is a WAN interface adapter chip. > > Hi Linus > > In the E1/T1/J1 world, framer is a well understood concept. Maybe the > text needs a bit more background information to explain what this is > to somebody who does not have an old school telecoms background. > >Andrew Maybe I can add in my commit log: --- 8< --- This kind of component can be found in old telecommunication systems. It was used for the digital transmission of many simultaneous telephone calls by time-division multiplexing. Also, using the HDLC protocol, WAN networks can be reached through the framer. --- 8< --- Do you think that would be better? Regards, Hervé Codina -- Hervé Codina, Bootlin Embedded Linux and Kernel engineering https://bootlin.com
Re: [PATCH v2 24/28] pinctrl: Add support for the Lantic PEF2256 pinmux
Hi Linus, On Mon, 7 Aug 2023 15:05:15 +0200 Linus Walleij wrote: > Hi Herve, > > thanks for your patch! > > First: is this patch something we could merge separately? I don't see > any dependency on the other patches. It depends on pef2256: in drivers/pinctrl/Kconfig: --- 8< --- +config PINCTRL_PEF2256 + tristate "Lantiq PEF2256 (FALC56) pin controller driver" + depends on OF && FRAMER_PEF2256 --- 8< --- in drivers/pinctrl/pinctrl-pef2256.c --- 8< --- +#include --- 8< --- Everything pef2256-related that it depends on is provided by patch 23/28 "net: wan: framer: Add support for the Lantiq PEF2256 framer" > > On Wed, Jul 26, 2023 at 5:04 PM Herve Codina wrote: > > > The Lantiq PEF2256 is a framer and line interface component designed to > > fulfill all required interfacing between an analog E1/T1/J1 line and the > > digital PCM system highway/H.100 bus. > > > > This pinmux support handles the pin muxing part (pins RP(A..D) and pins > > XP(A..D)) of the PEF2256. > > > > Signed-off-by: Herve Codina > > So it is a bridge chip? Please use that terminology since Linux > DRM often talks about bridges. > > > +++ b/drivers/pinctrl/pinctrl-pef2256-regs.h > (...) > > +#include "linux/bitfield.h" > > Really? I don't think there is such a file there. > > Do you mean <linux/bitfield.h> and does this even compile? Yes and it compiles (even with the quoted include file). It will be changed to <linux/bitfield.h> in the next iteration. > > > diff --git a/drivers/pinctrl/pinctrl-pef2256.c > > b/drivers/pinctrl/pinctrl-pef2256.c > (...) > > +struct pef2256_pinctrl { > > + struct device *dev; > > + struct regmap *regmap; > > + enum pef2256_version version; > > + struct { > > + struct pinctrl_desc pctrl_desc; > > + const struct pef2256_function_desc *functions; > > + unsigned int nfunctions; > > + } pinctrl; > > Uh anonymous struct... can't you just define the struct separately > with a name? Or fold it into struct pef2256_pinctrl without the > additional struct? Thanks. I will fold it into struct pef2256_pinctrl in the next iteration. 
Thanks Hervé > > Otherwise it looks neat! > > Yours, > Linus Walleij
Re: [PATCH v2 24/28] pinctrl: Add support for the Lantic PEF2256 pinmux
On Mon, Aug 07, 2023 at 03:06:42PM +0200, Linus Walleij wrote: > On Mon, Aug 7, 2023 at 3:05 PM Linus Walleij wrote: > > > > Signed-off-by: Herve Codina > > > > So it is a bridge chip? Please use that terminology since Linux > > DRM often talks about bridges. > > Replying to self: no it's not a bridge, it's a WAN thingy. > > So perhaps write that this is a WAN interface adapter chip. Hi Linus In the E1/T1/J1 world, framer is a well understood concept. Maybe the text needs a bit more background information to explain what this is to somebody who does not have an old school telecoms background. Andrew
Re: [PATCH v2 24/28] pinctrl: Add support for the Lantic PEF2256 pinmux
On Mon, Aug 07, 2023 at 03:05:15PM +0200, Linus Walleij wrote: > On Wed, Jul 26, 2023 at 5:04 PM Herve Codina wrote: > > +#include "linux/bitfield.h" > Really? I don't think there is such a file there. > Do you mean <linux/bitfield.h> and does this even compile? #include "" means "try the local directory first then fall back to system includes" so it'll work, it picks up extra stuff on top of what <> does. There's a stylistic issue though. signature.asc Description: PGP signature
Re: [PATCH v2 24/28] pinctrl: Add support for the Lantic PEF2256 pinmux
On Mon, Aug 7, 2023 at 3:05 PM Linus Walleij wrote: > > Signed-off-by: Herve Codina > > So it is a bridge chip? Please use that terminology since Linux > DRM often talks about bridges. Replying to self: no it's not a bridge, it's a WAN thingy. So perhaps write that this is a WAN interface adapter chip. Yours, Linus Walleij
Re: [PATCH v2 24/28] pinctrl: Add support for the Lantic PEF2256 pinmux
Hi Herve, thanks for your patch! First: is this patch something we could merge separately? I don't see any dependency on the other patches. On Wed, Jul 26, 2023 at 5:04 PM Herve Codina wrote: > The Lantiq PEF2256 is a framer and line interface component designed to > fulfill all required interfacing between an analog E1/T1/J1 line and the > digital PCM system highway/H.100 bus. > > This pinmux support handles the pin muxing part (pins RP(A..D) and pins > XP(A..D)) of the PEF2256. > > Signed-off-by: Herve Codina So it is a bridge chip? Please use that terminology since Linux DRM often talks about bridges. > +++ b/drivers/pinctrl/pinctrl-pef2256-regs.h (...) > +#include "linux/bitfield.h" Really? I don't think there is such a file there. Do you mean <linux/bitfield.h> and does this even compile? > diff --git a/drivers/pinctrl/pinctrl-pef2256.c > b/drivers/pinctrl/pinctrl-pef2256.c (...) > +struct pef2256_pinctrl { > + struct device *dev; > + struct regmap *regmap; > + enum pef2256_version version; > + struct { > + struct pinctrl_desc pctrl_desc; > + const struct pef2256_function_desc *functions; > + unsigned int nfunctions; > + } pinctrl; Uh anonymous struct... can't you just define the struct separately with a name? Or fold it into struct pef2256_pinctrl without the additional struct? Thanks. Otherwise it looks neat! Yours, Linus Walleij
Re: [PATCH v7 7/7] mm/memory_hotplug: Enable runtime update of memmap_on_memory parameter
On 03.08.23 13:30, Michal Hocko wrote: On Thu 03-08-23 11:24:08, David Hildenbrand wrote: [...] would be readable only when the block is offline and it would reallocate vmemmap on the change. Makes sense? Are there any risks? Maybe pfn walkers? The question is: is it of any real value such that it would be worth the cost and risk? One of the primary reasons for memmap_on_memory is that *memory hotplug* succeeds even in low-memory situations (including, low on ZONE_NORMAL situations). Sorry for the late reply, I'm busy with 100 other things. One usecase I would have in mind is a mix of smaller and larger memory blocks. For larger ones you want to have memmap_on_memory in general because they do not eat memory from outside but small(er) ones might be more tricky because now you can add a lot of blocks that would be internally fragmented to prevent larger allocations to form. Okay, I see what you mean. The internal fragmentation might become an issue at some point: for x86-64 with 128 MiB blocks / 2 MiB THP it's not a real issue right now. For an arm64 64k with 512 MiB blocks and 512 MiB THP / hugetlb it could be one. I recall discussing that with Oscar back when he added memmap_on_memory, where we also discussed the variable-sized memory blocks to avoid such internal fragmentation. For small ones you probably want to only use memmap_on_memory when unavoidable: for example, when adding without memmap_on_memory would fail / already failed. Possibly some later memmap relocation might make sense in some scenarios. So you want that behavior already when hotplugging such devices. While there might be value to relocate it later, I'm not sure if that is really worth it, and it does not solve the main use case. Is it worth it? TBH I am not sure same as I am not sure the global default should be writable after boot. If we want to make it more dynamic we should however talk about the proper layer this is implemented on. Agreed. -- Cheers, David / dhildenb
Re: [PATCH v7 7/7] mm/memory_hotplug: Enable runtime update of memmap_on_memory parameter
On 07.08.23 14:27, Michal Hocko wrote: On Sat 05-08-23 19:54:23, Aneesh Kumar K V wrote: [...] Do you see a need for firmware-managed memory to be hotplugged in with different memory block sizes? In short. Yes. Slightly longer, a fixed size memory block semantic is just standing in the way and I would even argue it is actively harmful. Just have a look at ridiculously small memory blocks on ppc. I do understand that it makes some sense to be aligned to the memory model (so sparsemem section aligned). In an ideal world, memory hotplug v2 interface (if we ever go that path) should be physical memory range based. Yes, we discussed that a couple of times already (and so far nobody cared to implement any of that). Small memory block sizes are very beneficial for use cases like PPC DLPAR, virtio-mem, hyperv-balloon, ... essentially in most virtual environments where you might want to add/remove memory in very small granularity. I don't see that changing any time soon. Rather the opposite. Small memory block sizes are suboptimal for large machines where you might never end up removing such memory (boot memory), or when dealing with devices that can only be removed in one piece (DIMM/kmem). We already have memory groups in place to model that. For the latter it might be beneficial to have memory blocks of larger size that correspond to the physical memory ranges. That might also make a memmap (re-)configuration easier. Not sure if that is standing in any way or is harmful, though. -- Cheers, David / dhildenb
Re: [PATCH v4 1/2] nmi_backtrace: Allow excluding an arbitrary CPU
On Fri, Aug 4, 2023 at 10:01 PM Douglas Anderson wrote: > > The APIs that allow backtracing across CPUs have always had a way to > exclude the current CPU. This convenience means callers didn't need to > find a place to allocate a CPU mask just to handle the common case. > > Let's extend the API to take a CPU ID to exclude instead of just a > boolean. This isn't any more complex for the API to handle and allows > the hardlockup detector to exclude a different CPU (the one it already > did a trace for) without needing to find space for a CPU mask. > > Arguably, this new API also encourages safer behavior. Specifically if > the caller wants to avoid tracing the current CPU (maybe because they > already traced the current CPU) this makes it more obvious to the > caller that they need to make sure that the current CPU ID can't > change. > > Acked-by: Michal Hocko > Signed-off-by: Douglas Anderson > --- > > Changes in v4: > - Renamed trigger_allbutself_cpu_backtrace() for when trigger is unsupported. > > Changes in v3: > - ("nmi_backtrace: Allow excluding an arbitrary CPU") new for v3. > > arch/arm/include/asm/irq.h | 2 +- > arch/arm/kernel/smp.c| 4 ++-- > arch/loongarch/include/asm/irq.h | 2 +- > arch/loongarch/kernel/process.c | 4 ++-- > arch/mips/include/asm/irq.h | 2 +- > arch/mips/kernel/process.c | 4 ++-- > arch/powerpc/include/asm/irq.h | 2 +- > arch/powerpc/kernel/stacktrace.c | 4 ++-- > arch/powerpc/kernel/watchdog.c | 4 ++-- > arch/sparc/include/asm/irq_64.h | 2 +- > arch/sparc/kernel/process_64.c | 6 +++--- > arch/x86/include/asm/irq.h | 2 +- > arch/x86/kernel/apic/hw_nmi.c| 4 ++-- > include/linux/nmi.h | 14 +++--- > kernel/watchdog.c| 2 +- > lib/nmi_backtrace.c | 6 +++--- > 16 files changed, 32 insertions(+), 32 deletions(-) > [...] 
> diff --git a/include/linux/nmi.h b/include/linux/nmi.h > index e3e6a64b98e0..7cf7801856a1 100644 > --- a/include/linux/nmi.h > +++ b/include/linux/nmi.h > @@ -157,31 +157,31 @@ static inline void touch_nmi_watchdog(void) > #ifdef arch_trigger_cpumask_backtrace > static inline bool trigger_all_cpu_backtrace(void) > { > - arch_trigger_cpumask_backtrace(cpu_online_mask, false); > + arch_trigger_cpumask_backtrace(cpu_online_mask, -1); > return true; > } > > -static inline bool trigger_allbutself_cpu_backtrace(void) > +static inline bool trigger_allbutcpu_cpu_backtrace(int exclude_cpu) > { > - arch_trigger_cpumask_backtrace(cpu_online_mask, true); > + arch_trigger_cpumask_backtrace(cpu_online_mask, exclude_cpu); > return true; > } > > static inline bool trigger_cpumask_backtrace(struct cpumask *mask) > { > - arch_trigger_cpumask_backtrace(mask, false); > + arch_trigger_cpumask_backtrace(mask, -1); > return true; > } > > static inline bool trigger_single_cpu_backtrace(int cpu) > { > - arch_trigger_cpumask_backtrace(cpumask_of(cpu), false); > + arch_trigger_cpumask_backtrace(cpumask_of(cpu), -1); > return true; > } > > /* generic implementation */ > void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, > - bool exclude_self, > + int exclude_cpu, >void (*raise)(cpumask_t *mask)); > bool nmi_cpu_backtrace(struct pt_regs *regs); > > @@ -190,7 +190,7 @@ static inline bool trigger_all_cpu_backtrace(void) > { > return false; > } > -static inline bool trigger_allbutself_cpu_backtrace(void) > +static inline bool trigger_allbutcpu_cpu_backtrace(void) ^ The parameter here is still wrong. It should be "int exclude_cpu". 
This patch in Andrew's queue is causing build errors on next-20230807 on arm64: kernel/watchdog.c: In function ‘watchdog_timer_fn’: kernel/watchdog.c:521:25: error: too many arguments to function ‘trigger_allbutcpu_cpu_backtrace’ 521 | trigger_allbutcpu_cpu_backtrace(smp_processor_id()); | ^~~ In file included from kernel/watchdog.c:17: ./include/linux/nmi.h:193:20: note: declared here 193 | static inline bool trigger_allbutcpu_cpu_backtrace(void) |^~~ make[3]: *** [scripts/Makefile.build:243: kernel/watchdog.o] Error 1 ChenYu
Re: [PATCH v7 7/7] mm/memory_hotplug: Enable runtime update of memmap_on_memory parameter
On Sat 05-08-23 19:54:23, Aneesh Kumar K V wrote: [...] > Do you see a need for firmware-managed memory to be hotplugged in with > different memory block sizes? In short. Yes. Slightly longer, a fixed size memory block semantic is just standing in the way and I would even argue it is actively harmful. Just have a look at ridiculously small memory blocks on ppc. I do understand that it makes some sense to be aligned to the memory model (so sparsemem section aligned). In an ideal world, memory hotplug v2 interface (if we ever go that path) should be physical memory range based. -- Michal Hocko SUSE Labs
Re: [PATCH v3 1/2] nmi_backtrace: Allow excluding an arbitrary CPU
On Fri 04-08-23 09:06:07, Doug Anderson wrote: > Hi, > > On Fri, Aug 4, 2023 at 8:02 AM Michal Hocko wrote: > > > > > > It would have been slightly safer to modify > > > > arch_trigger_cpumask_backtrace > > > > by switching arguments so that some leftovers are captured easier. > > > > > > I'm not sure I understand. Oh, you're saying make the prototype of > > > arch_trigger_cpumask_backtrace() incompatible so that if someone is > > > directly calling it then it'll be a compile-time error? > > > > exactly. bool to int promotion would be too easy to miss while the > > pointer to int would complain loudly. > > > > > I guess the > > > hope is that nobody is calling that directly and they're calling > > > through the trigger_...() functions. > > > > Hope is one thing, being preventive another. > > > > > For now I'm going to leave this alone. > > > > If you are going to send another version then please consider this. Not > > a hard requirement but better. > > If I do send another version, do you have any suggestions for how to > change this to make it incompatible? I would swap parameters as this seems simplest. > I guess swapping the order of the > parameters would be best? I considered doing that for v4 but I felt > like long term the current order of the parameters was better. Yes the current ordering is better but having it the other way around is not really horrendous either. > I also > considered a rename, but that has different problems. ;-) If I rename both > the #define and the function then if someone has an out-of-tree patch > adding arch_trigger_cpumask_backtrace() for another architecture, like > say arm64, then there would be no compile-time failure indicating that > the out-of-tree patch needs updating. I could rename the functions but > _not_ the #define, I guess? I think that swapping would be simplest as the type mismatch should also catch potential pending out-of-tree implementations. -- Michal Hocko SUSE Labs
[PATCH v8 2/2] PCI: rpaphp: Error out on busy status from get-sensor-state
When certain PHB HW failure causes pHyp to recover PHB, it marks the PE state as temporarily unavailable until recovery is complete. This also triggers an EEH handler in Linux which needs to notify drivers, and perform recovery. But before notifying the driver about the PCI error it uses get_adapter_status()->rpaphp_get_sensor_state()->rtas_call(get-sensor-state) operation of the hotplug_slot to determine if the slot contains a device or not. If the slot is empty, the recovery is skipped entirely. eeh_event_handler() ->eeh_handle_normal_event() ->eeh_slot_presence_check() ->get_adapter_status() ->rpaphp_get_sensor_state() ->rtas_get_sensor() ->rtas_call(get-sensor-state) However on certain PHB failures, the RTAS call rtas_call(get-sensor-state) returns extended busy error (9902) until PHB is recovered by pHyp. Once PHB is recovered, the rtas_call(get-sensor-state) returns success with correct presence status. The RTAS call interface rtas_get_sensor() loops over the RTAS call on extended delay return code (9902) until the return value is either success (0) or error (-1). This causes the EEH handler to get stuck for ~6 seconds before it could notify that the PCI error has been detected and stop any active operations. Hence with running I/O traffic, during this 6 seconds, the network driver continues its operation and hits a timeout (netdev watchdog). [52732.244731] DEBUG: ibm_read_slot_reset_state2() [52732.244762] DEBUG: ret = 0, rets[0]=5, rets[1]=1, rets[2]=4000, rets[3]=> [52732.244798] DEBUG: in eeh_slot_presence_check [52732.244804] DEBUG: error state check [52732.244807] DEBUG: Is slot hotpluggable [52732.244810] DEBUG: hotpluggable ops ? [52732.244953] DEBUG: Calling ops->get_adapter_status [52732.244958] DEBUG: calling rpaphp_get_sensor_state [52736.564262] [ cut here ] [52736.564299] NETDEV WATCHDOG: enP64p1s0f3 (tg3): transmit queue 0 timed o> [52736.564324] WARNING: CPU: 1442 PID: 0 at net/sched/sch_generic.c:478 dev> [...] 
[52736.564505] NIP [c0c32368] dev_watchdog+0x438/0x440 [52736.564513] LR [c0c32364] dev_watchdog+0x434/0x440 On timeouts, network driver starts dumping debug information to console (e.g bnx2 driver calls bnx2x_panic_dump()), and go into recovery path while pHyp is still recovering the PHB. As part of recovery, the driver tries to reset the device and it keeps failing since every PCI read/write returns ff's. And when EEH recovery kicks-in, the driver is unable to recover the device. This impacts the ssh connection and leads to the system being inaccessible. To get the NIC working again it needs a reboot or re-assign the I/O adapter from HMC. [ 9531.168587] EEH: Beginning: 'slot_reset' [ 9531.168601] PCI 0013:01:00.0#1: EEH: Invoking bnx2x->slot_reset() [...] [ 9614.110094] bnx2x: [bnx2x_func_stop:9129(enP19p1s0f0)]FUNC_STOP ramrod failed. Running a dry transaction [ 9614.110300] bnx2x: [bnx2x_igu_int_disable:902(enP19p1s0f0)]BUG! Proper val not read from IGU! [ 9629.178067] bnx2x: [bnx2x_fw_command:3055(enP19p1s0f0)]FW failed to respond! [ 9629.178085] bnx2x 0013:01:00.0 enP19p1s0f0: bc 7.10.4 [ 9629.178091] bnx2x: [bnx2x_fw_dump_lvl:789(enP19p1s0f0)]Cannot dump MCP info while in PCI error [ 9644.241813] bnx2x: [bnx2x_io_slot_reset:14245(enP19p1s0f0)]IO slot reset --> driver unload [...] [ 9644.241819] PCI 0013:01:00.0#1: EEH: bnx2x driver reports: 'disconnect' [ 9644.241823] PCI 0013:01:00.1#1: EEH: Invoking bnx2x->slot_reset() [ 9644.241827] bnx2x: [bnx2x_io_slot_reset:14229(enP19p1s0f1)]IO slot reset initializing... [ 9644.241916] bnx2x 0013:01:00.1: enabling device (0140 -> 0142) [ 9644.258604] bnx2x: [bnx2x_io_slot_reset:14245(enP19p1s0f1)]IO slot reset --> driver unload [ 9644.258612] PCI 0013:01:00.1#1: EEH: bnx2x driver reports: 'disconnect' [ 9644.258615] EEH: Finished:'slot_reset' with aggregate recovery state:'disconnect' [ 9644.258620] EEH: Unable to recover from failure from PHB#13-PE#1. 
[ 9644.261811] EEH: Beginning: 'error_detected(permanent failure)' [...] [ 9644.261823] EEH: Finished:'error_detected(permanent failure)' Hence, it becomes important to inform driver about the PCI error detection as early as possible, so that driver is aware of PCI error and waits for EEH handler's next action for successful recovery. Current implementation uses rtas_get_sensor() API which blocks the slot check state until RTAS call returns success. To avoid this, fix the PCI hotplug driver (rpaphp) to return an error (-EBUSY) if the slot presence state can not be detected immediately while PE is in EEH recovery state. Change rpaphp_get_sensor_state() to invoke rtas_call(get-sensor-state) directly only if the respective PE is in EEH recovery state, and take actions based on RTAS return status. This way EEH handler will not be blocked on rpaphp_get_sensor_state() and can immediately notify driver about the PCI error and stop any active
[PATCH v8 1/2] powerpc/rtas: Rename rtas_error_rc to rtas_generic_errno
rtas_generic_errno() function will convert the generic rtas return codes into errno. Also, #define descriptive names for rtas return codes and use it instead of numeric values. Signed-off-by: Mahesh Salgaonkar --- (no changes since v7) Change in V7: - Until v6 there was only one patch with subject "PCI hotplug: rpaphp: Error out on busy status from get-sensor-state". Starting from v7, adding this new patch to introduce rtas_generic_errno() to handle generic rtas error codes. https://lore.kernel.org/all/20220429162545.GA79541@bhelgaas/ --- arch/powerpc/include/asm/rtas.h | 10 +++ arch/powerpc/kernel/rtas.c | 53 --- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3abe15ac79db1..5572a0a2f6e18 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -202,7 +202,9 @@ typedef struct { #define RTAS_USER_REGION_SIZE (64 * 1024) /* RTAS return status codes */ -#define RTAS_BUSY -2/* RTAS Busy */ +#define RTAS_HARDWARE_ERROR(-1) /* Hardware Error */ +#define RTAS_BUSY (-2) /* RTAS Busy */ +#define RTAS_INVALID_PARAMETER (-3) /* Invalid indicator/domain/sensor etc. 
*/ #define RTAS_EXTENDED_DELAY_MIN9900 #define RTAS_EXTENDED_DELAY_MAX9905 @@ -212,6 +214,11 @@ typedef struct { #define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */ #define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */ +/* statuses specific to get-sensor-state */ +#define RTAS_SLOT_UNISOLATED (-9000) +#define RTAS_SLOT_NOT_UNISOLATED (-9001) +#define RTAS_SLOT_NOT_USABLE (-9002) + /* RTAS event classes */ #define RTAS_INTERNAL_ERROR0x8000 /* set bit 0 */ #define RTAS_EPOW_WARNING 0x4000 /* set bit 1 */ @@ -425,6 +432,7 @@ extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); int rtas_ibm_suspend_me(int *fw_status); +int rtas_generic_errno(int rtas_rc); struct rtc_time; extern time64_t rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c087320ff..80b6099e8ce20 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1330,33 +1330,34 @@ bool __ref rtas_busy_delay(int status) } EXPORT_SYMBOL_GPL(rtas_busy_delay); -static int rtas_error_rc(int rtas_rc) +int rtas_generic_errno(int rtas_rc) { int rc; switch (rtas_rc) { - case -1:/* Hardware Error */ - rc = -EIO; - break; - case -3:/* Bad indicator/domain/etc */ - rc = -EINVAL; - break; - case -9000: /* Isolation error */ - rc = -EFAULT; - break; - case -9001: /* Outstanding TCE/PTE */ - rc = -EEXIST; - break; - case -9002: /* No usable slot */ - rc = -ENODEV; - break; - default: - pr_err("%s: unexpected error %d\n", __func__, rtas_rc); - rc = -ERANGE; - break; + case RTAS_HARDWARE_ERROR: /* Hardware Error */ + rc = -EIO; + break; + case RTAS_INVALID_PARAMETER:/* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case RTAS_SLOT_UNISOLATED: /* Isolation error */ + rc = -EFAULT; + break; + case RTAS_SLOT_NOT_UNISOLATED: /* Outstanding TCE/PTE */ + rc = 
-EEXIST; + break; + case RTAS_SLOT_NOT_USABLE: /* No usable slot */ + rc = -ENODEV; + break; + default: + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); + rc = -ERANGE; + break; } return rc; } +EXPORT_SYMBOL(rtas_generic_errno); int rtas_get_power_level(int powerdomain, int *level) { @@ -1370,7 +1371,7 @@ int rtas_get_power_level(int powerdomain, int *level) udelay(1); if (rc < 0) - return rtas_error_rc(rc); + return rtas_generic_errno(rc); return rc; } EXPORT_SYMBOL_GPL(rtas_get_power_level); @@ -1388,7 +1389,7 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel) } while (rtas_busy_delay(rc)); if (rc < 0) - return rtas_error_rc(rc); + return rtas_generic_errno(rc); return rc; } EXPORT_SYMBOL_GPL(rtas_set_power_level); @@
Re: [PATCH] perf test: Fix parse-events tests to skip parametrized events
> On 07-Aug-2023, at 10:20 AM, Athira Rajeev > wrote: > > Testcase "Parsing of all PMU events from sysfs" parses events for > all PMUs, and not just cpu. In case of powerpc, the PowerVM > environment supports events from hv_24x7 and hv_gpci PMU which > is of example format like below: > > - hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/ > - hv_gpci/event,partition_id=?/ > > The value for "?" needs to be filled in depending on system > configuration. It is better to skip these parametrized events > in this test as it is done in: > 'commit b50d691e50e6 ("perf test: Fix "all PMU test" to skip > parametrized events")' which handled a similar instance with > "all PMU test". > > Fix parse-events test to skip parametrized events since > it needs proper setup of the parameters. > > Signed-off-by: Athira Rajeev > --- Thanks Athira for the fix. With this fix applied the reported problem is fixed. 6.1: Test event parsing: Ok 6.2: Parsing of all PMU events from sysfs : Ok 6.3: Parsing of given PMU events from sysfs: Ok Tested-by: Sachin Sant - Sachin
Re: [PATCH] tools/perf: Fix bpf__probe to set bpf_prog_type type only if differs from the desired one
> On 07-Aug-2023, at 10:22 AM, Athira Rajeev > wrote: > > The test "BPF prologue generation" fails as below: > > Writing event: p:perf_bpf_probe/func _text+10423200 f_mode=+20(%gpr3):x32 > offset=%gpr4:s64 orig=%gpr5:s32 > In map_prologue, ntevs=1 > mapping[0]=0 > libbpf: prog 'bpf_func__null_lseek': BPF program load failed: Permission > denied > libbpf: prog 'bpf_func__null_lseek': -- BEGIN PROG LOAD LOG -- > btf_vmlinux is malformed > reg type unsupported for arg#0 function bpf_func__null_lseek#5 > 0: R1=ctx(off=0,imm=0) R10=fp0 > ; > 0: (57) r3 &= 2 > R3 !read_ok > processed 1 insns (limit 100) max_states_per_insn 0 total_states 0 > peak_states 0 mark_read 0 > -- END PROG LOAD LOG -- > libbpf: prog 'bpf_func__null_lseek': failed to load: -13 > libbpf: failed to load object '[bpf_prologue_test]' > bpf: load objects failed: err=-13: (Permission denied) > Failed to add events selected by BPF > > This failure occurs after this commit: > commit d6e6286a12e7 ("libbpf: disassociate section handler > on explicit bpf_program__set_type() call") > > With this change, SEC_DEF handler in libbpf which is determined > initially based on program's SEC() is set to NULL. The change > is made because sec_def is not valid when user sets the program > type with bpf_program__set_type function. This commit also fixed > bpf_prog_test_load() helper in selftests/bpf to force-set program > type only if it differs from the desired one. > > The "bpf__probe" function in util/bpf-loader.c, also calls > bpf_program__set_type to set bpf_prog_type. Add similar fix in > here as well to avoid setting sec_def to NULL. > > Reported-by: Sachin Sant > Signed-off-by: Athira Rajeev > --- Thanks Athira for the fix. With this patch applied perf BPF prologue sub test works correctly. 42: BPF filter : 42.1: Basic BPF filtering: Ok 42.2: BPF pinning : Ok 42.3: BPF prologue generation : Ok Tested-by: Sachin Sant Can you please use the above mentioned id (without vnet) in the Reported-by ? - Sachin