[PATCH 6/6] ACPI/OSL: Fix performance issue in system memory lockings.
It is reported that the synchronize_rcu() used in the hot path is not performance friendly: <6>[3.998532] acpi_ut_update_object_reference: obj:88003c305f78 type:10 <6>[4.006137] acpi_ut_update_ref_count:locked count:1 action:1 flags:286 <6>[4.013450] acpi_ut_delete_internal_obj: obj:88003c305f78 flag:2 <6>[4.020569] acpi_ut_delete_internal_obj:second_desc handler_desc:88003c0784c8 flags:1 <6>[4.029729] acpi_ut_delete_internal_obj: going to setup @ 81489299 <6>[4.037431] acpi_ev_system_memory_region_setup: addr:c90b0070, length:16 *<6>[4.045716] acpi_os_unmap_memory: locked *<6>[4.050111] acpi_os_unmap_memory: found map *<6>[4.054798] acpi_os_unmap_memory: map dropped *<6>[4.059678] acpi_os_unmap_memory: unlocked *<6>[ 122.761255] acpi_os_map_cleanup: after synchronize_rcu *<6>[ 122.767027] acpi_os_unmap_memory: end *<6>[ 122.771134] After unmap *<6>[ 122.773877] After ACPI_FREE <6>[ 122.777010] acpi_ut_delete_internal_obj: after setup <6>[ 122.782576] acpi_ut_update_object_reference: obj:88003c0784c8 type:24 <6>[ 122.790183] acpi_ut_update_ref_count:locked count:1c action:1 flags:282 <6>[ 122.797595] acpi_ut_delete_internal_obj: handler_desc removed <6>[ 122.804034] acpi_ut_delete_internal_obj: second_desc deleted <6>[ 122.810376] acpi_ut_delete_internal_obj: object deleted <6>[ 122.816231] acpi_ns_detach_object: reference removed Note: The "*" marked acpi_os_unmap_memory() instrumentation messages are added by the reporter. The patch converts RCU synchronization into the deferred executed work queue item to eliminate the consumed time from the hot path. By doing so, acpi_os_read_memory()/acpi_os_write_memory() need to hold the reference count and the RCU locks will then be unnecessary. The original acpi_ioremap_lock (renamed to acpi_map_lock) must be converted into spinlock to be used for both the IRQ and the task contexts. But it is still needed that the map operations are serialized because the spinlock cannot be held around ioremap(), so that the parallel map operations can wait for each other instead of inserting duplicated entries into the acpi_ioremaps. This is achieved by introducing another acpi_serial_map_mutex. Note that the unmap serialization is not required due to the reference count protected list removal, thus acpi_os_unmap_iomem() and acpi_map_relinquish() are not protected by the acpi_serial_map_mutex. The test feedback from the reporter: "The google reboot stability test passed with your 6 patches. Thanks for the help." Known issues: 1. It is better to use a seperate work queue other than the ACPICA dedicated GPE work queue to perform mapping relinquishment. Until a real case can be seen on bugzilla.kernel.org complaining this, no further improvements are done in this patch. Signed-off-by: Lv Zheng Reported-and-tested-by: Fei Yang --- drivers/acpi/mem.c | 87 ++-- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/drivers/acpi/mem.c b/drivers/acpi/mem.c index c7c35f7..b631007 100644 --- a/drivers/acpi/mem.c +++ b/drivers/acpi/mem.c @@ -30,13 +30,13 @@ struct acpi_ioremap { unsigned long refcount; }; +/* ioremap() serialization, no need to serialize iounmap() operations */ +static DEFINE_MUTEX(acpi_serial_map_mutex); + static LIST_HEAD(acpi_ioremaps); -static DEFINE_MUTEX(acpi_ioremap_lock); +static DEFINE_SPINLOCK(acpi_map_lock); -/* - * The following functions must be called with 'acpi_ioremap_lock' or RCU - * read lock held. - */ +/* The following functions must be called with 'acpi_map_lock' held. */ static inline void acpi_map_get(struct acpi_ioremap *map) { map->refcount++; @@ -45,7 +45,7 @@ static inline void acpi_map_get(struct acpi_ioremap *map) static inline void acpi_map_put(struct acpi_ioremap *map) { if (!--map->refcount) - list_del_rcu(>list); + list_del(>list); } static struct acpi_ioremap * @@ -53,7 +53,7 @@ acpi_map_lookup_phys(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; - list_for_each_entry_rcu(map, _ioremaps, list) + list_for_each_entry(map, _ioremaps, list) if (map->phys <= phys && phys + size <= map->phys + map->size) return map; @@ -66,7 +66,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) { struct acpi_ioremap *map; - list_for_each_entry_rcu(map, _ioremaps, list) + list_for_each_entry(map, _ioremaps, list) if (map->virt <= virt && virt + size <= map->virt + map->size) return map; @@ -80,6 +80,7 @@ acpi_map2virt(struct acpi_ioremap *map, acpi_physical_address phys) return map ? map->virt + (phys - map->phys) : NULL; } +/* The following functions must be called without 'acpi_map_lock' held. */ #ifndef
[PATCH 6/6] ACPI/OSL: Fix performance issue in system memory lockings.
It is reported that the synchronize_rcu() used in the hot path is not performance friendly: 6[3.998532] acpi_ut_update_object_reference: obj:88003c305f78 type:10 6[4.006137] acpi_ut_update_ref_count:locked count:1 action:1 flags:286 6[4.013450] acpi_ut_delete_internal_obj: obj:88003c305f78 flag:2 6[4.020569] acpi_ut_delete_internal_obj:second_desc handler_desc:88003c0784c8 flags:1 6[4.029729] acpi_ut_delete_internal_obj: going to setup @ 81489299 6[4.037431] acpi_ev_system_memory_region_setup: addr:c90b0070, length:16 *6[4.045716] acpi_os_unmap_memory: locked *6[4.050111] acpi_os_unmap_memory: found map *6[4.054798] acpi_os_unmap_memory: map dropped *6[4.059678] acpi_os_unmap_memory: unlocked *6[ 122.761255] acpi_os_map_cleanup: after synchronize_rcu *6[ 122.767027] acpi_os_unmap_memory: end *6[ 122.771134] After unmap *6[ 122.773877] After ACPI_FREE 6[ 122.777010] acpi_ut_delete_internal_obj: after setup 6[ 122.782576] acpi_ut_update_object_reference: obj:88003c0784c8 type:24 6[ 122.790183] acpi_ut_update_ref_count:locked count:1c action:1 flags:282 6[ 122.797595] acpi_ut_delete_internal_obj: handler_desc removed 6[ 122.804034] acpi_ut_delete_internal_obj: second_desc deleted 6[ 122.810376] acpi_ut_delete_internal_obj: object deleted 6[ 122.816231] acpi_ns_detach_object: reference removed Note: The * marked acpi_os_unmap_memory() instrumentation messages are added by the reporter. The patch converts RCU synchronization into the deferred executed work queue item to eliminate the consumed time from the hot path. By doing so, acpi_os_read_memory()/acpi_os_write_memory() need to hold the reference count and the RCU locks will then be unnecessary. The original acpi_ioremap_lock (renamed to acpi_map_lock) must be converted into spinlock to be used for both the IRQ and the task contexts. But it is still needed that the map operations are serialized because the spinlock cannot be held around ioremap(), so that the parallel map operations can wait for each other instead of inserting duplicated entries into the acpi_ioremaps. This is achieved by introducing another acpi_serial_map_mutex. Note that the unmap serialization is not required due to the reference count protected list removal, thus acpi_os_unmap_iomem() and acpi_map_relinquish() are not protected by the acpi_serial_map_mutex. The test feedback from the reporter: The google reboot stability test passed with your 6 patches. Thanks for the help. Known issues: 1. It is better to use a seperate work queue other than the ACPICA dedicated GPE work queue to perform mapping relinquishment. Until a real case can be seen on bugzilla.kernel.org complaining this, no further improvements are done in this patch. Signed-off-by: Lv Zheng lv.zh...@intel.com Reported-and-tested-by: Fei Yang fei.y...@intel.com --- drivers/acpi/mem.c | 87 ++-- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/drivers/acpi/mem.c b/drivers/acpi/mem.c index c7c35f7..b631007 100644 --- a/drivers/acpi/mem.c +++ b/drivers/acpi/mem.c @@ -30,13 +30,13 @@ struct acpi_ioremap { unsigned long refcount; }; +/* ioremap() serialization, no need to serialize iounmap() operations */ +static DEFINE_MUTEX(acpi_serial_map_mutex); + static LIST_HEAD(acpi_ioremaps); -static DEFINE_MUTEX(acpi_ioremap_lock); +static DEFINE_SPINLOCK(acpi_map_lock); -/* - * The following functions must be called with 'acpi_ioremap_lock' or RCU - * read lock held. - */ +/* The following functions must be called with 'acpi_map_lock' held. */ static inline void acpi_map_get(struct acpi_ioremap *map) { map-refcount++; @@ -45,7 +45,7 @@ static inline void acpi_map_get(struct acpi_ioremap *map) static inline void acpi_map_put(struct acpi_ioremap *map) { if (!--map-refcount) - list_del_rcu(map-list); + list_del(map-list); } static struct acpi_ioremap * @@ -53,7 +53,7 @@ acpi_map_lookup_phys(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; - list_for_each_entry_rcu(map, acpi_ioremaps, list) + list_for_each_entry(map, acpi_ioremaps, list) if (map-phys = phys phys + size = map-phys + map-size) return map; @@ -66,7 +66,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) { struct acpi_ioremap *map; - list_for_each_entry_rcu(map, acpi_ioremaps, list) + list_for_each_entry(map, acpi_ioremaps, list) if (map-virt = virt virt + size = map-virt + map-size) return map; @@ -80,6 +80,7 @@ acpi_map2virt(struct acpi_ioremap *map, acpi_physical_address phys) return map ? map-virt + (phys - map-phys) : NULL; } +/* The following functions must be called without 'acpi_map_lock' held. */ #ifndef