Supplies the required functionality to expose information and
statistics over sysfs for a given peer memory client.

This mechanism enables a userspace application to check
which peers are available (based on name & version) and, based on that,
to decide whether it can run successfully.

The root sysfs directory is /sys/kernel/mm/memory_peers/<peer_name>; under
that directory reside the files that represent the statistics for that peer.

Signed-off-by: Yishai Hadas <yish...@mellanox.com>
Signed-off-by: Shachar Raindel <rain...@mellanox.com>
---
 Documentation/infiniband/peer_memory.txt |   64 +++++++++
 drivers/infiniband/core/peer_mem.c       |  211 +++++++++++++++++++++++++++++-
 drivers/infiniband/core/umem.c           |    6 +
 include/rdma/ib_peer_mem.h               |   13 ++
 4 files changed, 293 insertions(+), 1 deletions(-)
 create mode 100644 Documentation/infiniband/peer_memory.txt

diff --git a/Documentation/infiniband/peer_memory.txt 
b/Documentation/infiniband/peer_memory.txt
new file mode 100644
index 0000000..c09cde2
--- /dev/null
+++ b/Documentation/infiniband/peer_memory.txt
@@ -0,0 +1,64 @@
+Peer-Direct technology allows RDMA operations to directly target
+memory in external hardware devices, such as GPU cards, SSD based
+storage, dedicated ASIC accelerators, etc.
+
+This technology allows RDMA-based (over InfiniBand/RoCE) applications
+to avoid unneeded data copying when sharing data between peer hardware
+devices.
+
+This file contains documentation for the sysfs interface provided by
+the feature. For documentation of the kernel level interface that peer
+memory clients should implement, please refer to the API documentation
+in include/rdma/peer_mem.h
+
+From the user application perspective, it is free to perform memory
+registration using pointers and handles provided by peer memory
+clients (e.g. OpenCL, CUDA, FPGA-specific handles, etc.). The kernel
+will transparently select the appropriate peer memory client to
+perform the memory registration, as needed.
+
+
+The peer-memory subsystem allows the user to monitor the current usage
+of the technology through a basic sysfs interface. For each peer
+memory client (e.g. GPU type, FPGA, etc.), the following files are
+created:
+
+* /sys/kernel/mm/memory_peers/<peer_name>/version - the version string
+  of the peer memory client
+
+* /sys/kernel/mm/memory_peers/<peer_name>/num_alloc_mrs - the number
+  of memory regions allocated using this peer's memory. Note that this
+  counter is not decreased during de-registration of memory regions;
+  it is monotonically increasing. To get the number of memory regions
+  currently allocated on this peer, subtract the value of
+  num_dealloc_mrs from this counter.
+
+* /sys/kernel/mm/memory_peers/<peer_name>/num_dealloc_mrs - the number
+  of de-allocated memory regions that were originally using peer
+  memory.
+
+* /sys/kernel/mm/memory_peers/<peer_name>/num_reg_pages - the amount
+  of peer_name's memory pages that have been mapped through peer
+  direct. Note that this is a monotonically increasing counter. To get
+  the number of pages currently mapped, subtract the value of
+  num_dereg_pages from this counter. Also, pay attention to the fact
+  that this counter is using device pages, which might differ in size
+  from the host memory page size.
+
+* /sys/kernel/mm/memory_peers/<peer_name>/num_dereg_pages - the amount
+  of peer memory pages that have been unmapped through peer direct for
+  peer_name.
+
+* /sys/kernel/mm/memory_peers/<peer_name>/num_reg_bytes - the number
+  of bytes that have been mapped through peer direct from
+  peer_name. Note that this is a monotonically increasing counter. To
+  get the number of bytes currently mapped, subtract the value of
+  num_dereg_bytes from this counter.
+
+* /sys/kernel/mm/memory_peers/<peer_name>/num_dereg_bytes - the number
+  of bytes that have been unmapped through peer direct from peer_name.
+
+* /sys/kernel/mm/memory_peers/<peer_name>/num_free_callbacks - the
+  number of times the peer used the "invalidate" callback to free a
+  memory region before the application de-registered the memory
+  region.
diff --git a/drivers/infiniband/core/peer_mem.c 
b/drivers/infiniband/core/peer_mem.c
index d4cf31c..e5e4c0c 100644
--- a/drivers/infiniband/core/peer_mem.c
+++ b/drivers/infiniband/core/peer_mem.c
@@ -36,6 +36,207 @@
 
 static DEFINE_MUTEX(peer_memory_mutex);
 static LIST_HEAD(peer_memory_list);
+/* kobject of the shared "memory_peers" directory, set by create_peer_sysfs() */
+static struct kobject *peers_kobj;
+
+/* Forward declarations used by the sysfs show callbacks below. */
+static void complete_peer(struct kref *kref);
+static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj);
+/*
+ * sysfs "version" attribute: emit the peer memory client's version string
+ * followed by a newline.
+ *
+ * The peer is looked up by its kobject; the reference taken by that lookup
+ * is dropped once the string has been copied out.
+ *
+ * Returns the number of bytes written to @buf, or 0 when no registered
+ * peer owns @kobj.
+ */
+static ssize_t version_show(struct kobject *kobj,
+                           struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *ib_peer_client = get_peer_by_kobj(kobj);
+       ssize_t len;
+
+       /* not found - nothing is returned */
+       if (!ib_peer_client)
+               return 0;
+
+       /*
+        * The version string comes from the registered peer client and its
+        * length is not checked here, so bound the copy to the PAGE_SIZE
+        * buffer that sysfs passes to show() callbacks.
+        */
+       len = scnprintf(buf, PAGE_SIZE, "%s\n",
+                       ib_peer_client->peer_mem->version);
+       kref_put(&ib_peer_client->ref, complete_peer);
+       return len;
+}
+
+/*
+ * sysfs "num_alloc_mrs" attribute: print the peer's num_alloc_mrs counter
+ * as an unsigned decimal followed by a newline.
+ *
+ * Returns the length of the text placed in @buf, or 0 if @kobj does not
+ * belong to a registered peer.
+ */
+static ssize_t num_alloc_mrs_show(struct kobject *kobj,
+                                 struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *peer = get_peer_by_kobj(kobj);
+       ssize_t len;
+       u64 count;
+
+       /* No matching peer: the attribute reads as empty. */
+       if (!peer)
+               return 0;
+
+       count = (u64)atomic64_read(&peer->stats.num_alloc_mrs);
+       len = sprintf(buf, "%llu\n", count);
+       /* Drop the reference taken by get_peer_by_kobj(). */
+       kref_put(&peer->ref, complete_peer);
+       return len;
+}
+
+/*
+ * sysfs "num_dealloc_mrs" attribute: print the peer's num_dealloc_mrs
+ * counter as an unsigned decimal followed by a newline.
+ *
+ * Returns the length of the text placed in @buf, or 0 if @kobj does not
+ * belong to a registered peer.
+ */
+static ssize_t num_dealloc_mrs_show(struct kobject *kobj,
+                                   struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *peer = get_peer_by_kobj(kobj);
+       ssize_t len;
+       u64 count;
+
+       /* No matching peer: the attribute reads as empty. */
+       if (!peer)
+               return 0;
+
+       count = (u64)atomic64_read(&peer->stats.num_dealloc_mrs);
+       len = sprintf(buf, "%llu\n", count);
+       /* Drop the reference taken by get_peer_by_kobj(). */
+       kref_put(&peer->ref, complete_peer);
+       return len;
+}
+
+/*
+ * sysfs "num_reg_pages" attribute: print the peer's num_reg_pages counter
+ * as an unsigned decimal followed by a newline.
+ *
+ * Returns the length of the text placed in @buf, or 0 if @kobj does not
+ * belong to a registered peer.
+ */
+static ssize_t num_reg_pages_show(struct kobject *kobj,
+                                 struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *peer = get_peer_by_kobj(kobj);
+       ssize_t len;
+       u64 count;
+
+       /* No matching peer: the attribute reads as empty. */
+       if (!peer)
+               return 0;
+
+       count = (u64)atomic64_read(&peer->stats.num_reg_pages);
+       len = sprintf(buf, "%llu\n", count);
+       /* Drop the reference taken by get_peer_by_kobj(). */
+       kref_put(&peer->ref, complete_peer);
+       return len;
+}
+
+/*
+ * sysfs "num_dereg_pages" attribute: print the peer's num_dereg_pages
+ * counter as an unsigned decimal followed by a newline.
+ *
+ * Returns the length of the text placed in @buf, or 0 if @kobj does not
+ * belong to a registered peer.
+ */
+static ssize_t num_dereg_pages_show(struct kobject *kobj,
+                                   struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *peer = get_peer_by_kobj(kobj);
+       ssize_t len;
+       u64 count;
+
+       /* No matching peer: the attribute reads as empty. */
+       if (!peer)
+               return 0;
+
+       count = (u64)atomic64_read(&peer->stats.num_dereg_pages);
+       len = sprintf(buf, "%llu\n", count);
+       /* Drop the reference taken by get_peer_by_kobj(). */
+       kref_put(&peer->ref, complete_peer);
+       return len;
+}
+
+/*
+ * sysfs "num_reg_bytes" attribute: print the peer's num_reg_bytes counter
+ * as an unsigned decimal followed by a newline.
+ *
+ * Returns the length of the text placed in @buf, or 0 if @kobj does not
+ * belong to a registered peer.
+ */
+static ssize_t num_reg_bytes_show(struct kobject *kobj,
+                                 struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *peer = get_peer_by_kobj(kobj);
+       ssize_t len;
+       u64 count;
+
+       /* No matching peer: the attribute reads as empty. */
+       if (!peer)
+               return 0;
+
+       count = (u64)atomic64_read(&peer->stats.num_reg_bytes);
+       len = sprintf(buf, "%llu\n", count);
+       /* Drop the reference taken by get_peer_by_kobj(). */
+       kref_put(&peer->ref, complete_peer);
+       return len;
+}
+
+/*
+ * sysfs "num_dereg_bytes" attribute: print the peer's num_dereg_bytes
+ * counter as an unsigned decimal followed by a newline.
+ *
+ * Returns the length of the text placed in @buf, or 0 if @kobj does not
+ * belong to a registered peer.
+ */
+static ssize_t num_dereg_bytes_show(struct kobject *kobj,
+                                   struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *peer = get_peer_by_kobj(kobj);
+       ssize_t len;
+       u64 count;
+
+       /* No matching peer: the attribute reads as empty. */
+       if (!peer)
+               return 0;
+
+       count = (u64)atomic64_read(&peer->stats.num_dereg_bytes);
+       len = sprintf(buf, "%llu\n", count);
+       /* Drop the reference taken by get_peer_by_kobj(). */
+       kref_put(&peer->ref, complete_peer);
+       return len;
+}
+
+/*
+ * sysfs "num_free_callbacks" attribute: print the peer's
+ * num_free_callbacks counter as an unsigned decimal followed by a newline.
+ *
+ * The counter is a plain unsigned long and is read here without taking
+ * the peer client's lock.
+ *
+ * Returns the length of the text placed in @buf, or 0 if @kobj does not
+ * belong to a registered peer.
+ */
+static ssize_t num_free_callbacks_show(struct kobject *kobj,
+                                      struct kobj_attribute *attr, char *buf)
+{
+       struct ib_peer_memory_client *peer = get_peer_by_kobj(kobj);
+       unsigned long calls;
+       ssize_t len;
+
+       /* No matching peer: the attribute reads as empty. */
+       if (!peer)
+               return 0;
+
+       calls = peer->stats.num_free_callbacks;
+       len = sprintf(buf, "%lu\n", calls);
+       /* Drop the reference taken by get_peer_by_kobj(). */
+       kref_put(&peer->ref, complete_peer);
+       return len;
+}
+
+/*
+ * Read-only attribute objects, one per file in a peer's sysfs directory.
+ * __ATTR_RO(name) binds each of them to the matching name##_show() callback.
+ */
+static struct kobj_attribute version_attr = __ATTR_RO(version);
+static struct kobj_attribute num_alloc_mrs = __ATTR_RO(num_alloc_mrs);
+static struct kobj_attribute num_dealloc_mrs = __ATTR_RO(num_dealloc_mrs);
+static struct kobj_attribute num_reg_pages = __ATTR_RO(num_reg_pages);
+static struct kobj_attribute num_dereg_pages = __ATTR_RO(num_dereg_pages);
+static struct kobj_attribute num_reg_bytes = __ATTR_RO(num_reg_bytes);
+static struct kobj_attribute num_dereg_bytes = __ATTR_RO(num_dereg_bytes);
+static struct kobj_attribute num_free_callbacks =
+       __ATTR_RO(num_free_callbacks);
+
+/* NULL-terminated list installed as each peer's attribute group. */
+static struct attribute *peer_mem_attrs[] = {
+       &version_attr.attr,
+       &num_alloc_mrs.attr,
+       &num_dealloc_mrs.attr,
+       &num_reg_pages.attr,
+       &num_dereg_pages.attr,
+       &num_reg_bytes.attr,
+       &num_dereg_bytes.attr,
+       &num_free_callbacks.attr,
+       NULL,
+};
+
+/*
+ * Release the sysfs objects of one peer client.
+ *
+ * Drops the peer's own kobject and, when the peer list is empty, the
+ * shared "memory_peers" directory as well. Both pointers are cleared after
+ * the put so that no stale kobject pointer remains.
+ *
+ * Called from ib_unregister_peer_memory_client() with peer_memory_mutex
+ * held, after the peer has been removed from peer_memory_list.
+ *
+ * NOTE(review): the show callbacks take peer_memory_mutex through
+ * get_peer_by_kobj(). If removing the sysfs directory waits for in-flight
+ * show() calls, doing the put while holding that mutex could deadlock -
+ * confirm against the sysfs/kernfs removal semantics of the target kernel.
+ */
+static void destroy_peer_sysfs(struct ib_peer_memory_client *ib_peer_client)
+{
+       kobject_put(ib_peer_client->kobj);
+       ib_peer_client->kobj = NULL;
+
+       if (list_empty(&peer_memory_list)) {
+               kobject_put(peers_kobj);
+               peers_kobj = NULL;
+       }
+}
+
+/*
+ * Create the sysfs directory and attribute files for one peer client.
+ *
+ * The shared "memory_peers" directory under /sys/kernel/mm is created when
+ * peer_memory_list is empty, i.e. for the first peer. A directory named
+ * after the peer is then added beneath it and populated with the files
+ * listed in peer_mem_attrs.
+ *
+ * Called from ib_register_peer_memory_client() with peer_memory_mutex
+ * held, before the peer is added to peer_memory_list.
+ *
+ * Returns 0 on success. On failure returns a negative errno: -ENOMEM when
+ * a kobject could not be created, otherwise the error reported by
+ * sysfs_create_group(). Everything created by this call is released again
+ * and the corresponding pointers are cleared.
+ */
+static int create_peer_sysfs(struct ib_peer_memory_client *ib_peer_client)
+{
+       int ret;
+
+       if (list_empty(&peer_memory_list)) {
+               /* creating under /sys/kernel/mm */
+               peers_kobj = kobject_create_and_add("memory_peers", mm_kobj);
+               if (!peers_kobj)
+                       return -ENOMEM;
+       }
+
+       ib_peer_client->peer_mem_attr_group.attrs = peer_mem_attrs;
+       /*
+        * The directory is created explicitly below so that its kernel
+        * object is available for further usage; the group therefore
+        * carries no name of its own.
+        */
+       ib_peer_client->peer_mem_attr_group.name = NULL;
+       ib_peer_client->kobj =
+               kobject_create_and_add(ib_peer_client->peer_mem->name,
+                                      peers_kobj);
+       if (!ib_peer_client->kobj) {
+               /* Same error code as the "memory_peers" creation failure. */
+               ret = -ENOMEM;
+               goto free;
+       }
+
+       /* Create the files associated with this kobject */
+       ret = sysfs_create_group(ib_peer_client->kobj,
+                                &ib_peer_client->peer_mem_attr_group);
+       if (ret)
+               goto peer_free;
+
+       return 0;
+
+peer_free:
+       kobject_put(ib_peer_client->kobj);
+       ib_peer_client->kobj = NULL;
+
+free:
+       /* Only the first peer created the shared directory; undo that. */
+       if (list_empty(&peer_memory_list)) {
+               kobject_put(peers_kobj);
+               peers_kobj = NULL;
+       }
+
+       return ret;
+}
+
+/*
+ * Find the registered peer client whose sysfs kobject is @kobj.
+ *
+ * Walks peer_memory_list under peer_memory_mutex. On a match a reference
+ * is taken on the client's kref before the mutex is released; the caller
+ * must drop it with kref_put().
+ *
+ * Returns the matching client, or NULL when no listed peer owns @kobj.
+ */
+static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj)
+{
+       struct ib_peer_memory_client *match = NULL;
+       struct ib_peer_memory_client *cur;
+
+       mutex_lock(&peer_memory_mutex);
+       list_for_each_entry(cur, &peer_memory_list, core_peer_list) {
+               if (cur->kobj != kobj)
+                       continue;
+
+               kref_get(&cur->ref);
+               match = cur;
+               break;
+       }
+       mutex_unlock(&peer_memory_mutex);
+
+       return match;
+}
 
 /* Caller should be holding the peer client lock, ib_peer_client->lock */
 static struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client 
*ib_peer_client,
@@ -60,6 +261,7 @@ static int ib_invalidate_peer_memory(void *reg_handle, u64 
core_context)
        int need_unlock = 1;
 
        mutex_lock(&ib_peer_client->lock);
+       ib_peer_client->stats.num_free_callbacks += 1;
        core_ticket = ib_peer_search_context(ib_peer_client, core_context);
        if (!core_ticket)
                goto out;
@@ -251,9 +453,15 @@ void *ib_register_peer_memory_client(const struct 
peer_memory_client *peer_clien
        }
 
        mutex_lock(&peer_memory_mutex);
+       if (create_peer_sysfs(ib_peer_client)) {
+               kfree(ib_peer_client);
+               ib_peer_client = NULL;
+               goto end;
+       }
        list_add_tail(&ib_peer_client->core_peer_list, &peer_memory_list);
-       mutex_unlock(&peer_memory_mutex);
+end:
 
+       mutex_unlock(&peer_memory_mutex);
        return ib_peer_client;
 }
 EXPORT_SYMBOL(ib_register_peer_memory_client);
@@ -264,6 +472,7 @@ void ib_unregister_peer_memory_client(void *reg_handle)
 
        mutex_lock(&peer_memory_mutex);
        list_del(&ib_peer_client->core_peer_list);
+       destroy_peer_sysfs(ib_peer_client);
        mutex_unlock(&peer_memory_mutex);
 
        kref_put(&ib_peer_client->ref, complete_peer);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6655d12..1fa5447 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -86,6 +86,9 @@ static struct ib_umem *peer_umem_get(struct 
ib_peer_memory_client *ib_peer_mem,
        if (ret)
                goto put_pages;
 
+       atomic64_add(umem->nmap, &ib_peer_mem->stats.num_reg_pages);
+       atomic64_add(umem->nmap * umem->page_size, 
&ib_peer_mem->stats.num_reg_bytes);
+       atomic64_inc(&ib_peer_mem->stats.num_alloc_mrs);
        return umem;
 
 put_pages:
@@ -114,6 +117,9 @@ static void peer_umem_release(struct ib_umem *umem)
                            umem->context->device->dma_device);
        peer_mem->put_pages(&umem->sg_head,
                            umem->peer_mem_client_context);
+       atomic64_add(umem->nmap, &ib_peer_mem->stats.num_dereg_pages);
+       atomic64_add(umem->nmap * umem->page_size, 
&ib_peer_mem->stats.num_dereg_bytes);
+       atomic64_inc(&ib_peer_mem->stats.num_dealloc_mrs);
        ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
        kfree(umem);
 }
diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h
index 58e0f99..1b865c8 100644
--- a/include/rdma/ib_peer_mem.h
+++ b/include/rdma/ib_peer_mem.h
@@ -3,6 +3,16 @@
 
 #include <rdma/peer_mem.h>
 
+/*
+ * Per-peer usage counters exposed through sysfs.
+ *
+ * The alloc/reg counters are bumped in peer_umem_get() and the
+ * dealloc/dereg counters in peer_umem_release(); none of them is ever
+ * decremented.
+ */
+struct ib_peer_memory_statistics {
+       /* incremented once per successful peer_umem_get() */
+       atomic64_t num_alloc_mrs;
+       /* incremented once per peer_umem_release() */
+       atomic64_t num_dealloc_mrs;
+       /* accumulates umem->nmap on registration */
+       atomic64_t num_reg_pages;
+       /* accumulates umem->nmap on release */
+       atomic64_t num_dereg_pages;
+       /* accumulates umem->nmap * umem->page_size on registration */
+       atomic64_t num_reg_bytes;
+       /* accumulates umem->nmap * umem->page_size on release */
+       atomic64_t num_dereg_bytes;
+       /*
+        * Not atomic: incremented in ib_invalidate_peer_memory() while
+        * holding the peer client's lock.
+        */
+       unsigned long num_free_callbacks;
+};
+
 struct ib_ucontext;
 struct ib_umem;
 struct invalidation_ctx;
@@ -17,6 +27,9 @@ struct ib_peer_memory_client {
        struct mutex lock;
        struct list_head   core_ticket_list;
        u64     last_ticket;
+       struct kobject *kobj;
+       struct attribute_group peer_mem_attr_group;
+       struct ib_peer_memory_statistics stats;
 };
 
 enum ib_peer_mem_flags {
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to