[RFC 4/6] memtrack: Adds the accounting to keep track of all mmaped/unmapped pages.

2016-10-11 Thread Ruchi Kandoi
Since mmapped pages are already accounted for by PSS, memtrack needs a way
to distinguish the total memory that has not been accounted for elsewhere.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
Signed-off-by: Greg Hackmann <ghackm...@google.com>
---
 drivers/misc/memtrack.c   | 175 --
 drivers/staging/android/ion/ion.c |   5 +-
 include/linux/memtrack.h  |  29 +++
 3 files changed, 180 insertions(+), 29 deletions(-)

diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
index e5c7e03..4b2d17f 100644
--- a/drivers/misc/memtrack.c
+++ b/drivers/misc/memtrack.c
@@ -22,12 +22,19 @@
 #include 
 #include 
 #include 
+#include 
+
+struct memtrack_vma_list {
+   struct hlist_node node;
+   const struct vm_area_struct *vma;
+};
 
 struct memtrack_handle {
struct memtrack_buffer *buffer;
struct rb_node node;
struct rb_root *root;
struct kref refcount;
+   struct hlist_head vma_list;
 };
 
 static struct kmem_cache *memtrack_handle_cache;
@@ -40,8 +47,8 @@ static DEFINE_IDR(mem_idr);
 static DEFINE_IDA(mem_ida);
 #endif
 
-static void memtrack_buffer_install_locked(struct rb_root *root,
-   struct memtrack_buffer *buffer)
+static struct memtrack_handle *memtrack_handle_find_locked(struct rb_root 
*root,
+   struct memtrack_buffer *buffer, bool alloc)
 {
struct rb_node **new = >rb_node, *parent = NULL;
struct memtrack_handle *handle;
@@ -56,22 +63,38 @@ static void memtrack_buffer_install_locked(struct rb_root 
*root,
} else if (handle->buffer->id < buffer->id) {
new = >rb_right;
} else {
-   kref_get(>refcount);
-   return;
+   return handle;
}
}
 
-   handle = kmem_cache_alloc(memtrack_handle_cache, GFP_KERNEL);
-   if (!handle)
-   return;
+   if (alloc) {
+   handle = kmem_cache_alloc(memtrack_handle_cache, GFP_KERNEL);
+   if (!handle)
+   return NULL;
 
-   handle->buffer = buffer;
-   handle->root = root;
-   kref_init(>refcount);
+   handle->buffer = buffer;
+   handle->root = root;
+   kref_init(>refcount);
+   INIT_HLIST_HEAD(>vma_list);
 
-   rb_link_node(>node, parent, new);
-   rb_insert_color(>node, root);
-   atomic_inc(>buffer->userspace_handles);
+   rb_link_node(>node, parent, new);
+   rb_insert_color(>node, root);
+   atomic_inc(>buffer->userspace_handles);
+   }
+
+   return NULL;
+}
+
+static void memtrack_buffer_install_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer)
+{
+   struct memtrack_handle *handle;
+
+   handle = memtrack_handle_find_locked(root, buffer, true);
+   if (handle) {
+   kref_get(>refcount);
+   return;
+   }
 }
 
 /**
@@ -112,19 +135,41 @@ static void memtrack_handle_destroy(struct kref *ref)
 static void memtrack_buffer_uninstall_locked(struct rb_root *root,
struct memtrack_buffer *buffer)
 {
-   struct rb_node *node = root->rb_node;
+   struct memtrack_handle *handle;
 
-   while (node) {
-   struct memtrack_handle *handle = rb_entry(node,
-   struct memtrack_handle, node);
+   handle = memtrack_handle_find_locked(root, buffer, false);
 
-   if (handle->buffer->id > buffer->id) {
-   node = node->rb_left;
-   } else if (handle->buffer->id < buffer->id) {
-   node = node->rb_right;
-   } else {
-   kref_put(>refcount, memtrack_handle_destroy);
-   return;
+   if (handle)
+   kref_put(>refcount, memtrack_handle_destroy);
+}
+
+static void memtrack_buffer_vm_open_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer,
+   struct memtrack_vma_list *vma_list)
+{
+   struct memtrack_handle *handle;
+
+   handle = memtrack_handle_find_locked(root, buffer, false);
+   if (handle)
+   hlist_add_head(_list->node, >vma_list);
+}
+
+static void memtrack_buffer_vm_close_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer,
+   const struct vm_area_struct *vma)
+{
+   struct memtrack_handle *handle;
+
+   handle = memtrack_handle_find_locked(root, buffer, false);
+   if (handle) {
+   struct memtrack_vma_list *vma_list;
+
+   hlist_for_each_entry(vma_list, >vma_list, node) {
+   if (vma_list->vma == vma) {
+   hlist_del(_list->node);
+   

[RFC 4/6] memtrack: Adds the accounting to keep track of all mmaped/unmapped pages.

2016-10-11 Thread Ruchi Kandoi
Since mmapped pages are already accounted for by PSS, memtrack needs a way
to distinguish the total memory that has not been accounted for elsewhere.

Signed-off-by: Ruchi Kandoi 
Signed-off-by: Greg Hackmann 
---
 drivers/misc/memtrack.c   | 175 --
 drivers/staging/android/ion/ion.c |   5 +-
 include/linux/memtrack.h  |  29 +++
 3 files changed, 180 insertions(+), 29 deletions(-)

diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
index e5c7e03..4b2d17f 100644
--- a/drivers/misc/memtrack.c
+++ b/drivers/misc/memtrack.c
@@ -22,12 +22,19 @@
 #include 
 #include 
 #include 
+#include 
+
+struct memtrack_vma_list {
+   struct hlist_node node;
+   const struct vm_area_struct *vma;
+};
 
 struct memtrack_handle {
struct memtrack_buffer *buffer;
struct rb_node node;
struct rb_root *root;
struct kref refcount;
+   struct hlist_head vma_list;
 };
 
 static struct kmem_cache *memtrack_handle_cache;
@@ -40,8 +47,8 @@ static DEFINE_IDR(mem_idr);
 static DEFINE_IDA(mem_ida);
 #endif
 
-static void memtrack_buffer_install_locked(struct rb_root *root,
-   struct memtrack_buffer *buffer)
+static struct memtrack_handle *memtrack_handle_find_locked(struct rb_root 
*root,
+   struct memtrack_buffer *buffer, bool alloc)
 {
struct rb_node **new = >rb_node, *parent = NULL;
struct memtrack_handle *handle;
@@ -56,22 +63,38 @@ static void memtrack_buffer_install_locked(struct rb_root 
*root,
} else if (handle->buffer->id < buffer->id) {
new = >rb_right;
} else {
-   kref_get(>refcount);
-   return;
+   return handle;
}
}
 
-   handle = kmem_cache_alloc(memtrack_handle_cache, GFP_KERNEL);
-   if (!handle)
-   return;
+   if (alloc) {
+   handle = kmem_cache_alloc(memtrack_handle_cache, GFP_KERNEL);
+   if (!handle)
+   return NULL;
 
-   handle->buffer = buffer;
-   handle->root = root;
-   kref_init(>refcount);
+   handle->buffer = buffer;
+   handle->root = root;
+   kref_init(>refcount);
+   INIT_HLIST_HEAD(>vma_list);
 
-   rb_link_node(>node, parent, new);
-   rb_insert_color(>node, root);
-   atomic_inc(>buffer->userspace_handles);
+   rb_link_node(>node, parent, new);
+   rb_insert_color(>node, root);
+   atomic_inc(>buffer->userspace_handles);
+   }
+
+   return NULL;
+}
+
+static void memtrack_buffer_install_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer)
+{
+   struct memtrack_handle *handle;
+
+   handle = memtrack_handle_find_locked(root, buffer, true);
+   if (handle) {
+   kref_get(>refcount);
+   return;
+   }
 }
 
 /**
@@ -112,19 +135,41 @@ static void memtrack_handle_destroy(struct kref *ref)
 static void memtrack_buffer_uninstall_locked(struct rb_root *root,
struct memtrack_buffer *buffer)
 {
-   struct rb_node *node = root->rb_node;
+   struct memtrack_handle *handle;
 
-   while (node) {
-   struct memtrack_handle *handle = rb_entry(node,
-   struct memtrack_handle, node);
+   handle = memtrack_handle_find_locked(root, buffer, false);
 
-   if (handle->buffer->id > buffer->id) {
-   node = node->rb_left;
-   } else if (handle->buffer->id < buffer->id) {
-   node = node->rb_right;
-   } else {
-   kref_put(>refcount, memtrack_handle_destroy);
-   return;
+   if (handle)
+   kref_put(>refcount, memtrack_handle_destroy);
+}
+
+static void memtrack_buffer_vm_open_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer,
+   struct memtrack_vma_list *vma_list)
+{
+   struct memtrack_handle *handle;
+
+   handle = memtrack_handle_find_locked(root, buffer, false);
+   if (handle)
+   hlist_add_head(_list->node, >vma_list);
+}
+
+static void memtrack_buffer_vm_close_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer,
+   const struct vm_area_struct *vma)
+{
+   struct memtrack_handle *handle;
+
+   handle = memtrack_handle_find_locked(root, buffer, false);
+   if (handle) {
+   struct memtrack_vma_list *vma_list;
+
+   hlist_for_each_entry(vma_list, >vma_list, node) {
+   if (vma_list->vma == vma) {
+   hlist_del(_list->node);
+   kfree(vma_list);
+

[RFC 3/6] dma-buf: add memtrack support

2016-10-11 Thread Ruchi Kandoi
Signed-off-by: Greg Hackmann <ghackm...@google.com>
Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/dma-buf/dma-buf.c  | 37 ++
 drivers/staging/android/ion/ion.c  | 14 +
 drivers/staging/android/ion/ion_priv.h |  2 ++
 include/linux/dma-buf.h|  5 +
 4 files changed, 58 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index ddaee60..f632c2b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -297,12 +297,32 @@ static long dma_buf_ioctl(struct file *file,
}
 }
 
+static void dma_buf_installed(struct file *file, struct task_struct *task)
+{
+   struct memtrack_buffer *memtrack =
+   dma_buf_memtrack_buffer(file->private_data);
+
+   if (memtrack)
+   memtrack_buffer_install(memtrack, task);
+}
+
+static void dma_buf_uninstalled(struct file *file, struct task_struct *task)
+{
+   struct memtrack_buffer *memtrack =
+   dma_buf_memtrack_buffer(file->private_data);
+
+   if (memtrack)
+   memtrack_buffer_uninstall(memtrack, task);
+}
+
 static const struct file_operations dma_buf_fops = {
.release= dma_buf_release,
.mmap   = dma_buf_mmap_internal,
.llseek = dma_buf_llseek,
.poll   = dma_buf_poll,
.unlocked_ioctl = dma_buf_ioctl,
+   .installed  = dma_buf_installed,
+   .uninstalled= dma_buf_uninstalled,
 };
 
 /*
@@ -830,6 +850,23 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);
 
+/**
+ * dma_buf_memtrack_buffer - returns a memtrack entry associated with dma_buf
+ *
+ * @dmabuf:[in]pointer to dma_buf
+ *
+ * Returns the struct memtrack_buffer associated with this dma_buf's
+ * backing pages.  If memtrack isn't enabled in the kernel, or the dma_buf
+ * exporter doesn't have memtrack support, returns NULL.
+ */
+struct memtrack_buffer *dma_buf_memtrack_buffer(struct dma_buf *dmabuf)
+{
+   if (!dmabuf->ops->memtrack_buffer)
+   return NULL;
+   return dmabuf->ops->memtrack_buffer(dmabuf);
+}
+EXPORT_SYMBOL_GPL(dma_buf_memtrack_buffer);
+
 #ifdef CONFIG_DEBUG_FS
 static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 396ded5..1c2df54 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -196,6 +196,7 @@ void ion_buffer_destroy(struct ion_buffer *buffer)
buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
buffer->heap->ops->free(buffer);
vfree(buffer->pages);
+   memtrack_buffer_remove(&buffer->memtrack_buffer);
kfree(buffer);
 }
 
@@ -458,6 +459,8 @@ struct ion_handle *ion_alloc(struct ion_client *client, 
size_t len,
handle = ERR_PTR(ret);
}
 
+   memtrack_buffer_init(>memtrack_buffer, len);
+
return handle;
 }
 EXPORT_SYMBOL(ion_alloc);
@@ -1013,6 +1016,16 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf 
*dmabuf,
return 0;
 }
 
+static struct memtrack_buffer *ion_memtrack_buffer(struct dma_buf *buffer)
+{
+   if (IS_ENABLED(CONFIG_MEMTRACK) && buffer && buffer->priv) {
+   struct ion_buffer *ion_buffer = buffer->priv;
+
+   return &ion_buffer->memtrack_buffer;
+   }
+   return NULL;
+}
+
 static struct dma_buf_ops dma_buf_ops = {
.map_dma_buf = ion_map_dma_buf,
.unmap_dma_buf = ion_unmap_dma_buf,
@@ -1024,6 +1037,7 @@ static struct dma_buf_ops dma_buf_ops = {
.kunmap_atomic = ion_dma_buf_kunmap,
.kmap = ion_dma_buf_kmap,
.kunmap = ion_dma_buf_kunmap,
+   .memtrack_buffer = ion_memtrack_buffer,
 };
 
 struct dma_buf *ion_share_dma_buf(struct ion_client *client,
diff --git a/drivers/staging/android/ion/ion_priv.h 
b/drivers/staging/android/ion/ion_priv.h
index 3c3b324..74c38eb 100644
--- a/drivers/staging/android/ion/ion_priv.h
+++ b/drivers/staging/android/ion/ion_priv.h
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ion.h"
 
@@ -78,6 +79,7 @@ struct ion_buffer {
int handle_count;
char task_comm[TASK_COMM_LEN];
pid_t pid;
+   struct memtrack_buffer memtrack_buffer;
 };
 void ion_buffer_destroy(struct ion_buffer *buffer);
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index e0b0741..dfcc2d0 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct device;
 struct dma_buf;
@@ -70,6 +71,8 @@ struct dma_buf_attachment;
  * @vmap: [optional] creates a virtual mapping for the buffer into kernel
  *   address space. Same restrictions as for vmap and friends apply.

[RFC 2/6] drivers: misc: add memtrack

2016-10-11 Thread Ruchi Kandoi
Shared-buffer allocators like ion or GEM traditionally call into CMA or
alloc_pages() to get backing memory, meaning these allocations will not
show up in any process's mm counters.  But since these allocations are
often used for things like graphics buffers that can be extremely large,
the user just sees a bunch of pages vanishing from the system without an
explanation.

CONFIG_MEMTRACK adds infrastructure for "blaming" these allocations back
to the processes currently holding a reference to the shared buffer.
This information is exposed to userspace through /proc/[pid]/memtrack.

To use memtrack, the shared memory allocator should:

(1) Embed a struct memtrack_buffer somewhere in the underlying buffer's
metadata, and initialize it with memtrack_buffer_init()

(2) Call memtrack_buffer_{install,uninstall} each time a task takes or
drops a reference to the shared buffer

(3) Call memtrack_buffer_remove() before destroying a tracked buffer

CONFIG_MEMTRACK_DEBUG adds a global list of all buffers tracked by
memtrack, accessible through /sys/kernel/debug/memtrack.  This involves
maintaining a global idr of buffers.  Due to the extra overhead,
CONFIG_MEMTRACK_DEBUG is intended for debugging memory leaks rather than
production use.

Signed-off-by: Greg Hackmann <ghackm...@google.com>
Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/misc/Kconfig |  16 +++
 drivers/misc/Makefile|   1 +
 drivers/misc/memtrack.c  | 360 +++
 fs/proc/base.c   |   4 +
 include/linux/memtrack.h |  94 +
 include/linux/sched.h|   3 +
 kernel/fork.c|   4 +
 7 files changed, 482 insertions(+)
 create mode 100644 drivers/misc/memtrack.c
 create mode 100644 include/linux/memtrack.h

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971ba..7557fb1 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,22 @@ config PANEL_BOOT_MESSAGE
  An empty message will only clear the display at driver init time. Any 
other
  printf()-formatted message is valid with newline and escape codes.
 
+config MEMTRACK
+   tristate "Per-pid memory statistics"
+   default n
+   ---help---
+ Keeps track of shared buffers allocated by the process and
+ exports them via /proc/<pid>/memtrack.
+
+config MEMTRACK_DEBUG
+   tristate "Per-pid memory statistics debug option"
+   depends on MEMTRACK && DEBUG_FS
+   default n
+   ---help---
+ Keeps track of all shared buffers allocated and exports the list
+ via /sys/kernel/debug/memtrack.
+
+ source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 3198336..1fbb084 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -68,3 +68,4 @@ OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \
 targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o
 $(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE
$(call if_changed,objcopy)
+obj-$(CONFIG_MEMTRACK)  += memtrack.o
diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
new file mode 100644
index 000..e5c7e03
--- /dev/null
+++ b/drivers/misc/memtrack.c
@@ -0,0 +1,360 @@
+/* drivers/misc/memtrack.c
+ *
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct memtrack_handle {
+   struct memtrack_buffer *buffer;
+   struct rb_node node;
+   struct rb_root *root;
+   struct kref refcount;
+};
+
+static struct kmem_cache *memtrack_handle_cache;
+
+static DEFINE_MUTEX(memtrack_id_lock);
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+static struct dentry *debugfs_file;
+static DEFINE_IDR(mem_idr);
+#else
+static DEFINE_IDA(mem_ida);
+#endif
+
+static void memtrack_buffer_install_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer)
+{
+   struct rb_node **new = >rb_node, *parent = NULL;
+   struct memtrack_handle *handle;
+
+   while (*new) {
+   struct rb_node *node = *new;
+
+   handle = rb_entry(node, struct memtrack_handle, node);
+   parent = node;
+   if (handle->buffer->id > buffer->id) {
+   new = >rb

[RFC 3/6] dma-buf: add memtrack support

2016-10-11 Thread Ruchi Kandoi
Signed-off-by: Greg Hackmann 
Signed-off-by: Ruchi Kandoi 
---
 drivers/dma-buf/dma-buf.c  | 37 ++
 drivers/staging/android/ion/ion.c  | 14 +
 drivers/staging/android/ion/ion_priv.h |  2 ++
 include/linux/dma-buf.h|  5 +
 4 files changed, 58 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index ddaee60..f632c2b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -297,12 +297,32 @@ static long dma_buf_ioctl(struct file *file,
}
 }
 
+static void dma_buf_installed(struct file *file, struct task_struct *task)
+{
+   struct memtrack_buffer *memtrack =
+   dma_buf_memtrack_buffer(file->private_data);
+
+   if (memtrack)
+   memtrack_buffer_install(memtrack, task);
+}
+
+static void dma_buf_uninstalled(struct file *file, struct task_struct *task)
+{
+   struct memtrack_buffer *memtrack =
+   dma_buf_memtrack_buffer(file->private_data);
+
+   if (memtrack)
+   memtrack_buffer_uninstall(memtrack, task);
+}
+
 static const struct file_operations dma_buf_fops = {
.release= dma_buf_release,
.mmap   = dma_buf_mmap_internal,
.llseek = dma_buf_llseek,
.poll   = dma_buf_poll,
.unlocked_ioctl = dma_buf_ioctl,
+   .installed  = dma_buf_installed,
+   .uninstalled= dma_buf_uninstalled,
 };
 
 /*
@@ -830,6 +850,23 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);
 
+/**
+ * dma_buf_memtrack_buffer - returns a memtrack entry associated with dma_buf
+ *
+ * @dmabuf:[in]pointer to dma_buf
+ *
+ * Returns the struct memtrack_buffer associated with this dma_buf's
+ * backing pages.  If memtrack isn't enabled in the kernel, or the dma_buf
+ * exporter doesn't have memtrack support, returns NULL.
+ */
+struct memtrack_buffer *dma_buf_memtrack_buffer(struct dma_buf *dmabuf)
+{
+   if (!dmabuf->ops->memtrack_buffer)
+   return NULL;
+   return dmabuf->ops->memtrack_buffer(dmabuf);
+}
+EXPORT_SYMBOL_GPL(dma_buf_memtrack_buffer);
+
 #ifdef CONFIG_DEBUG_FS
 static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 396ded5..1c2df54 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -196,6 +196,7 @@ void ion_buffer_destroy(struct ion_buffer *buffer)
buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
buffer->heap->ops->free(buffer);
vfree(buffer->pages);
+   memtrack_buffer_remove(&buffer->memtrack_buffer);
kfree(buffer);
 }
 
@@ -458,6 +459,8 @@ struct ion_handle *ion_alloc(struct ion_client *client, 
size_t len,
handle = ERR_PTR(ret);
}
 
+   memtrack_buffer_init(>memtrack_buffer, len);
+
return handle;
 }
 EXPORT_SYMBOL(ion_alloc);
@@ -1013,6 +1016,16 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf 
*dmabuf,
return 0;
 }
 
+static struct memtrack_buffer *ion_memtrack_buffer(struct dma_buf *buffer)
+{
+   if (IS_ENABLED(CONFIG_MEMTRACK) && buffer && buffer->priv) {
+   struct ion_buffer *ion_buffer = buffer->priv;
+
+   return &ion_buffer->memtrack_buffer;
+   }
+   return NULL;
+}
+
 static struct dma_buf_ops dma_buf_ops = {
.map_dma_buf = ion_map_dma_buf,
.unmap_dma_buf = ion_unmap_dma_buf,
@@ -1024,6 +1037,7 @@ static struct dma_buf_ops dma_buf_ops = {
.kunmap_atomic = ion_dma_buf_kunmap,
.kmap = ion_dma_buf_kmap,
.kunmap = ion_dma_buf_kunmap,
+   .memtrack_buffer = ion_memtrack_buffer,
 };
 
 struct dma_buf *ion_share_dma_buf(struct ion_client *client,
diff --git a/drivers/staging/android/ion/ion_priv.h 
b/drivers/staging/android/ion/ion_priv.h
index 3c3b324..74c38eb 100644
--- a/drivers/staging/android/ion/ion_priv.h
+++ b/drivers/staging/android/ion/ion_priv.h
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ion.h"
 
@@ -78,6 +79,7 @@ struct ion_buffer {
int handle_count;
char task_comm[TASK_COMM_LEN];
pid_t pid;
+   struct memtrack_buffer memtrack_buffer;
 };
 void ion_buffer_destroy(struct ion_buffer *buffer);
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index e0b0741..dfcc2d0 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct device;
 struct dma_buf;
@@ -70,6 +71,8 @@ struct dma_buf_attachment;
  * @vmap: [optional] creates a virtual mapping for the buffer into kernel
  *   address space. Same restrictions as for vmap and friends apply.
  * @vunmap: [optional] unmaps a vmap from the b

[RFC 2/6] drivers: misc: add memtrack

2016-10-11 Thread Ruchi Kandoi
Shared-buffer allocators like ion or GEM traditionally call into CMA or
alloc_pages() to get backing memory, meaning these allocations will not
show up in any process's mm counters.  But since these allocations are
often used for things like graphics buffers that can be extremely large,
the user just sees a bunch of pages vanishing from the system without an
explanation.

CONFIG_MEMTRACK adds infrastructure for "blaming" these allocations back
to the processes currently holding a reference to the shared buffer.
This information is exposed to userspace through /proc/[pid]/memtrack.

To use memtrack, the shared memory allocator should:

(1) Embed a struct memtrack_buffer somewhere in the underlying buffer's
metadata, and initialize it with memtrack_buffer_init()

(2) Call memtrack_buffer_{install,uninstall} each time a task takes or
drops a reference to the shared buffer

(3) Call memtrack_buffer_remove() before destroying a tracked buffer

CONFIG_MEMTRACK_DEBUG adds a global list of all buffers tracked by
memtrack, accessible through /sys/kernel/debug/memtrack.  This involves
maintaining a global idr of buffers.  Due to the extra overhead,
CONFIG_MEMTRACK_DEBUG is intended for debugging memory leaks rather than
production use.

Signed-off-by: Greg Hackmann 
Signed-off-by: Ruchi Kandoi 
---
 drivers/misc/Kconfig |  16 +++
 drivers/misc/Makefile|   1 +
 drivers/misc/memtrack.c  | 360 +++
 fs/proc/base.c   |   4 +
 include/linux/memtrack.h |  94 +
 include/linux/sched.h|   3 +
 kernel/fork.c|   4 +
 7 files changed, 482 insertions(+)
 create mode 100644 drivers/misc/memtrack.c
 create mode 100644 include/linux/memtrack.h

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971ba..7557fb1 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,22 @@ config PANEL_BOOT_MESSAGE
  An empty message will only clear the display at driver init time. Any 
other
  printf()-formatted message is valid with newline and escape codes.
 
+config MEMTRACK
+   tristate "Per-pid memory statistics"
+   default n
+   ---help---
+ Keeps track of shared buffers allocated by the process and
+ exports them via /proc/<pid>/memtrack.
+
+config MEMTRACK_DEBUG
+   tristate "Per-pid memory statistics debug option"
+   depends on MEMTRACK && DEBUG_FS
+   default n
+   ---help---
+ Keeps track of all shared buffers allocated and exports the list
+ via /sys/kernel/debug/memtrack.
+
+ source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 3198336..1fbb084 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -68,3 +68,4 @@ OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \
 targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o
 $(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE
$(call if_changed,objcopy)
+obj-$(CONFIG_MEMTRACK)  += memtrack.o
diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
new file mode 100644
index 000..e5c7e03
--- /dev/null
+++ b/drivers/misc/memtrack.c
@@ -0,0 +1,360 @@
+/* drivers/misc/memtrack.c
+ *
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct memtrack_handle {
+   struct memtrack_buffer *buffer;
+   struct rb_node node;
+   struct rb_root *root;
+   struct kref refcount;
+};
+
+static struct kmem_cache *memtrack_handle_cache;
+
+static DEFINE_MUTEX(memtrack_id_lock);
+#if IS_ENABLED(CONFIG_MEMTRACK_DEBUG)
+static struct dentry *debugfs_file;
+static DEFINE_IDR(mem_idr);
+#else
+static DEFINE_IDA(mem_ida);
+#endif
+
+static void memtrack_buffer_install_locked(struct rb_root *root,
+   struct memtrack_buffer *buffer)
+{
+   struct rb_node **new = >rb_node, *parent = NULL;
+   struct memtrack_handle *handle;
+
+   while (*new) {
+   struct rb_node *node = *new;
+
+   handle = rb_entry(node, struct memtrack_handle, node);
+   parent = node;
+   if (handle->buffer->id > buffer->id) {
+   new = >rb_left;
+   } else if (ha

[RFC 5/6] memtrack: Add memtrack accounting for forked processes.

2016-10-11 Thread Ruchi Kandoi
When a process forks, all of its buffers are shared with the child
process as well. Add memtrack accounting for forked processes.

A forked process gets a copy of the parent's mapped pages. This patch
makes sure that the newly mapped pages are attributed to the child
process instead of the parent.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/misc/memtrack.c   | 45 +++
 drivers/staging/android/ion/ion.c | 45 +--
 include/linux/memtrack.h  | 19 +++--
 include/linux/mm.h|  3 +++
 kernel/fork.c | 19 +++--
 5 files changed, 117 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
index 4b2d17f..fa2601a 100644
--- a/drivers/misc/memtrack.c
+++ b/drivers/misc/memtrack.c
@@ -204,12 +204,13 @@ EXPORT_SYMBOL(memtrack_buffer_uninstall);
  * @buffer: the buffer's memtrack entry
  *
  * @vma: vma being opened
+ * @task: task which mapped the pages
  */
 void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
-   const struct vm_area_struct *vma)
+   const struct vm_area_struct *vma, struct task_struct *task)
 {
unsigned long flags;
-   struct task_struct *leader = current->group_leader;
+   struct task_struct *leader = task->group_leader;
struct memtrack_vma_list *vma_list;
 
vma_list = kmalloc(sizeof(*vma_list), GFP_KERNEL);
@@ -228,12 +229,13 @@ EXPORT_SYMBOL(memtrack_buffer_vm_open);
  *
  * @buffer: the buffer's memtrack entry
  * @vma: the vma being closed
+ * @task: task that mmaped the pages
  */
 void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
-   const struct vm_area_struct *vma)
+   const struct vm_area_struct *vma, struct task_struct *task)
 {
unsigned long flags;
-   struct task_struct *leader = current->group_leader;
+   struct task_struct *leader = task->group_leader;
 
write_lock_irqsave(>memtrack_lock, flags);
memtrack_buffer_vm_close_locked(>memtrack_rb, buffer, vma);
@@ -241,6 +243,41 @@ void memtrack_buffer_vm_close(struct memtrack_buffer 
*buffer,
 }
 EXPORT_SYMBOL(memtrack_buffer_vm_close);
 
+/**
+ * memtrack_buffer_install_fork - Install all parent's handles into
+ *  child.
+ *
+ * @parent: parent task
+ * @child: child task
+ */
+void memtrack_buffer_install_fork(struct task_struct *parent,
+   struct task_struct *child)
+{
+   struct task_struct *leader, *leader_child;
+   struct rb_root *root;
+   struct rb_node *node;
+   unsigned long flags;
+
+   if (!child || !parent)
+   return;
+
+   leader = parent->group_leader;
+   leader_child = child->group_leader;
+   write_lock_irqsave(>memtrack_lock, flags);
+   root = >memtrack_rb;
+   node = rb_first(root);
+   while (node) {
+   struct memtrack_handle *handle;
+
+   handle = rb_entry(node, struct memtrack_handle, node);
+   memtrack_buffer_install_locked(_child->memtrack_rb,
+   handle->buffer);
+   node = rb_next(node);
+   }
+   write_unlock_irqrestore(>memtrack_lock, flags);
+}
+EXPORT_SYMBOL(memtrack_buffer_install_fork);
+
 static int memtrack_id_alloc(struct memtrack_buffer *buffer)
 {
int ret;
diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index c32d520..451aa0f 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -906,7 +906,7 @@ static void ion_vm_open(struct vm_area_struct *vma)
list_add(_list->list, >vmas);
mutex_unlock(>lock);
pr_debug("%s: adding %p\n", __func__, vma);
-   memtrack_buffer_vm_open(>memtrack_buffer, vma);
+   memtrack_buffer_vm_open(>memtrack_buffer, vma, current);
 }
 
 static void ion_vm_close(struct vm_area_struct *vma)
@@ -925,13 +925,51 @@ static void ion_vm_close(struct vm_area_struct *vma)
break;
}
mutex_unlock(>lock);
-   memtrack_buffer_vm_close(>memtrack_buffer, vma);
+   memtrack_buffer_vm_close(>memtrack_buffer, vma, current);
+}
+
+void vm_track(struct vm_area_struct *vma, struct task_struct *task)
+{
+   struct ion_buffer *buffer = vma->vm_private_data;
+
+   memtrack_buffer_vm_open(>memtrack_buffer, vma, task);
+}
+
+void vm_untrack(struct vm_area_struct *vma, struct task_struct *task)
+{
+   struct ion_buffer *buffer = vma->vm_private_data;
+
+   memtrack_buffer_vm_close(>memtrack_buffer, vma, task);
 }
 
 static const struct vm_operations_struct ion_vma_ops = {
.open = ion_vm_open,
.close = ion_vm_close,
.fault = ion_vm_fault,
+   .track = vm_track,
+   .untrack = vm_untrack,
+};
+
+sta

[RFC 5/6] memtrack: Add memtrack accounting for forked processes.

2016-10-11 Thread Ruchi Kandoi
When a process forks, all of its buffers are shared with the child
process as well. Add memtrack accounting for forked processes.

A forked process gets a copy of the parent's mapped pages. This patch
makes sure that the newly mapped pages are attributed to the child
process instead of the parent.

Signed-off-by: Ruchi Kandoi 
---
 drivers/misc/memtrack.c   | 45 +++
 drivers/staging/android/ion/ion.c | 45 +--
 include/linux/memtrack.h  | 19 +++--
 include/linux/mm.h|  3 +++
 kernel/fork.c | 19 +++--
 5 files changed, 117 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/memtrack.c b/drivers/misc/memtrack.c
index 4b2d17f..fa2601a 100644
--- a/drivers/misc/memtrack.c
+++ b/drivers/misc/memtrack.c
@@ -204,12 +204,13 @@ EXPORT_SYMBOL(memtrack_buffer_uninstall);
  * @buffer: the buffer's memtrack entry
  *
  * @vma: vma being opened
+ * @task: task which mapped the pages
  */
 void memtrack_buffer_vm_open(struct memtrack_buffer *buffer,
-   const struct vm_area_struct *vma)
+   const struct vm_area_struct *vma, struct task_struct *task)
 {
unsigned long flags;
-   struct task_struct *leader = current->group_leader;
+   struct task_struct *leader = task->group_leader;
struct memtrack_vma_list *vma_list;
 
vma_list = kmalloc(sizeof(*vma_list), GFP_KERNEL);
@@ -228,12 +229,13 @@ EXPORT_SYMBOL(memtrack_buffer_vm_open);
  *
  * @buffer: the buffer's memtrack entry
  * @vma: the vma being closed
+ * @task: task that mmaped the pages
  */
 void memtrack_buffer_vm_close(struct memtrack_buffer *buffer,
-   const struct vm_area_struct *vma)
+   const struct vm_area_struct *vma, struct task_struct *task)
 {
unsigned long flags;
-   struct task_struct *leader = current->group_leader;
+   struct task_struct *leader = task->group_leader;
 
write_lock_irqsave(&leader->memtrack_lock, flags);
memtrack_buffer_vm_close_locked(&leader->memtrack_rb, buffer, vma);
@@ -241,6 +243,41 @@ void memtrack_buffer_vm_close(struct memtrack_buffer 
*buffer,
 }
 EXPORT_SYMBOL(memtrack_buffer_vm_close);
 
+/**
+ * memtrack_buffer_install_fork - Install all parent's handles into
+ *  child.
+ *
+ * @parent: parent task
+ * @child: child task
+ */
+void memtrack_buffer_install_fork(struct task_struct *parent,
+   struct task_struct *child)
+{
+   struct task_struct *leader, *leader_child;
+   struct rb_root *root;
+   struct rb_node *node;
+   unsigned long flags;
+
+   if (!child || !parent)
+   return;
+
+   leader = parent->group_leader;
+   leader_child = child->group_leader;
+   write_lock_irqsave(&leader->memtrack_lock, flags);
+   root = &leader->memtrack_rb;
+   node = rb_first(root);
+   while (node) {
+   struct memtrack_handle *handle;
+
+   handle = rb_entry(node, struct memtrack_handle, node);
+   memtrack_buffer_install_locked(&leader_child->memtrack_rb,
+   handle->buffer);
+   node = rb_next(node);
+   }
+   write_unlock_irqrestore(&leader->memtrack_lock, flags);
+}
+EXPORT_SYMBOL(memtrack_buffer_install_fork);
+
 static int memtrack_id_alloc(struct memtrack_buffer *buffer)
 {
int ret;
diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index c32d520..451aa0f 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -906,7 +906,7 @@ static void ion_vm_open(struct vm_area_struct *vma)
list_add(&vma_list->list, &buffer->vmas);
mutex_unlock(&buffer->lock);
pr_debug("%s: adding %p\n", __func__, vma);
-   memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma);
+   memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma, current);
 }
 
 static void ion_vm_close(struct vm_area_struct *vma)
@@ -925,13 +925,51 @@ static void ion_vm_close(struct vm_area_struct *vma)
break;
}
mutex_unlock(&buffer->lock);
-   memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma);
+   memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma, current);
+}
+
+void vm_track(struct vm_area_struct *vma, struct task_struct *task)
+{
+   struct ion_buffer *buffer = vma->vm_private_data;
+
+   memtrack_buffer_vm_open(&buffer->memtrack_buffer, vma, task);
+}
+
+void vm_untrack(struct vm_area_struct *vma, struct task_struct *task)
+{
+   struct ion_buffer *buffer = vma->vm_private_data;
+
+   memtrack_buffer_vm_close(&buffer->memtrack_buffer, vma, task);
 }
 
 static const struct vm_operations_struct ion_vma_ops = {
.open = ion_vm_open,
.close = ion_vm_close,
.fault = ion_vm_fault,
+   .track = vm_track,
+   .untrack = vm_untrack,
+};
+
+static void memtrack_vm_close(st

[RFC 6/6] drivers: staging: ion: add ION_IOC_TAG ioctl

2016-10-11 Thread Ruchi Kandoi
From: Greg Hackmann <ghackm...@google.com>

ION_IOC_TAG provides a userspace interface for tagging buffers with
their memtrack usage after allocation.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/staging/android/ion/ion-ioctl.c | 17 +
 drivers/staging/android/uapi/ion.h  | 25 +
 2 files changed, 42 insertions(+)

diff --git a/drivers/staging/android/ion/ion-ioctl.c 
b/drivers/staging/android/ion/ion-ioctl.c
index 7e7431d..8745a85 100644
--- a/drivers/staging/android/ion/ion-ioctl.c
+++ b/drivers/staging/android/ion/ion-ioctl.c
@@ -28,6 +28,7 @@ union ion_ioctl_arg {
struct ion_handle_data handle;
struct ion_custom_data custom;
struct ion_heap_query query;
+   struct ion_tag_data tag;
 };
 
 static int validate_ioctl_arg(unsigned int cmd, union ion_ioctl_arg *arg)
@@ -162,6 +163,22 @@ long ion_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
case ION_IOC_HEAP_QUERY:
ret = ion_query_heaps(client, );
break;
+   case ION_IOC_TAG:
+   {
+#ifdef CONFIG_MEMTRACK
+   struct ion_handle *handle;
+
+   handle = ion_handle_get_by_id(client, data.tag.handle);
+   if (IS_ERR(handle))
+   return PTR_ERR(handle);
+   data.tag.tag[sizeof(data.tag.tag) - 1] = 0;
+   memtrack_buffer_set_tag(&handle->buffer->memtrack_buffer,
+   data.tag.tag);
+#else
+   ret = -ENOTTY;
+#endif
+   break;
+   }
default:
return -ENOTTY;
}
diff --git a/drivers/staging/android/uapi/ion.h 
b/drivers/staging/android/uapi/ion.h
index 14cd873..4c26196 100644
--- a/drivers/staging/android/uapi/ion.h
+++ b/drivers/staging/android/uapi/ion.h
@@ -115,6 +115,22 @@ struct ion_handle_data {
ion_user_handle_t handle;
 };
 
+#define ION_MAX_TAG_LEN 32
+
+/**
+ * struct ion_tag_data - metadata passed from userspace for a handle
+ * @handle: a handle
+ * @tag: a string describing the buffer
+ *
+ * For ION_IOC_TAG userspace populates the handle field with
+ * the handle returned from ion alloc and tag contains the string which
+ * accurately describes the usage for the memory.
+ */
+struct ion_tag_data {
+   ion_user_handle_t handle;
+   char tag[ION_MAX_TAG_LEN];
+};
+
 /**
  * struct ion_custom_data - metadata passed to/from userspace for a custom 
ioctl
  * @cmd:   the custom ioctl function to call
@@ -217,6 +233,15 @@ struct ion_heap_query {
 #define ION_IOC_SYNC   _IOWR(ION_IOC_MAGIC, 7, struct ion_fd_data)
 
 /**
+ * DOC: ION_IOC_TAG - adds a memtrack descriptor tag to memory
+ *
+ * Takes an ion_tag_data struct with the tag field populated with a
+ * descriptive string and handle populated with a valid opaque handle. The
+ * tag should accurately define the usage for the memory.
+ */
+#define ION_IOC_TAG    _IOWR(ION_IOC_MAGIC, 8, struct ion_tag_data)
+
+/**
  * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl
  *
  * Takes the argument of the architecture specific ioctl to call and
-- 
2.8.0.rc3.226.g39d4020



[RFC 6/6] drivers: staging: ion: add ION_IOC_TAG ioctl

2016-10-11 Thread Ruchi Kandoi
From: Greg Hackmann 

ION_IOC_TAG provides a userspace interface for tagging buffers with
their memtrack usage after allocation.

Signed-off-by: Ruchi Kandoi 
---
 drivers/staging/android/ion/ion-ioctl.c | 17 +
 drivers/staging/android/uapi/ion.h  | 25 +
 2 files changed, 42 insertions(+)

diff --git a/drivers/staging/android/ion/ion-ioctl.c 
b/drivers/staging/android/ion/ion-ioctl.c
index 7e7431d..8745a85 100644
--- a/drivers/staging/android/ion/ion-ioctl.c
+++ b/drivers/staging/android/ion/ion-ioctl.c
@@ -28,6 +28,7 @@ union ion_ioctl_arg {
struct ion_handle_data handle;
struct ion_custom_data custom;
struct ion_heap_query query;
+   struct ion_tag_data tag;
 };
 
 static int validate_ioctl_arg(unsigned int cmd, union ion_ioctl_arg *arg)
@@ -162,6 +163,22 @@ long ion_ioctl(struct file *filp, unsigned int cmd, 
unsigned long arg)
case ION_IOC_HEAP_QUERY:
ret = ion_query_heaps(client, );
break;
+   case ION_IOC_TAG:
+   {
+#ifdef CONFIG_MEMTRACK
+   struct ion_handle *handle;
+
+   handle = ion_handle_get_by_id(client, data.tag.handle);
+   if (IS_ERR(handle))
+   return PTR_ERR(handle);
+   data.tag.tag[sizeof(data.tag.tag) - 1] = 0;
+   memtrack_buffer_set_tag(&handle->buffer->memtrack_buffer,
+   data.tag.tag);
+#else
+   ret = -ENOTTY;
+#endif
+   break;
+   }
default:
return -ENOTTY;
}
diff --git a/drivers/staging/android/uapi/ion.h 
b/drivers/staging/android/uapi/ion.h
index 14cd873..4c26196 100644
--- a/drivers/staging/android/uapi/ion.h
+++ b/drivers/staging/android/uapi/ion.h
@@ -115,6 +115,22 @@ struct ion_handle_data {
ion_user_handle_t handle;
 };
 
+#define ION_MAX_TAG_LEN 32
+
+/**
+ * struct ion_tag_data - metadata passed from userspace for a handle
+ * @handle: a handle
+ * @tag: a string describing the buffer
+ *
+ * For ION_IOC_TAG userspace populates the handle field with
+ * the handle returned from ion alloc and tag contains the string which
+ * accurately describes the usage for the memory.
+ */
+struct ion_tag_data {
+   ion_user_handle_t handle;
+   char tag[ION_MAX_TAG_LEN];
+};
+
 /**
  * struct ion_custom_data - metadata passed to/from userspace for a custom 
ioctl
  * @cmd:   the custom ioctl function to call
@@ -217,6 +233,15 @@ struct ion_heap_query {
 #define ION_IOC_SYNC   _IOWR(ION_IOC_MAGIC, 7, struct ion_fd_data)
 
 /**
+ * DOC: ION_IOC_TAG - adds a memtrack descriptor tag to memory
+ *
+ * Takes an ion_tag_data struct with the tag field populated with a
+ * descriptive string and handle populated with a valid opaque handle. The
+ * tag should accurately define the usage for the memory.
+ */
+#define ION_IOC_TAG    _IOWR(ION_IOC_MAGIC, 8, struct ion_tag_data)
+
+/**
  * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl
  *
  * Takes the argument of the architecture specific ioctl to call and
-- 
2.8.0.rc3.226.g39d4020



[RFC 0/6] Module for tracking/accounting shared memory buffers

2016-10-11 Thread Ruchi Kandoi
This patchstack introduces a new "memtrack" module for tracking and accounting
memory exported to userspace as shared buffers, like dma-buf fds or GEM handles.

Any process holding a reference to these buffers will keep the kernel from
reclaiming its backing pages.  mm counters don't provide a complete picture of
these allocations, since they only account for pages that are mapped into a
process's address space.  This problem is especially bad for systems like
Android that use dma-buf fds to share graphics and multimedia buffers between
processes: these allocations are often large, have complex sharing patterns,
and are rarely mapped into every process that holds a reference to them.

memtrack maintains a per-process list of shared buffer references, which is
exported to userspace as /proc/[pid]/memtrack.  Buffers can be optionally
"tagged" with a short string: for example, Android userspace would use this
tag to identify whether buffers were allocated on behalf of the camera stack,
GL, etc.  memtrack also exports the VMAs associated with these buffers so
that pages already included in the process's mm counters aren't double-counted.

Shared-buffer allocators can hook into memtrack by embedding
struct memtrack_buffer in their buffer metadata, calling
memtrack_buffer_{init,remove} at buffer allocation and free time, and
memtrack_buffer_{install,uninstall} when a userspace process takes or
drops a reference to the buffer.  For fd-backed buffers like dma-bufs, hooks in
fdtable.c and fork.c automatically notify memtrack when references are added or
removed from a process's fd table.

This patchstack adds memtrack hooks into dma-buf and ion.  If there's upstream
interest in memtrack, it can be extended to other memory allocators as well,
such as GEM implementations.

Greg Hackmann (1):
  drivers: staging: ion: add ION_IOC_TAG ioctl

Ruchi Kandoi (5):
  fs: add installed and uninstalled file_operations
  drivers: misc: add memtrack
  dma-buf: add memtrack support
  memtrack: Adds the accounting to keep track of all mmaped/unmapped
pages.
  memtrack: Add memtrack accounting for forked processes.

 drivers/android/binder.c|   4 +-
 drivers/dma-buf/dma-buf.c   |  37 +++
 drivers/misc/Kconfig|  16 +
 drivers/misc/Makefile   |   1 +
 drivers/misc/memtrack.c | 516 
 drivers/staging/android/ion/ion-ioctl.c |  17 ++
 drivers/staging/android/ion/ion.c   |  60 +++-
 drivers/staging/android/ion/ion_priv.h  |   2 +
 drivers/staging/android/uapi/ion.h  |  25 ++
 fs/file.c   |  38 ++-
 fs/open.c   |   2 +-
 fs/proc/base.c  |   4 +
 include/linux/dma-buf.h |   5 +
 include/linux/fdtable.h |   4 +-
 include/linux/fs.h  |   2 +
 include/linux/memtrack.h| 130 
 include/linux/mm.h  |   3 +
 include/linux/sched.h   |   3 +
 kernel/fork.c   |  23 +-
 19 files changed, 875 insertions(+), 17 deletions(-)
 create mode 100644 drivers/misc/memtrack.c
 create mode 100644 include/linux/memtrack.h

-- 
2.8.0.rc3.226.g39d4020



[RFC 1/6] fs: add installed and uninstalled file_operations

2016-10-11 Thread Ruchi Kandoi
These optional file_operations notify a file implementation when it is
installed or uninstalled from a task's fd table.  This can be used for
accounting of file-backed shared resources like dma-buf.

This involves some changes to the __fd_install() and __close_fd() APIs
to actually pass along the responsible task_struct.  These are low-level
APIs with only two in-tree callers, both adjusted in this patch.

Signed-off-by: Greg Hackmann <ghackm...@google.com>
Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/android/binder.c |  4 ++--
 fs/file.c| 38 +-
 fs/open.c|  2 +-
 include/linux/fdtable.h  |  4 ++--
 include/linux/fs.h   |  2 ++
 5 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 562af94..0bb174e 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -398,7 +398,7 @@ static void task_fd_install(
struct binder_proc *proc, unsigned int fd, struct file *file)
 {
if (proc->files)
-   __fd_install(proc->files, fd, file);
+   __fd_install(proc->tsk, fd, file);
 }
 
 /*
@@ -411,7 +411,7 @@ static long task_close_fd(struct binder_proc *proc, 
unsigned int fd)
if (proc->files == NULL)
return -ESRCH;
 
-   retval = __close_fd(proc->files, fd);
+   retval = __close_fd(proc->tsk, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
 retval == -ERESTARTNOINTR ||
diff --git a/fs/file.c b/fs/file.c
index 69d6990..19c5fad 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -282,6 +282,24 @@ static unsigned int count_open_files(struct fdtable *fdt)
return i;
 }
 
+static inline void fdt_install(struct fdtable *fdt, int fd, struct file *file,
+   struct task_struct *task)
+{
+   if (file->f_op->installed)
+   file->f_op->installed(file, task);
+   rcu_assign_pointer(fdt->fd[fd], file);
+}
+
+static inline void fdt_uninstall(struct fdtable *fdt, int fd,
+   struct task_struct *task)
+{
+   struct file *old_file = fdt->fd[fd];
+
+   if (old_file->f_op->uninstalled)
+   old_file->f_op->uninstalled(old_file, task);
+   rcu_assign_pointer(fdt->fd[fd], NULL);
+}
+
 /*
  * Allocate a new files structure and copy contents from the
  * passed in files structure.
@@ -543,7 +561,7 @@ int __alloc_fd(struct files_struct *files,
/* Sanity check */
if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
-   rcu_assign_pointer(fdt->fd[fd], NULL);
+   fdt_uninstall(fdt, fd, current);
}
 #endif
 
@@ -601,10 +619,11 @@ EXPORT_SYMBOL(put_unused_fd);
  * fd_install() instead.
  */
 
-void __fd_install(struct files_struct *files, unsigned int fd,
+void __fd_install(struct task_struct *task, unsigned int fd,
struct file *file)
 {
struct fdtable *fdt;
+   struct files_struct *files = task->files;
 
might_sleep();
rcu_read_lock_sched();
@@ -618,13 +637,13 @@ void __fd_install(struct files_struct *files, unsigned 
int fd,
smp_rmb();
fdt = rcu_dereference_sched(files->fdt);
BUG_ON(fdt->fd[fd] != NULL);
-   rcu_assign_pointer(fdt->fd[fd], file);
+   fdt_install(fdt, fd, file, task);
rcu_read_unlock_sched();
 }
 
 void fd_install(unsigned int fd, struct file *file)
 {
-   __fd_install(current->files, fd, file);
+   __fd_install(current, fd, file);
 }
 
 EXPORT_SYMBOL(fd_install);
@@ -632,10 +651,11 @@ EXPORT_SYMBOL(fd_install);
 /*
  * The same warnings as for __alloc_fd()/__fd_install() apply here...
  */
-int __close_fd(struct files_struct *files, unsigned fd)
+int __close_fd(struct task_struct *task, unsigned fd)
 {
struct file *file;
struct fdtable *fdt;
+   struct files_struct *files = task->files;
 
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
@@ -644,7 +664,7 @@ int __close_fd(struct files_struct *files, unsigned fd)
file = fdt->fd[fd];
if (!file)
goto out_unlock;
-   rcu_assign_pointer(fdt->fd[fd], NULL);
+   fdt_uninstall(fdt, fd, task);
__clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
@@ -679,7 +699,7 @@ void do_close_on_exec(struct files_struct *files)
file = fdt->fd[fd];
if (!file)
continue;
-   rcu_assign_pointer(fdt->fd[fd], NULL);
+   fdt_uninstall(fdt, fd, current);
__put_unused_fd(files, fd);

[RFC 0/6] Module for tracking/accounting shared memory buffers

2016-10-11 Thread Ruchi Kandoi
This patchstack introduces a new "memtrack" module for tracking and accounting
memory exported to userspace as shared buffers, like dma-buf fds or GEM handles.

Any process holding a reference to these buffers will keep the kernel from
reclaiming its backing pages.  mm counters don't provide a complete picture of
these allocations, since they only account for pages that are mapped into a
process's address space.  This problem is especially bad for systems like
Android that use dma-buf fds to share graphics and multimedia buffers between
processes: these allocations are often large, have complex sharing patterns,
and are rarely mapped into every process that holds a reference to them.

memtrack maintains a per-process list of shared buffer references, which is
exported to userspace as /proc/[pid]/memtrack.  Buffers can be optionally
"tagged" with a short string: for example, Android userspace would use this
tag to identify whether buffers were allocated on behalf of the camera stack,
GL, etc.  memtrack also exports the VMAs associated with these buffers so
that pages already included in the process's mm counters aren't double-counted.

Shared-buffer allocators can hook into memtrack by embedding
struct memtrack_buffer in their buffer metadata, calling
memtrack_buffer_{init,remove} at buffer allocation and free time, and
memtrack_buffer_{install,uninstall} when a userspace process takes or
drops a reference to the buffer.  For fd-backed buffers like dma-bufs, hooks in
fdtable.c and fork.c automatically notify memtrack when references are added or
removed from a process's fd table.

This patchstack adds memtrack hooks into dma-buf and ion.  If there's upstream
interest in memtrack, it can be extended to other memory allocators as well,
such as GEM implementations.

Greg Hackmann (1):
  drivers: staging: ion: add ION_IOC_TAG ioctl

Ruchi Kandoi (5):
  fs: add installed and uninstalled file_operations
  drivers: misc: add memtrack
  dma-buf: add memtrack support
  memtrack: Adds the accounting to keep track of all mmaped/unmapped
pages.
  memtrack: Add memtrack accounting for forked processes.

 drivers/android/binder.c|   4 +-
 drivers/dma-buf/dma-buf.c   |  37 +++
 drivers/misc/Kconfig|  16 +
 drivers/misc/Makefile   |   1 +
 drivers/misc/memtrack.c | 516 
 drivers/staging/android/ion/ion-ioctl.c |  17 ++
 drivers/staging/android/ion/ion.c   |  60 +++-
 drivers/staging/android/ion/ion_priv.h  |   2 +
 drivers/staging/android/uapi/ion.h  |  25 ++
 fs/file.c   |  38 ++-
 fs/open.c   |   2 +-
 fs/proc/base.c  |   4 +
 include/linux/dma-buf.h |   5 +
 include/linux/fdtable.h |   4 +-
 include/linux/fs.h  |   2 +
 include/linux/memtrack.h| 130 
 include/linux/mm.h  |   3 +
 include/linux/sched.h   |   3 +
 kernel/fork.c   |  23 +-
 19 files changed, 875 insertions(+), 17 deletions(-)
 create mode 100644 drivers/misc/memtrack.c
 create mode 100644 include/linux/memtrack.h

-- 
2.8.0.rc3.226.g39d4020



[RFC 1/6] fs: add installed and uninstalled file_operations

2016-10-11 Thread Ruchi Kandoi
These optional file_operations notify a file implementation when it is
installed or uninstalled from a task's fd table.  This can be used for
accounting of file-backed shared resources like dma-buf.

This involves some changes to the __fd_install() and __close_fd() APIs
to actually pass along the responsible task_struct.  These are low-level
APIs with only two in-tree callers, both adjusted in this patch.

Signed-off-by: Greg Hackmann 
Signed-off-by: Ruchi Kandoi 
---
 drivers/android/binder.c |  4 ++--
 fs/file.c| 38 +-
 fs/open.c|  2 +-
 include/linux/fdtable.h  |  4 ++--
 include/linux/fs.h   |  2 ++
 5 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 562af94..0bb174e 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -398,7 +398,7 @@ static void task_fd_install(
struct binder_proc *proc, unsigned int fd, struct file *file)
 {
if (proc->files)
-   __fd_install(proc->files, fd, file);
+   __fd_install(proc->tsk, fd, file);
 }
 
 /*
@@ -411,7 +411,7 @@ static long task_close_fd(struct binder_proc *proc, 
unsigned int fd)
if (proc->files == NULL)
return -ESRCH;
 
-   retval = __close_fd(proc->files, fd);
+   retval = __close_fd(proc->tsk, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
 retval == -ERESTARTNOINTR ||
diff --git a/fs/file.c b/fs/file.c
index 69d6990..19c5fad 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -282,6 +282,24 @@ static unsigned int count_open_files(struct fdtable *fdt)
return i;
 }
 
+static inline void fdt_install(struct fdtable *fdt, int fd, struct file *file,
+   struct task_struct *task)
+{
+   if (file->f_op->installed)
+   file->f_op->installed(file, task);
+   rcu_assign_pointer(fdt->fd[fd], file);
+}
+
+static inline void fdt_uninstall(struct fdtable *fdt, int fd,
+   struct task_struct *task)
+{
+   struct file *old_file = fdt->fd[fd];
+
+   if (old_file->f_op->uninstalled)
+   old_file->f_op->uninstalled(old_file, task);
+   rcu_assign_pointer(fdt->fd[fd], NULL);
+}
+
 /*
  * Allocate a new files structure and copy contents from the
  * passed in files structure.
@@ -543,7 +561,7 @@ int __alloc_fd(struct files_struct *files,
/* Sanity check */
if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
-   rcu_assign_pointer(fdt->fd[fd], NULL);
+   fdt_uninstall(fdt, fd, current);
}
 #endif
 
@@ -601,10 +619,11 @@ EXPORT_SYMBOL(put_unused_fd);
  * fd_install() instead.
  */
 
-void __fd_install(struct files_struct *files, unsigned int fd,
+void __fd_install(struct task_struct *task, unsigned int fd,
struct file *file)
 {
struct fdtable *fdt;
+   struct files_struct *files = task->files;
 
might_sleep();
rcu_read_lock_sched();
@@ -618,13 +637,13 @@ void __fd_install(struct files_struct *files, unsigned 
int fd,
smp_rmb();
fdt = rcu_dereference_sched(files->fdt);
BUG_ON(fdt->fd[fd] != NULL);
-   rcu_assign_pointer(fdt->fd[fd], file);
+   fdt_install(fdt, fd, file, task);
rcu_read_unlock_sched();
 }
 
 void fd_install(unsigned int fd, struct file *file)
 {
-   __fd_install(current->files, fd, file);
+   __fd_install(current, fd, file);
 }
 
 EXPORT_SYMBOL(fd_install);
@@ -632,10 +651,11 @@ EXPORT_SYMBOL(fd_install);
 /*
  * The same warnings as for __alloc_fd()/__fd_install() apply here...
  */
-int __close_fd(struct files_struct *files, unsigned fd)
+int __close_fd(struct task_struct *task, unsigned fd)
 {
struct file *file;
struct fdtable *fdt;
+   struct files_struct *files = task->files;
 
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
@@ -644,7 +664,7 @@ int __close_fd(struct files_struct *files, unsigned fd)
file = fdt->fd[fd];
if (!file)
goto out_unlock;
-   rcu_assign_pointer(fdt->fd[fd], NULL);
+   fdt_uninstall(fdt, fd, task);
__clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
@@ -679,7 +699,7 @@ void do_close_on_exec(struct files_struct *files)
file = fdt->fd[fd];
if (!file)
continue;
-   rcu_assign_pointer(fdt->fd[fd], NULL);
+   fdt_uninstall(fdt, fd, current);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
fil

[PATCH v2] timekeeping: Prints the amounts of time spent during suspend

2016-08-11 Thread Ruchi Kandoi
This helps to keep track of real time while debugging using kernel logs.

Cc: John Stultz <john.stu...@linaro.org>
Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
Changelog since v1:
- removed cross platform warnings.

 kernel/time/timekeeping_debug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index f6bd652..7fdb34f 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -70,5 +70,7 @@ late_initcall(tk_debug_sleep_time_init);
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
sleep_time_bin[fls(t->tv_sec)]++;
+   pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec,
+   t->tv_nsec / NSEC_PER_MSEC);
 }
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH v2] timekeeping: Prints the amounts of time spent during suspend

2016-08-11 Thread Ruchi Kandoi
This helps to keep track of real time while debugging using kernel logs.

Cc: John Stultz 
Signed-off-by: Ruchi Kandoi 
---
Changelog since v1:
- removed cross platform warnings.

 kernel/time/timekeeping_debug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index f6bd652..7fdb34f 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -70,5 +70,7 @@ late_initcall(tk_debug_sleep_time_init);
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
sleep_time_bin[fls(t->tv_sec)]++;
+   pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec,
+   t->tv_nsec / NSEC_PER_MSEC);
 }
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH] timekeeping: Prints the amounts of time spent during suspend

2016-08-11 Thread Ruchi Kandoi
This helps to keep track of real time while debugging using kernel logs.

Cc: John Stultz <john.stu...@linaro.org>
Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 kernel/time/timekeeping_debug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index f6bd652..b67abe8 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -70,5 +70,7 @@ late_initcall(tk_debug_sleep_time_init);
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
sleep_time_bin[fls(t->tv_sec)]++;
+   pr_info("Suspended for %lu.%03lu seconds\n", t->tv_sec,
+   t->tv_nsec / NSEC_PER_MSEC);
 }
 
-- 
2.8.0.rc3.226.g39d4020



[PATCH] timekeeping: Prints the amounts of time spent during suspend

2016-08-11 Thread Ruchi Kandoi
This helps to keep track of real time while debugging using kernel logs.

Cc: John Stultz 
Signed-off-by: Ruchi Kandoi 
---
 kernel/time/timekeeping_debug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index f6bd652..b67abe8 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -70,5 +70,7 @@ late_initcall(tk_debug_sleep_time_init);
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
sleep_time_bin[fls(t->tv_sec)]++;
+   pr_info("Suspended for %lu.%03lu seconds\n", t->tv_sec,
+   t->tv_nsec / NSEC_PER_MSEC);
 }
 
-- 
2.8.0.rc3.226.g39d4020



Re: [PATCH v2 0/2] Adds cpu power accounting per-pid basis.

2015-05-28 Thread Ruchi Kandoi
On Thu, May 21, 2015 at 7:34 AM, Daniel Lezcano
<daniel.lezc...@linaro.org> wrote:
> Hi Ruchi,
>
> On 05/15/2015 02:12 AM, Ruchi Kandoi wrote:
>>
>> These patches add a mechanism which will accurately caculate the CPU power
>> used by all the processes in the system. In order to account for the power
>> used by all the processes a data field "cpu_power" has been added in the
>> task_struct.
>
>
> The term 'energy' makes more sense than 'power'.

>> This field adds power for both the system as well as user
>> time. cpu_power contains the total amount of charge(in uAmsec units) used
>
>
> Why not use the Joules unit ?
>
Because most battery-powered devices have their capacity defined in
mAh (uAmsec is used to avoid floating point and to prevent losing
precision). It will be easier to keep it in that unit so that it can
be aggregated when we are trying to find the total capacity which was
used by a process (which will be combined for a particular
application).

>> by the process. This model takes into account the frequency at which the
>> process was running(i.e higher power for processes running at higher
>> frequencies). It requires the cpufreq_stats module to be initialized with
>> the current numbers for each of the CPU core at each frequency. This will
>> be initialized during init time.
>
>
> The energy task accounting is an interesting feature in my opinion. But your
> patchset does not deal with the power management hardware complexity.
>
> If we reduce the scope of the task energy accounting to the cpu, we are
> facing several issues:
>
>  * A cpu may be supposed to run at a specific OPP but it could share a clock
> line with another cpu which is in a higher frequency. So the frequency is
> actually at a higher rate than what is assumed
>
>  * The firmware may override the cpufreq decisions
>
>  * A process may be idle but its behavior forces the cpuidle governor to
> choose shallow states (that won't occur without the process). For example,
> the process is using very short timers, does a small processing and then go
> to sleep again waiting for the next timer expiration. The result will be a
> process having a low energy consumption but actually because of these
> timers, it will prevent the cpu to enter deep idle state
>
> Beside that, the process may be soliciting a subsystem (another process or
> hardware) which consumes a lot of energy. That won't be accounted even if
> the process is responsible of this extra consumption.
>
True, there will be cases where the accounting for the energy/power
will be deceptive, because we are not taking into consideration the
idle time and the time intervals between which the process is running.
This was aimed to be a simplistic model where only the active time of
the process was taken into account and the processes were blamed for
the active power that they are consuming.
There are similar efforts for other subsystems too, which will be
keeping track of the subsystem power used by a particular pid/uid.

> And the last point is: how do you expect to have the energy numbers as
> nobody is willing to share them for their platform ?
>
This is a tough question. Yes it is difficult to get these numbers,
but I don't think it is unfeasible. We get some numbers from SoC
vendors for the CPUs, trying to drive it to a point where we can get
more accurate numbers.
>> Ruchi Kandoi (2):
>>cpufreq_stats: Adds sysfs file
>>  /sys/devices/system/cpu/cpufreq/current_in_state
>>sched: cpufreq: Adds a field cpu_power in the task_struct
>>
>>   drivers/cpufreq/cpufreq_stats.c | 191
>> +++-
>>   include/linux/cpufreq.h |   8 ++
>>   include/linux/sched.h   |   2 +
>>   kernel/fork.c   |   1 +
>>   kernel/sched/cputime.c  |   7 ++
>>   5 files changed, 207 insertions(+), 2 deletions(-)
>
>
>
>
> --
>  <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 0/2] Adds cpu power accounting per-pid basis.

2015-05-28 Thread Ruchi Kandoi
On Thu, May 21, 2015 at 7:34 AM, Daniel Lezcano
daniel.lezc...@linaro.org wrote:
 Hi Ruchi,

 On 05/15/2015 02:12 AM, Ruchi Kandoi wrote:

 These patches add a mechanism which will accurately caculate the CPU power
 used by all the processes in the system. In order to account for the power
 used by all the processes a data field cpu_power has been added in the
 task_struct.


 The term 'energy' makes more sense than 'power'.

 This field adds power for both the system as well as user
 time. cpu_power contains the total amount of charge(in uAmsec units) used


 Why not use the Joules unit ?

Because most of the devices working on battery has their capacity
defined in mAh(to avoid floating point and to prevent losing precision
uAmsec is used). It will be easier to keep it in that unit so that
it can be aggregated when we are trying to find the total capacity
which was used by a process(which will be combined for a particular
application).

 by the process. This model takes into account the frequency at which the
 process was running(i.e higher power for processes running at higher
 frequencies). It requires the cpufreq_stats module to be initialized with
 the current numbers for each of the CPU core at each frequency. This will
 be initialized during init time.


 The energy task accounting is an interesting feature in my opinion. But your
 patchset does not deal with the power management hardware complexity.

 If we reduce the scope of the task energy accounting to the cpu, we are
 facing several issues:

  * A cpu may be supposed to run at a specific OPP but it could share a clock
 line with another cpu which is in a higher frequency. So the frequency is
 actually at a higher rate than what is assumed

  * The firmware may override the cpufreq decisions

  * A process may be idle but its behavior forces the cpuidle governor to
 choose shallow states (that won't occur without the process). For example,
 the process is using very short timers, does a small processing and then go
 to sleep again waiting for the next timer expiration. The result will be a
 process having a low energy consumption but actually because of these
 timers, it will prevent the cpu to enter deep idle state

 Beside that, the process may be soliciting a subsystem (another process or
 hardware) which consumes a lot of energy. That won't be accounted even if
 the process is responsible of this extra consumption.

True, there will be cases where the accounting for the energy/power
will be deceptive, because we are not taking into consideration the
idle time and time intervals between which the process is running.
This was aimed to be a simplistic model where only the active time for
the process were taken into account and the processes were blamed for
the active power that they are consuming.
 There are similar efforts for other subsystem too which will be
keeping track of the subsystem power used by a particular pid/uid.

 And the last point is: how do you expect to have the energy numbers as
 nobody is willing to share them for their platform ?

This is a tough question. Yes it is difficult to get these numbers,
but I don't think it is unfeasible. We get some numbers from SoC
vendors for the CPUs, trying to drive it to a point where we can get
more accurate numbers.
 Ruchi Kandoi (2):
cpufreq_stats: Adds sysfs file
  /sys/devices/system/cpu/cpufreq/current_in_state
sched: cpufreq: Adds a field cpu_power in the task_struct

   drivers/cpufreq/cpufreq_stats.c | 191
 +++-
   include/linux/cpufreq.h |   8 ++
   include/linux/sched.h   |   2 +
   kernel/fork.c   |   1 +
   kernel/sched/cputime.c  |   7 ++
   5 files changed, 207 insertions(+), 2 deletions(-)




 --
  http://www.linaro.org/ Linaro.org │ Open source software for ARM SoCs

 Follow Linaro:  http://www.facebook.com/pages/Linaro Facebook |
 http://twitter.com/#!/linaroorg Twitter |
 http://www.linaro.org/linaro-blog/ Blog

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] sched: cpufreq: Adds a field cpu_power in the task_struct

2015-05-18 Thread Ruchi Kandoi
On Fri, May 15, 2015 at 9:07 AM, Peter Zijlstra <pet...@infradead.org> wrote:
>
> On Thu, May 14, 2015 at 04:55:48PM -0700, Ruchi Kandoi wrote:
> > cpu_power has been added to keep track of amount of power each task is
> > consuming. cpu_power is updated whenever stime and utime are updated for
> > a task. power is computed by taking into account the frequency at which
> > the current core was running and the current for cpu actively
> > running at hat frequency.
> >
>
> Both you patches completely lack any reason for me to even start
> considering this.
>
> _WHY_ and _what_ are you doing?

We need a mechanism in which we can get information about how much cpu
power each process (which is then aggregated for each
uid/application) is consuming. In the current architecture, it is
based on the amount of the time the process ran. This brings in
inaccuracy because running x seconds at low frequency will have
different power consumption as compared to running at the higher
frequency. With these changes we have the information about the power
which is not only dependent on the time it was running but also takes
into account the frequency it was running at as well as the CPU # it
was running at. Because the cost of running at different CPUs at the
same frequency is different. This gives a better overview of the
current power state of the system wrt cpu power. Please let me know if
more information is required for the same.

Thanks,
Ruchi Kandoi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 0/2] Adds cpu power accounting per-pid basis.

2015-05-18 Thread Ruchi Kandoi
On Thu, May 14, 2015 at 11:34 PM, Heinrich Schuchardt
<xypron.g...@gmx.de> wrote:
>
> On 15.05.2015 02:12, Ruchi Kandoi wrote:
> > These patches add a mechanism which will accurately caculate the CPU power
> > used by all the processes in the system. In order to account for the power
> > used by all the processes a data field "cpu_power" has been added in the
> > task_struct.
>
> Hello Ruchi,
>
> could you, please, explain why the CPU power consumption per task
> information is needed. Please, consider that the CPU causes only part of
> the total system power consumption which also comprises GPU, cooling,
> RAM, etc.

In order to accurately account for the battery used by each of the
process, keeping a track of how long the process ran is not
sufficient. Since running at different frequency has varying power
consumption, we want to track a power number which takes into
consideration the frequency as well as the core on which it was
running. There are similar efforts for other subsystems too to account
for the power used by each process which can then accurately be
aggregated for an application.
>
> The patch series increases the memory size of the kernel, the memory
> consumption per thread and the thread switching time. So, please,
> introduce a configuration switch to enable/disable the function.
>

Yes, configuration can be added. Will update that in the next patch.

> > This field adds power for both the system as well as user
> > time. cpu_power contains the total amount of charge(in uAmsec units) used
> > by the process.
>
> Is there any reasonable way to assign the power consumption to a single
> task if multiple tasks are executed on the same core at the same time
> (e.g. using hyperthreading)?
>
I think the power will be accounted for both the processes on their
respective cores. With hyperthreading, as far as kernel is concerned
they are running on different cores and the time for all the tasks
will be accounted appropriately and hence power. Correct me if I am
wrong.

> > This model takes into account the frequency at which the
> > process was running(i.e higher power for processes running at higher
> > frequencies). It requires the cpufreq_stats module to be initialized with
> > the current numbers for each of the CPU core at each frequency. This will
> > be initialized during init time.
>
> This does not account for power consumption depending on anything else
> but frequency, e.g. floating point commands consuming more power than NOPs.

Currently we have been able to get power numbers for a core when they
are active and running at a particular frequency. Agreed that will be
a better and more accurate model. But getting the power numbers for the
type of instruction and keeping track of number of such instructions
will be cumbersome.
>
>
> Best regards
>
> Heinrich Schuchardt
> >
> > Ruchi Kandoi (2):
> >   cpufreq_stats: Adds sysfs file
> > /sys/devices/system/cpu/cpufreq/current_in_state
> >   sched: cpufreq: Adds a field cpu_power in the task_struct
> >
> >  drivers/cpufreq/cpufreq_stats.c | 191 
> > +++-
> >  include/linux/cpufreq.h |   8 ++
> >  include/linux/sched.h   |   2 +
> >  kernel/fork.c   |   1 +
> >  kernel/sched/cputime.c  |   7 ++
> >  5 files changed, 207 insertions(+), 2 deletions(-)
> >
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 0/2] Adds cpu power accounting per-pid basis.

2015-05-18 Thread Ruchi Kandoi
On Thu, May 14, 2015 at 11:34 PM, Heinrich Schuchardt
xypron.g...@gmx.de wrote:

 On 15.05.2015 02:12, Ruchi Kandoi wrote:
  These patches add a mechanism which will accurately caculate the CPU power
  used by all the processes in the system. In order to account for the power
  used by all the processes a data field cpu_power has been added in the
  task_struct.

 Hello Ruchi,

 could you, please, explain why the CPU power consumption per task
 information is needed. Please, consider that the CPU causes only part of
 the total system power consumption which also comprises GPU, cooling,
 RAM, etc.

In order to accurately account for the battery used by each of the
process, keeping a track of how long the process ran is not
sufficient. Since running at different frequency has varying power
consumption, we want to track a power number which takes into
consideration the frequency as well as the core on which it was
running. There are similar efforts for other subsystems too to account
for the power used by each process which can then accurately be
aggregated for an application.

 The patch series increases the memory size of the kernel, the memory
 consumption per thread and the thread switching time. So, please,
 introduce a configuration switch to enable/disable the function.


Yes, configuration can be added. Will update that in the next patch.

  This field adds power for both the system as well as user
  time. cpu_power contains the total amount of charge(in uAmsec units) used
  by the process.

 Is there any reasonable way to assign the power consumption to a single
 task if multiple tasks are executed on the same core at the same time
 (e.g. using hyperthreading)?

I think the power will be accounted for both the processes on their
respective cores. With hyperthreading, as far as kernel is concerned
they are running on different cores and the time for all the tasks
will be accounted appropriately and hence power. Correct me if I am
wrong.

  This model takes into account the frequency at which the
  process was running(i.e higher power for processes running at higher
  frequencies). It requires the cpufreq_stats module to be initialized with
  the current numbers for each of the CPU core at each frequency. This will
  be initialized during init time.

 This does not account for power consumption depending on anything else
 but frequency, e.g. floating point commands consuming more power than NOPs.

Currently we have been able to get power numbers for a core when they
are active and running at a particular frequency. Agreed that will be
a better and more accurate model. But getting the power numbers for the
type of instruction and keeping track of number of such instructions
will be cumbersome.


 Best regards

 Heinrich Schuchardt
 
  Ruchi Kandoi (2):
cpufreq_stats: Adds sysfs file
  /sys/devices/system/cpu/cpufreq/current_in_state
sched: cpufreq: Adds a field cpu_power in the task_struct
 
   drivers/cpufreq/cpufreq_stats.c | 191 
  +++-
   include/linux/cpufreq.h |   8 ++
   include/linux/sched.h   |   2 +
   kernel/fork.c   |   1 +
   kernel/sched/cputime.c  |   7 ++
   5 files changed, 207 insertions(+), 2 deletions(-)
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] sched: cpufreq: Adds a field cpu_power in the task_struct

2015-05-18 Thread Ruchi Kandoi
On Fri, May 15, 2015 at 9:07 AM, Peter Zijlstra pet...@infradead.org wrote:

 On Thu, May 14, 2015 at 04:55:48PM -0700, Ruchi Kandoi wrote:
  cpu_power has been added to keep track of amount of power each task is
  consuming. cpu_power is updated whenever stime and utime are updated for
  a task. power is computed by taking into account the frequency at which
  the current core was running and the current for cpu actively
  running at hat frequency.
 

 Both you patches completely lack any reason for me to even start
 considering this.

 _WHY_ and _what_ are you doing?

We need a mechanism in which we can get information about how much cpu
power each process (which is then aggregated for each
uid/application) is consuming. In the current architecture, it is
based on the amount of the time the process ran. This brings in
inaccuracy because running x seconds at low frequency will have
different power consumption as compared to running at the higher
frequency. With these changes we have the information about the power
which is not only dependent on the time it was running but also takes
into account the frequency it was running at as well as the CPU # it
was running at. Because the cost of running at different CPUs at the
same frequency is different. This gives a better overview of the
current power state of the system wrt cpu power. Please let me know if
more information is required for the same.

Thanks,
Ruchi Kandoi
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/2] cpufreq_stats: Adds sysfs file /sys/devices/system/cpu/cpufreq/current_in_state

2015-05-15 Thread Ruchi Kandoi
On Thu, May 14, 2015 at 7:48 PM, Viresh Kumar <viresh.ku...@linaro.org> wrote:
> I am not replying for concept here, as sched maintainers are in a
> better position for that, but a nit below..
>
> On 14-05-15, 17:12, Ruchi Kandoi wrote:
>> Adds the sysfs file for userspace to initialize the active current
>> values for all the cores at each of the frequencies.
>>
>> The format for storing the values is as follows:
>> echo "CPU<cpu#>:<freq1>=<current in uA> <freq2>=<current>,CPU<cpu#>:
>> ..." > /sys/devices/system/cpu/cpufreq/current_in_state
>
> Why this file? And not
> /sys/devices/system/cpu/cpuX/cpufreq/stats/current_in_state ? That way
> you don't have to replicate the same information for all CPUs, as the
> stats folder can be shared by multiple CPUs (which share their
> clock/voltage rails)..

Some of the hand-held devices support hot-plugging of the cpus and
when the core is hot-plugged out the
/sys/devices/system/cpu/cpuX/cpufreq directory is removed too. So it
won't be possible to share folders by multiple CPUs.

>
> --
> viresh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/2] cpufreq_stats: Adds sysfs file /sys/devices/system/cpu/cpufreq/current_in_state

2015-05-15 Thread Ruchi Kandoi
On Thu, May 14, 2015 at 7:48 PM, Viresh Kumar viresh.ku...@linaro.org wrote:
 I am not replying for concept here, as sched maintainers are in a
 better position for that, but a nit below..

 On 14-05-15, 17:12, Ruchi Kandoi wrote:
 Adds the sysfs file for userspace to initialize the active current
 values for all the cores at each of the frequencies.

 The format for storing the values is as follows:
 echo "CPU<cpu#>:<freq1>=<current in uA> <freq2>=<current>,CPU<cpu#>:
 ..." > /sys/devices/system/cpu/cpufreq/current_in_state

 Why this file? And not
 /sys/devices/system/cpu/cpuX/cpufreq/stats/current_in_state ? That way
 you don't have to replicate the same information for all CPUs, as the
 stats folder can be shared by multiple CPUs (which share their
 clock/voltage rails)..

Some of the hand-held devices support hot-plugging of the cpus and
when the core is hot-plugged out the
/sys/devices/system/cpu/cpuX/cpufreq directory is removed too. So it
won't be possible to share folders by multiple CPUs.


 --
 viresh
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/2] cpufreq_stats: Adds sysfs file /sys/devices/system/cpu/cpufreq/current_in_state

2015-05-14 Thread Ruchi Kandoi
Adds the sysfs file for userspace to initialize the active current
values for all the cores at each of the frequencies.

The format for storing the values is as follows:
echo "CPU<cpu#>:<freq1>=<current in uA> <freq2>=<current>,CPU<cpu#>:
..." > /sys/devices/system/cpu/cpufreq/current_in_state

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/cpufreq/cpufreq_stats.c | 163 +++-
 1 file changed, 161 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 5e370a3..6f0b562 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -30,6 +30,14 @@ struct cpufreq_stats {
 #endif
 };
 
+struct cpufreq_power_stats {
+   unsigned int state_num;
+   unsigned int *curr;
+   unsigned int *freq_table;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_power_stats *, cpufreq_power_stats);
+
 static int cpufreq_stats_update(struct cpufreq_stats *stats)
 {
unsigned long long cur_time = get_jiffies_64();
@@ -61,6 +69,87 @@ static ssize_t show_time_in_state(struct cpufreq_policy 
*policy, char *buf)
return len;
 }
 
+static void store_current_value(struct cpufreq_power_stats *powerstats,
+   int freq, int curr)
+{
+   int i;
+
+   /* freq_table doesn't contain any CPU_FREQ_INVALID */
+   for (i = 0; i < powerstats->state_num; i++) {
+   if (powerstats->freq_table[i] == freq) {
+   powerstats->curr[i] = curr;
+   break;
+   }
+   }
+}
+
+static ssize_t store_current_in_state(struct cpufreq_policy *policy,
+   const char *buf, size_t len)
+{
+   char *cp, *cp2, *start, *buffer;
+   unsigned int cpu_num, ret, curr, freq;
+   struct cpufreq_power_stats *powerstats;
+
+   if (!buf || len < 0)
+   return len;
+
+   buffer = kzalloc(len + 1, GFP_KERNEL);
+   if (!buffer)
+   return len;
+
+   strncpy(buffer, buf, len);
+   buffer[len] = '\0';
+   cp = buffer;
+   spin_lock(&cpufreq_stats_lock);
+   while ((start = strsep(&cp, ","))) {
+   ret = sscanf(start, "CPU%u:", &cpu_num);
+   if (ret != 1 || cpu_num > (num_possible_cpus() - 1)) {
+   ret = -EINVAL;
+   goto error;
+   }
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   if (!powerstats)
+   continue;
+
+   /* sscanf makes sure that strchr doesn't return a NULL */
+   cp2 = strchr(start, ':') + 1;
+   while ((start = strsep(&cp2, " "))) {
+   if (sscanf(start, "%u=%u", &freq, &curr) != 2) {
+   ret = -EINVAL;
+   goto error;
+   }
+   store_current_value(powerstats, freq, curr);
+   }
+   }
+   ret = len;
+error:
+   spin_unlock(&cpufreq_stats_lock);
+   kfree(buffer);
+   return ret;
+}
+
+static ssize_t show_current_in_state(struct cpufreq_policy *policy, char *buf)
+{
+   ssize_t len = 0;
+   unsigned int i, cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   spin_lock(&cpufreq_stats_lock);
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   len += scnprintf(buf + len, PAGE_SIZE - len, "CPU%d:", cpu);
+   for (i = 0; i < powerstats->state_num; i++)
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "%d=%d ", powerstats->freq_table[i],
+   powerstats->curr[i]);
+   len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+   }
+   spin_unlock(&cpufreq_stats_lock);
+   return len;
+}
+
 #ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 {
@@ -107,6 +196,7 @@ cpufreq_freq_attr_ro(trans_table);
 
 cpufreq_freq_attr_ro(total_trans);
 cpufreq_freq_attr_ro(time_in_state);
+cpufreq_freq_attr_rw(current_in_state);
 
 static struct attribute *default_attrs[] = {
&total_trans.attr,
@@ -159,6 +249,67 @@ static void cpufreq_stats_free_table(unsigned int cpu)
cpufreq_cpu_put(policy);
 }
 
+static void cpufreq_powerstats_free(void)
+{
+   int cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   sysfs_remove_file(cpufreq_global_kobject, &current_in_state.attr);
+
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   kfree(powerstats->curr);
+   kfree(powerstats);
+   per_cpu(cpufreq_power_stats, cpu) = NULL;
+   }
+}
+
+static void cpufreq_powerstats_crea

[PATCH v2 2/2] sched: cpufreq: Adds a field cpu_power in the task_struct

2015-05-14 Thread Ruchi Kandoi
cpu_power has been added to keep track of amount of power each task is
consuming. cpu_power is updated whenever stime and utime are updated for
a task. power is computed by taking into account the frequency at which
the current core was running and the current for cpu actively
running at that frequency.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/cpufreq/cpufreq_stats.c | 28 
 include/linux/cpufreq.h |  8 
 include/linux/sched.h   |  2 ++
 kernel/fork.c   |  1 +
 kernel/sched/cputime.c  |  7 +++
 5 files changed, 46 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 6f0b562..682ed898 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static spinlock_t cpufreq_stats_lock;
 
@@ -83,6 +84,33 @@ static void store_current_value(struct cpufreq_power_stats 
*powerstats,
}
 }
 
+void acct_update_power(struct task_struct *task, cputime_t cputime)
+{
+   struct cpufreq_power_stats *powerstats;
+   struct cpufreq_stats *stats;
+   struct cpufreq_policy *policy;
+   unsigned int cpu_num, curr;
+
+   if (!task)
+   return;
+   cpu_num = task_cpu(task);
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   policy = cpufreq_cpu_get(cpu_num);
+   if (!policy)
+   return;
+
+   if (!powerstats || !(policy->stats)) {
+   cpufreq_cpu_put(policy);
+   return;
+   }
+
+   stats = policy->stats;
+   curr = powerstats->curr[stats->last_index];
+   task->cpu_power += curr * cputime_to_usecs(cputime);
+   cpufreq_cpu_put(policy);
+}
+EXPORT_SYMBOL_GPL(acct_update_power);
+
 static ssize_t store_current_in_state(struct cpufreq_policy *policy,
const char *buf, size_t len)
 {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 2ee4888..86826c8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  *CPUFREQ INTERFACE  *
@@ -601,4 +602,11 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
 int cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+/*
+ * CPUFREQ STATS *
+ */
+
+void acct_update_power(struct task_struct *p, cputime_t cputime);
+
 #endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e61..1f2400a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1429,6 +1429,7 @@ struct task_struct {
int __user *clear_child_tid;/* CLONE_CHILD_CLEARTID */
 
cputime_t utime, stime, utimescaled, stimescaled;
+   unsigned long long cpu_power;
cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
@@ -1441,6 +1442,7 @@ struct task_struct {
VTIME_USER,
VTIME_SYS,
} vtime_snap_whence;
+
 #endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
diff --git a/kernel/fork.c b/kernel/fork.c
index 03c1eaa..2ca0e9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1341,6 +1341,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
+   p->cpu_power = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8394b1e..53a79d5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "sched.h"
 
 
@@ -149,6 +150,9 @@ void account_user_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for user time used */
acct_account_cputime(p);
+
+   /* Account power usage for user time */
+   acct_update_power(p, cputime);
 }
 
 /*
@@ -199,6 +203,9 @@ void __account_system_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for system time used */
acct_account_cputime(p);
+
+   /* Account power usage for system time */
+   acct_update_power(p, cputime);
 }
 
 /*
-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More ma

[PATCH v2 0/2] Adds cpu power accounting per-pid basis.

2015-05-14 Thread Ruchi Kandoi
These patches add a mechanism which will accurately calculate the CPU power
used by all the processes in the system. In order to account for the power
used by all the processes a data field "cpu_power" has been added in the
task_struct. This field adds power for both the system as well as user
time. cpu_power contains the total amount of charge(in uAmsec units) used
by the process. This model takes into account the frequency at which the
process was running(i.e higher power for processes running at higher
frequencies). It requires the cpufreq_stats module to be initialized with
the current numbers for each of the CPU core at each frequency. This will
be initialized during init time.

Ruchi Kandoi (2):
  cpufreq_stats: Adds sysfs file
/sys/devices/system/cpu/cpufreq/current_in_state
  sched: cpufreq: Adds a field cpu_power in the task_struct

 drivers/cpufreq/cpufreq_stats.c | 191 +++-
 include/linux/cpufreq.h |   8 ++
 include/linux/sched.h   |   2 +
 kernel/fork.c   |   1 +
 kernel/sched/cputime.c  |   7 ++
 5 files changed, 207 insertions(+), 2 deletions(-)

-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] sched: cpufreq: Adds a field cpu_power in the task_struct

2015-05-14 Thread Ruchi Kandoi
cpu_power has been added to keep track of amount of power each task is
consuming. cpu_power is updated whenever stime and utime are updated for
a task. power is computed by taking into account the frequency at which
the current core was running and the current for cpu actively
running at that frequency.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/cpufreq/cpufreq_stats.c | 23 +++
 include/linux/cpufreq.h |  8 
 include/linux/sched.h   |  2 ++
 kernel/fork.c   |  1 +
 kernel/sched/cputime.c  |  7 +++
 5 files changed, 41 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 6f0b562..4a0bd9a 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static spinlock_t cpufreq_stats_lock;
 
@@ -83,6 +84,28 @@ static void store_current_value(struct cpufreq_power_stats 
*powerstats,
}
 }
 
+void acct_update_power(struct task_struct *task, cputime_t cputime)
+{
+   struct cpufreq_power_stats *powerstats;
+   struct cpufreq_stats *stats;
+   struct cpufreq_policy *policy;
+   unsigned int cpu_num, curr;
+
+   if (!task)
+   return;
+   cpu_num = task_cpu(task);
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   policy = cpufreq_cpu_get(cpu_num);
+   if (!powerstats || !policy || !(policy->stats))
+   return;
+
+   stats = policy->stats;
+   curr = powerstats->curr[stats->last_index];
+   task->cpu_power += curr * cputime_to_usecs(cputime);
+   cpufreq_cpu_put(cpu_num);
+}
+EXPORT_SYMBOL_GPL(acct_update_power);
+
 static ssize_t store_current_in_state(struct cpufreq_policy *policy,
const char *buf, size_t len)
 {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 2ee4888..86826c8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  *CPUFREQ INTERFACE  *
@@ -601,4 +602,11 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
 int cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+/*
+ * CPUFREQ STATS *
+ */
+
+void acct_update_power(struct task_struct *p, cputime_t cputime);
+
 #endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e61..1f2400a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1429,6 +1429,7 @@ struct task_struct {
int __user *clear_child_tid;/* CLONE_CHILD_CLEARTID */
 
cputime_t utime, stime, utimescaled, stimescaled;
+   unsigned long long cpu_power;
cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
@@ -1441,6 +1442,7 @@ struct task_struct {
VTIME_USER,
VTIME_SYS,
} vtime_snap_whence;
+
 #endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
diff --git a/kernel/fork.c b/kernel/fork.c
index 03c1eaa..2ca0e9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1341,6 +1341,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
+   p->cpu_power = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8394b1e..53a79d5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "sched.h"
 
 
@@ -149,6 +150,9 @@ void account_user_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for user time used */
acct_account_cputime(p);
+
+   /* Account power usage for user time */
+   acct_update_power(p, cputime);
 }
 
 /*
@@ -199,6 +203,9 @@ void __account_system_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for system time used */
acct_account_cputime(p);
+
+   /* Account power usage for system time */
+   acct_update_power(p, cputime);
 }
 
 /*
-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] cpufreq_stats: Adds sysfs file /sys/devices/system/cpu/cpufreq/current_in_state

2015-05-14 Thread Ruchi Kandoi
Adds the sysfs file for userspace to initialize the active current
values for all the cores at each of the frequencies.

The format for storing the values is as follows:
echo "CPU<cpu#>:<freq1>=<current in uA> <freq2>=<current>,CPU<cpu#>:
..." > /sys/devices/system/cpu/cpufreq/current_in_state

Signed-off-by: Ruchi Kandoi 
---
 drivers/cpufreq/cpufreq_stats.c | 163 +++-
 1 file changed, 161 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 5e370a3..6f0b562 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -30,6 +30,14 @@ struct cpufreq_stats {
 #endif
 };
 
+struct cpufreq_power_stats {
+   unsigned int state_num;
+   unsigned int *curr;
+   unsigned int *freq_table;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_power_stats *, cpufreq_power_stats);
+
 static int cpufreq_stats_update(struct cpufreq_stats *stats)
 {
unsigned long long cur_time = get_jiffies_64();
@@ -61,6 +69,87 @@ static ssize_t show_time_in_state(struct cpufreq_policy 
*policy, char *buf)
return len;
 }
 
+static void store_current_value(struct cpufreq_power_stats *powerstats,
+   int freq, int curr)
+{
+   int i;
+
+   /* freq_table doesn't contain any CPU_FREQ_INVALID */
+   for (i = 0; i < powerstats->state_num; i++) {
+   if (powerstats->freq_table[i] == freq) {
+   powerstats->curr[i] = curr;
+   break;
+   }
+   }
+}
+
+static ssize_t store_current_in_state(struct cpufreq_policy *policy,
+   const char *buf, size_t len)
+{
+   char *cp, *cp2, *start, *buffer;
+   unsigned int cpu_num, ret, curr, freq;
+   struct cpufreq_power_stats *powerstats;
+
+   if (!buf || len < 0)
+   return len;
+
+   buffer = kzalloc(len + 1, GFP_KERNEL);
+   if (!buffer)
+   return len;
+
+   strncpy(buffer, buf, len);
+   buffer[len] = '\0';
+   cp = buffer;
+   spin_lock(_stats_lock);
+   while ((start = strsep(, ","))) {
+   ret = sscanf(start, "CPU%u:", _num);
+   if (ret != 1 || cpu_num > (num_possible_cpus() - 1)) {
+   ret = -EINVAL;
+   goto error;
+   }
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   if (!powerstats)
+   continue;
+
+   /* sscanf makes sure that strchr doesn't return a NULL */
+   cp2 = strchr(start, ':') + 1;
+   while ((start = strsep(, " "))) {
+   if (sscanf(start, "%u=%u", , ) != 2) {
+   ret = -EINVAL;
+   goto error;
+   }
+   store_current_value(powerstats, freq, curr);
+   }
+   }
+   ret = len;
+error:
+   spin_unlock(_stats_lock);
+   kfree(buffer);
+   return ret;
+}
+
+static ssize_t show_current_in_state(struct cpufreq_policy *policy, char *buf)
+{
+   ssize_t len = 0;
+   unsigned int i, cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   spin_lock(_stats_lock);
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   len += scnprintf(buf + len, PAGE_SIZE - len, "CPU%d:", cpu);
+   for (i = 0; i < powerstats->state_num; i++)
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   "%d=%d ", powerstats->freq_table[i],
+   powerstats->curr[i]);
+   len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+   }
+   spin_unlock(_stats_lock);
+   return len;
+}
+
 #ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 {
@@ -107,6 +196,7 @@ cpufreq_freq_attr_ro(trans_table);
 
 cpufreq_freq_attr_ro(total_trans);
 cpufreq_freq_attr_ro(time_in_state);
+cpufreq_freq_attr_rw(current_in_state);
 
 static struct attribute *default_attrs[] = {
_trans.attr,
@@ -159,6 +249,67 @@ static void cpufreq_stats_free_table(unsigned int cpu)
cpufreq_cpu_put(policy);
 }
 
+static void cpufreq_powerstats_free(void)
+{
+   int cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   sysfs_remove_file(cpufreq_global_kobject, _in_state.attr);
+
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   kfree(powerstats->curr);
+   kfree(powerstats);
+   per_cpu(cpufreq_power_stats, cpu) = NULL;
+   }
+}
+
+static void cpufreq_powerstats_crea

[PATCH 0/2] Adds cpu power accounting per-pid basis.

2015-05-14 Thread Ruchi Kandoi
These patches add a mechanism which will accurately calculate the CPU power
used by all the processes in the system. In order to account for the power
used by all the processes a data field "cpu_power" has been added in the
task_struct. This field adds power for both the system as well as user
time. cpu_power contains the total amount of charge(in uAmsec units) used
by the process. This model takes into account the frequency at which the
process was running(i.e higher power for processes running at higher
frequencies). It requires the cpufreq_stats module to be initialized with
the current numbers for each of the CPU core at each frequency. This will
be initialized during init time.

Ruchi Kandoi (2):
  cpufreq_stats: Adds sysfs file
/sys/devices/system/cpu/cpufreq/current_in_state
  sched: cpufreq: Adds a field cpu_power in the task_struct

 drivers/cpufreq/cpufreq_stats.c | 186 +++-
 include/linux/cpufreq.h |   8 ++
 include/linux/sched.h   |   2 +
 kernel/fork.c   |   1 +
 kernel/sched/cputime.c  |   7 ++
 5 files changed, 202 insertions(+), 2 deletions(-)

-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] cpufreq_stats: Adds sysfs file /sys/devices/system/cpu/cpufreq/current_in_state

2015-05-14 Thread Ruchi Kandoi
Adds the sysfs file for userspace to initialize the active current
values for all the cores at each of the frequencies.

The format for storing the values is as follows:
echo "CPU<cpu#>:<freq1>=<current in uA> <freq2>=<current>,CPU<cpu#>:
..." > /sys/devices/system/cpu/cpufreq/current_in_state

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/cpufreq/cpufreq_stats.c | 163 +++-
 1 file changed, 161 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 5e370a3..6f0b562 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -30,6 +30,14 @@ struct cpufreq_stats {
 #endif
 };
 
+struct cpufreq_power_stats {
+   unsigned int state_num;
+   unsigned int *curr;
+   unsigned int *freq_table;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_power_stats *, cpufreq_power_stats);
+
 static int cpufreq_stats_update(struct cpufreq_stats *stats)
 {
unsigned long long cur_time = get_jiffies_64();
@@ -61,6 +69,87 @@ static ssize_t show_time_in_state(struct cpufreq_policy 
*policy, char *buf)
return len;
 }
 
+static void store_current_value(struct cpufreq_power_stats *powerstats,
+   int freq, int curr)
+{
+   int i;
+
+   /* freq_table doesn't contain any CPU_FREQ_INVALID */
+   for (i = 0; i  powerstats-state_num; i++) {
+   if (powerstats-freq_table[i] == freq) {
+   powerstats-curr[i] = curr;
+   break;
+   }
+   }
+}
+
+static ssize_t store_current_in_state(struct cpufreq_policy *policy,
+   const char *buf, size_t len)
+{
+   char *cp, *cp2, *start, *buffer;
+   unsigned int cpu_num, ret, curr, freq;
+   struct cpufreq_power_stats *powerstats;
+
+   if (!buf || len  0)
+   return len;
+
+   buffer = kzalloc(len + 1, GFP_KERNEL);
+   if (!buffer)
+   return len;
+
+   strncpy(buffer, buf, len);
+   buffer[len] = '\0';
+   cp = buffer;
+   spin_lock(cpufreq_stats_lock);
+   while ((start = strsep(cp, ,))) {
+   ret = sscanf(start, CPU%u:, cpu_num);
+   if (ret != 1 || cpu_num  (num_possible_cpus() - 1)) {
+   ret = -EINVAL;
+   goto error;
+   }
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   if (!powerstats)
+   continue;
+
+   /* sscanf makes sure that strchr doesn't return a NULL */
+   cp2 = strchr(start, ':') + 1;
+   while ((start = strsep(cp2,  ))) {
+   if (sscanf(start, %u=%u, freq, curr) != 2) {
+   ret = -EINVAL;
+   goto error;
+   }
+   store_current_value(powerstats, freq, curr);
+   }
+   }
+   ret = len;
+error:
+   spin_unlock(cpufreq_stats_lock);
+   kfree(buffer);
+   return ret;
+}
+
+static ssize_t show_current_in_state(struct cpufreq_policy *policy, char *buf)
+{
+   ssize_t len = 0;
+   unsigned int i, cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   spin_lock(cpufreq_stats_lock);
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   len += scnprintf(buf + len, PAGE_SIZE - len, CPU%d:, cpu);
+   for (i = 0; i  powerstats-state_num; i++)
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   %d=%d , powerstats-freq_table[i],
+   powerstats-curr[i]);
+   len += scnprintf(buf + len, PAGE_SIZE - len, \n);
+   }
+   spin_unlock(cpufreq_stats_lock);
+   return len;
+}
+
 #ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 {
@@ -107,6 +196,7 @@ cpufreq_freq_attr_ro(trans_table);
 
 cpufreq_freq_attr_ro(total_trans);
 cpufreq_freq_attr_ro(time_in_state);
+cpufreq_freq_attr_rw(current_in_state);
 
 static struct attribute *default_attrs[] = {
total_trans.attr,
@@ -159,6 +249,67 @@ static void cpufreq_stats_free_table(unsigned int cpu)
cpufreq_cpu_put(policy);
 }
 
+static void cpufreq_powerstats_free(void)
+{
+   int cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   sysfs_remove_file(cpufreq_global_kobject, current_in_state.attr);
+
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   kfree(powerstats-curr);
+   kfree(powerstats);
+   per_cpu(cpufreq_power_stats, cpu) = NULL;
+   }
+}
+
+static void cpufreq_powerstats_create(unsigned int cpu

[PATCH 0/2] Adds cpu power accounting per-pid basis.

2015-05-14 Thread Ruchi Kandoi
These patches add a mechanism which will accurately calculate the CPU power
used by all the processes in the system. In order to account for the power
used by all the processes a data field "cpu_power" has been added in the
task_struct. This field adds power for both the system as well as user
time. cpu_power contains the total amount of charge(in uAmsec units) used
by the process. This model takes into account the frequency at which the
process was running(i.e higher power for processes running at higher
frequencies). It requires the cpufreq_stats module to be initialized with
the current numbers for each of the CPU core at each frequency. This will
be initialized during init time.

Ruchi Kandoi (2):
  cpufreq_stats: Adds sysfs file
/sys/devices/system/cpu/cpufreq/current_in_state
  sched: cpufreq: Adds a field cpu_power in the task_struct

 drivers/cpufreq/cpufreq_stats.c | 186 +++-
 include/linux/cpufreq.h |   8 ++
 include/linux/sched.h   |   2 +
 kernel/fork.c   |   1 +
 kernel/sched/cputime.c  |   7 ++
 5 files changed, 202 insertions(+), 2 deletions(-)

-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/2] sched: cpufreq: Adds a field cpu_power in the task_struct

2015-05-14 Thread Ruchi Kandoi
cpu_power has been added to keep track of amount of power each task is
consuming. cpu_power is updated whenever stime and utime are updated for
a task. power is computed by taking into account the frequency at which
the current core was running and the current for cpu actively
running at that frequency.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/cpufreq/cpufreq_stats.c | 28 
 include/linux/cpufreq.h |  8 
 include/linux/sched.h   |  2 ++
 kernel/fork.c   |  1 +
 kernel/sched/cputime.c  |  7 +++
 5 files changed, 46 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 6f0b562..682ed898 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -14,6 +14,7 @@
 #include linux/module.h
 #include linux/slab.h
 #include linux/cputime.h
+#include linux/sched.h
 
 static spinlock_t cpufreq_stats_lock;
 
@@ -83,6 +84,33 @@ static void store_current_value(struct cpufreq_power_stats 
*powerstats,
}
 }
 
+void acct_update_power(struct task_struct *task, cputime_t cputime)
+{
+   struct cpufreq_power_stats *powerstats;
+   struct cpufreq_stats *stats;
+   struct cpufreq_policy *policy;
+   unsigned int cpu_num, curr;
+
+   if (!task)
+   return;
+   cpu_num = task_cpu(task);
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   policy = cpufreq_cpu_get(cpu_num);
+   if (!policy)
+   return;
+
+   if (!powerstats || !(policy-stats)) {
+   cpufreq_cpu_put(policy);
+   return;
+   }
+
+   stats = policy-stats;
+   curr = powerstats-curr[stats-last_index];
+   task-cpu_power += curr * cputime_to_usecs(cputime);
+   cpufreq_cpu_put(policy);
+}
+EXPORT_SYMBOL_GPL(acct_update_power);
+
 static ssize_t store_current_in_state(struct cpufreq_policy *policy,
const char *buf, size_t len)
 {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 2ee4888..86826c8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -18,6 +18,7 @@
 #include linux/notifier.h
 #include linux/spinlock.h
 #include linux/sysfs.h
+#include asm/cputime.h
 
 /*
  *CPUFREQ INTERFACE  *
@@ -601,4 +602,11 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
 int cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+/*
+ * CPUFREQ STATS *
+ */
+
+void acct_update_power(struct task_struct *p, cputime_t cputime);
+
 #endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e61..1f2400a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1429,6 +1429,7 @@ struct task_struct {
int __user *clear_child_tid;/* CLONE_CHILD_CLEARTID */
 
cputime_t utime, stime, utimescaled, stimescaled;
+   unsigned long long cpu_power;
cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
@@ -1441,6 +1442,7 @@ struct task_struct {
VTIME_USER,
VTIME_SYS,
} vtime_snap_whence;
+
 #endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
diff --git a/kernel/fork.c b/kernel/fork.c
index 03c1eaa..2ca0e9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1341,6 +1341,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 
p-utime = p-stime = p-gtime = 0;
p-utimescaled = p-stimescaled = 0;
+   p-cpu_power = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p-prev_cputime.utime = p-prev_cputime.stime = 0;
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8394b1e..53a79d5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -4,6 +4,7 @@
 #include linux/kernel_stat.h
 #include linux/static_key.h
 #include linux/context_tracking.h
+#include linux/cpufreq.h
 #include sched.h
 
 
@@ -149,6 +150,9 @@ void account_user_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for user time used */
acct_account_cputime(p);
+
+   /* Account power usage for user time */
+   acct_update_power(p, cputime);
 }
 
 /*
@@ -199,6 +203,9 @@ void __account_system_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for system time used */
acct_account_cputime(p);
+
+   /* Account power usage for system time */
+   acct_update_power(p, cputime);
 }
 
 /*
-- 
2.2.0.rc0.207.ga3a616c

[PATCH v2 0/2] Adds cpu power accounting per-pid basis.

2015-05-14 Thread Ruchi Kandoi
These patches add a mechanism which will accurately calculate the CPU power
used by all the processes in the system. In order to account for the power
used by all the processes a data field "cpu_power" has been added in the
task_struct. This field adds power for both the system as well as user
time. cpu_power contains the total amount of charge(in uAmsec units) used
by the process. This model takes into account the frequency at which the
process was running(i.e higher power for processes running at higher
frequencies). It requires the cpufreq_stats module to be initialized with
the current numbers for each of the CPU core at each frequency. This will
be initialized during init time.

Ruchi Kandoi (2):
  cpufreq_stats: Adds sysfs file
/sys/devices/system/cpu/cpufreq/current_in_state
  sched: cpufreq: Adds a field cpu_power in the task_struct

 drivers/cpufreq/cpufreq_stats.c | 191 +++-
 include/linux/cpufreq.h |   8 ++
 include/linux/sched.h   |   2 +
 kernel/fork.c   |   1 +
 kernel/sched/cputime.c  |   7 ++
 5 files changed, 207 insertions(+), 2 deletions(-)

-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] sched: cpufreq: Adds a field cpu_power in the task_struct

2015-05-14 Thread Ruchi Kandoi
cpu_power has been added to keep track of amount of power each task is
consuming. cpu_power is updated whenever stime and utime are updated for
a task. power is computed by taking into account the frequency at which
the current core was running and the current for cpu actively
running at that frequency.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/cpufreq/cpufreq_stats.c | 23 +++
 include/linux/cpufreq.h |  8 
 include/linux/sched.h   |  2 ++
 kernel/fork.c   |  1 +
 kernel/sched/cputime.c  |  7 +++
 5 files changed, 41 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 6f0b562..4a0bd9a 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -14,6 +14,7 @@
 #include linux/module.h
 #include linux/slab.h
 #include linux/cputime.h
+#include linux/sched.h
 
 static spinlock_t cpufreq_stats_lock;
 
@@ -83,6 +84,28 @@ static void store_current_value(struct cpufreq_power_stats 
*powerstats,
}
 }
 
+void acct_update_power(struct task_struct *task, cputime_t cputime)
+{
+   struct cpufreq_power_stats *powerstats;
+   struct cpufreq_stats *stats;
+   struct cpufreq_policy *policy;
+   unsigned int cpu_num, curr;
+
+   if (!task)
+   return;
+   cpu_num = task_cpu(task);
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   policy = cpufreq_cpu_get(cpu_num);
+   if (!powerstats || !policy || !(policy-stats))
+   return;
+
+   stats = policy-stats;
+   curr = powerstats-curr[stats-last_index];
+   task-cpu_power += curr * cputime_to_usecs(cputime);
+   cpufreq_cpu_put(cpu_num);
+}
+EXPORT_SYMBOL_GPL(acct_update_power);
+
 static ssize_t store_current_in_state(struct cpufreq_policy *policy,
const char *buf, size_t len)
 {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 2ee4888..86826c8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -18,6 +18,7 @@
 #include linux/notifier.h
 #include linux/spinlock.h
 #include linux/sysfs.h
+#include asm/cputime.h
 
 /*
  *CPUFREQ INTERFACE  *
@@ -601,4 +602,11 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
 int cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+/*
+ * CPUFREQ STATS *
+ */
+
+void acct_update_power(struct task_struct *p, cputime_t cputime);
+
 #endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e61..1f2400a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1429,6 +1429,7 @@ struct task_struct {
int __user *clear_child_tid;/* CLONE_CHILD_CLEARTID */
 
cputime_t utime, stime, utimescaled, stimescaled;
+   unsigned long long cpu_power;
cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
@@ -1441,6 +1442,7 @@ struct task_struct {
VTIME_USER,
VTIME_SYS,
} vtime_snap_whence;
+
 #endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
diff --git a/kernel/fork.c b/kernel/fork.c
index 03c1eaa..2ca0e9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1341,6 +1341,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 
p-utime = p-stime = p-gtime = 0;
p-utimescaled = p-stimescaled = 0;
+   p-cpu_power = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p-prev_cputime.utime = p-prev_cputime.stime = 0;
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8394b1e..53a79d5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -4,6 +4,7 @@
 #include linux/kernel_stat.h
 #include linux/static_key.h
 #include linux/context_tracking.h
+#include linux/cpufreq.h
 #include sched.h
 
 
@@ -149,6 +150,9 @@ void account_user_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for user time used */
acct_account_cputime(p);
+
+   /* Account power usage for user time */
+   acct_update_power(p, cputime);
 }
 
 /*
@@ -199,6 +203,9 @@ void __account_system_time(struct task_struct *p, cputime_t 
cputime,
 
/* Account for system time used */
acct_account_cputime(p);
+
+   /* Account power usage for system time */
+   acct_update_power(p, cputime);
 }
 
 /*
-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body

[PATCH v2 1/2] cpufreq_stats: Adds sysfs file /sys/devices/system/cpu/cpufreq/current_in_state

2015-05-14 Thread Ruchi Kandoi
Adds the sysfs file for userspace to initialize the active current
values for all the cores at each of the frequencies.

The format for storing the values is as follows:
echo "CPU<cpu#>:<freq1>=<current in uA> <freq2>=<current>,CPU<cpu#>:
..." > /sys/devices/system/cpu/cpufreq/current_in_state

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 drivers/cpufreq/cpufreq_stats.c | 163 +++-
 1 file changed, 161 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 5e370a3..6f0b562 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -30,6 +30,14 @@ struct cpufreq_stats {
 #endif
 };
 
+struct cpufreq_power_stats {
+   unsigned int state_num;
+   unsigned int *curr;
+   unsigned int *freq_table;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_power_stats *, cpufreq_power_stats);
+
 static int cpufreq_stats_update(struct cpufreq_stats *stats)
 {
unsigned long long cur_time = get_jiffies_64();
@@ -61,6 +69,87 @@ static ssize_t show_time_in_state(struct cpufreq_policy 
*policy, char *buf)
return len;
 }
 
+static void store_current_value(struct cpufreq_power_stats *powerstats,
+   int freq, int curr)
+{
+   int i;
+
+   /* freq_table doesn't contain any CPU_FREQ_INVALID */
+   for (i = 0; i  powerstats-state_num; i++) {
+   if (powerstats-freq_table[i] == freq) {
+   powerstats-curr[i] = curr;
+   break;
+   }
+   }
+}
+
+static ssize_t store_current_in_state(struct cpufreq_policy *policy,
+   const char *buf, size_t len)
+{
+   char *cp, *cp2, *start, *buffer;
+   unsigned int cpu_num, ret, curr, freq;
+   struct cpufreq_power_stats *powerstats;
+
+   if (!buf || len  0)
+   return len;
+
+   buffer = kzalloc(len + 1, GFP_KERNEL);
+   if (!buffer)
+   return len;
+
+   strncpy(buffer, buf, len);
+   buffer[len] = '\0';
+   cp = buffer;
+   spin_lock(cpufreq_stats_lock);
+   while ((start = strsep(cp, ,))) {
+   ret = sscanf(start, CPU%u:, cpu_num);
+   if (ret != 1 || cpu_num  (num_possible_cpus() - 1)) {
+   ret = -EINVAL;
+   goto error;
+   }
+   powerstats = per_cpu(cpufreq_power_stats, cpu_num);
+   if (!powerstats)
+   continue;
+
+   /* sscanf makes sure that strchr doesn't return a NULL */
+   cp2 = strchr(start, ':') + 1;
+   while ((start = strsep(cp2,  ))) {
+   if (sscanf(start, %u=%u, freq, curr) != 2) {
+   ret = -EINVAL;
+   goto error;
+   }
+   store_current_value(powerstats, freq, curr);
+   }
+   }
+   ret = len;
+error:
+   spin_unlock(cpufreq_stats_lock);
+   kfree(buffer);
+   return ret;
+}
+
+static ssize_t show_current_in_state(struct cpufreq_policy *policy, char *buf)
+{
+   ssize_t len = 0;
+   unsigned int i, cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   spin_lock(cpufreq_stats_lock);
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   len += scnprintf(buf + len, PAGE_SIZE - len, CPU%d:, cpu);
+   for (i = 0; i  powerstats-state_num; i++)
+   len += scnprintf(buf + len, PAGE_SIZE - len,
+   %d=%d , powerstats-freq_table[i],
+   powerstats-curr[i]);
+   len += scnprintf(buf + len, PAGE_SIZE - len, \n);
+   }
+   spin_unlock(cpufreq_stats_lock);
+   return len;
+}
+
 #ifdef CONFIG_CPU_FREQ_STAT_DETAILS
 static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 {
@@ -107,6 +196,7 @@ cpufreq_freq_attr_ro(trans_table);
 
 cpufreq_freq_attr_ro(total_trans);
 cpufreq_freq_attr_ro(time_in_state);
+cpufreq_freq_attr_rw(current_in_state);
 
 static struct attribute *default_attrs[] = {
total_trans.attr,
@@ -159,6 +249,67 @@ static void cpufreq_stats_free_table(unsigned int cpu)
cpufreq_cpu_put(policy);
 }
 
+static void cpufreq_powerstats_free(void)
+{
+   int cpu;
+   struct cpufreq_power_stats *powerstats;
+
+   sysfs_remove_file(cpufreq_global_kobject, current_in_state.attr);
+
+   for_each_possible_cpu(cpu) {
+   powerstats = per_cpu(cpufreq_power_stats, cpu);
+   if (!powerstats)
+   continue;
+   kfree(powerstats-curr);
+   kfree(powerstats);
+   per_cpu(cpufreq_power_stats, cpu) = NULL;
+   }
+}
+
+static void cpufreq_powerstats_create(unsigned int cpu

[PATCH] suspend: Return error when pending wakeup source is found.

2015-05-07 Thread Ruchi Kandoi
If a wakeup source is found to be pending in the last stage of suspend
after syscore suspend then the device doesn't suspend but the error is
not propagated which causes an error in the accounting for the number
of suspend aborts and successful suspends.

Signed-off-by: Ruchi Kandoi 
---
 kernel/power/suspend.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8d7a1ef..343b4e4 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -366,6 +366,8 @@ static int suspend_enter(suspend_state_t state, bool 
*wakeup)
trace_suspend_resume(TPS("machine_suspend"),
state, false);
events_check_enabled = false;
+   } else if (*wakeup) {
+   error = -EBUSY;
}
syscore_resume();
}
-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] suspend: Return error when pending wakeup source is found.

2015-05-07 Thread Ruchi Kandoi
If a wakeup source is found to be pending in the last stage of suspend
after syscore suspend then the device doesn't suspend but the error is
not propagated which causes an error in the accounting for the number
of suspend aborts and successful suspends.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
---
 kernel/power/suspend.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8d7a1ef..343b4e4 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -366,6 +366,8 @@ static int suspend_enter(suspend_state_t state, bool 
*wakeup)
trace_suspend_resume(TPS("machine_suspend"),
state, false);
events_check_enabled = false;
+   } else if (*wakeup) {
+   error = -EBUSY;
}
syscore_resume();
}
-- 
2.2.0.rc0.207.ga3a616c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3] power: add an API to log wakeup reasons

2014-03-13 Thread Ruchi Kandoi
True, we could create new wakeup sources specifically to
track this information, perhaps as needed once an IRQ is first
observed to trigger a wakeup.

We would want to know which wakeup sources were responsible for the
most recent wakeup, since we keep a timeline of suspend/resume events
with wakeup reasons and durations.  It may require some extra work to
keep track of this information.

Apart from Google, other vendors like Qualcomm and Nvidia have already
introduced similar kinds of logging in their respective interrupt
controller drivers. We would really like to make this a standardized
logging for debugging purposes.



On Thu, Mar 13, 2014 at 6:06 PM, Rafael J. Wysocki <r...@rjwysocki.net> wrote:
> On Thursday, March 13, 2014 05:43:20 PM Ruchi Kandoi wrote:
>> This should be true most of the times.
>>
>> But there might be cases otherwise too.
>>
>> For instance, there was a bug earlier with wi-fi which would cause the
>> system to wake up but not get hold of a wakeup source because there
>> wasn't any work for it to do. In that case, the wakeup sources would
>> not log such an event.
>>
>> Additionally, there could be a situation where an IRQ caused the
>> system to resume from suspend. And since the system was up, a driver
>> could take a wakeup source. In this case we would assume that the
>> driver would have woken the system, but in reality the driver held the
>> wakeup source only because the system was up and did not cause the
>> wake up to happen.
>
> But you can create special wakeup sources associated with interrupts (in
> addition to the existing ones) and use the statistics for those.
>
> It is possible to define wakeup sources that don't correspond to any
> devices.
>
> Rafael
>
>
>> On Thu, Mar 13, 2014 at 3:18 PM, Rafael J. Wysocki <r...@rjwysocki.net>
>> wrote:
>> > Hi,
>> >
>> > I saw the v4, but I don't have it handy, so replying here.
>> >
>> > On Wednesday, March 12, 2014 12:46:38 PM Ruchi Kandoi wrote:
>> >> For power management diagnostic purposes, it is often useful to know
>> >> what interrupts are frequently waking the system from low power
>> >> suspend mode, especially on battery-powered consumer electronics
>> >> devices that are expected to spend much of their time in low-power
>> >> suspend while not in active use.  For example, reduced battery life on
>> >> a mobile phone may be caused in part by frequent wakeups by broadcast
>> >> traffic on a busy wireless LAN even while the screen is off and the
>> >> phone not in active use.
>> >>
>> >> Add API log_wakeup_reason() exposes it to userspace via the sysfs path
>> >> /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
>> >> from the paltform specific, or from the driver for the interrupt 
>> >> controller,
>> >> when the system resumes because of an IRQ. It logs the reasons which 
>> >> caused
>> >> the system to wakeup from the low-power mode.
>> >
>> > So what exactly is wrong with using wakeup sources for this purpose?
>> >
>> > --
>> > I speak only for myself.
>> > Rafael J. Wysocki, Intel Open Source Technology Center.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>
> --
> I speak only for myself.
> Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3] power: add an API to log wakeup reasons

2014-03-13 Thread Ruchi Kandoi
This should be true most of the times.

But there might be cases otherwise too.

For instance, there was a bug earlier with wi-fi which would cause the
system to wake up but not get hold of a wakeup source because there
wasn't any work for it to do. In that case, the wakeup sources would
not log such an event.

Additionally, there could be a situation where an IRQ caused the
system to resume from suspend. And since the system was up, a driver
could take a wakeup source. In this case we would assume that the
driver would have woken the system, but in reality the driver held the
wakeup source only because the system was up and did not cause the
wake up to happen.

Regards,
Ruchi Kandoi

On Thu, Mar 13, 2014 at 3:18 PM, Rafael J. Wysocki <r...@rjwysocki.net> wrote:
> Hi,
>
> I saw the v4, but I don't have it handy, so replying here.
>
> On Wednesday, March 12, 2014 12:46:38 PM Ruchi Kandoi wrote:
>> For power management diagnostic purposes, it is often useful to know
>> what interrupts are frequently waking the system from low power
>> suspend mode, especially on battery-powered consumer electronics
>> devices that are expected to spend much of their time in low-power
>> suspend while not in active use.  For example, reduced battery life on
>> a mobile phone may be caused in part by frequent wakeups by broadcast
>> traffic on a busy wireless LAN even while the screen is off and the
>> phone not in active use.
>>
>> Add API log_wakeup_reason() exposes it to userspace via the sysfs path
>> /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
>> from the paltform specific, or from the driver for the interrupt controller,
>> when the system resumes because of an IRQ. It logs the reasons which caused
>> the system to wakeup from the low-power mode.
>
> So what exactly is wrong with using wakeup sources for this purpose?
>
> --
> I speak only for myself.
> Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4] power: add an API to log wakeup reasons

2014-03-13 Thread Ruchi Kandoi
For power management diagnostic purposes, it is often useful to know
what interrupts are frequently waking the system from low power
suspend mode, especially on battery-powered consumer electronics
devices that are expected to spend much of their time in low-power
suspend while not in active use.  For example, reduced battery life on
a mobile phone may be caused in part by frequent wakeups by broadcast
traffic on a busy wireless LAN even while the screen is off and the
phone not in active use.

Add an API, log_wakeup_reason(), and expose its log to userspace via the sysfs
path /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
from the platform-specific code, or from the driver for the interrupt controller,
when the system resumes because of an IRQ. It logs the reasons which caused
the system to wake up from the low-power mode.

This information can be collected by userspace as part of the accounting
kept on power consumption.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
Signed-off-by: Greg Hackmann <ghackm...@google.com>
---
space added after prefix in the pr_fmt()
---
 include/linux/wakeup_reason.h |  23 +++
 kernel/power/Makefile |   2 +-
 kernel/power/wakeup_reason.c  | 141 ++
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/wakeup_reason.h
 create mode 100644 kernel/power/wakeup_reason.c

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 000..7ce50f0
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,23 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+void log_wakeup_reason(int irq);
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bf..f98f021 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -5,7 +5,7 @@ obj-y   += qos.o
 obj-$(CONFIG_PM)   += main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
 obj-$(CONFIG_FREEZER)  += process.o
-obj-$(CONFIG_SUSPEND)  += suspend.o
+obj-$(CONFIG_SUSPEND)  += suspend.o wakeup_reason.o
 obj-$(CONFIG_PM_TEST_SUSPEND)  += suspend_test.o
 obj-$(CONFIG_HIBERNATION)  += hibernate.o snapshot.o swap.o user.o \
   block_io.o
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 000..c4ab205
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,141 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "wakeup_reason: " fmt
+
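+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>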
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+   char *buf)
+{
+   int irq_no, buf_offset = 0;
+   struct irq_desc *desc;
+   spin_lock(&resume_reason_lock);
+   for (irq_no = 0; irq_no < irqcount; irq_no++) {
+   desc = irq_to_desc(irq_list[irq_no]);
+   if (desc && desc->action && desc->action->name)
+   buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+   irq_list[irq_no], desc->action->name);
+   else
+   buf_offset += sprintf(buf + buf_offset, "%d\n",
+   irq_list[irq_no]);
+   }
+   spin_unlock(&resume_reason_lock);
+   return buf_offset;
+}
+
+static struct kobj_attribute resume_reason = __ATTR(last_re

[PATCH v4] power: add an API to log wakeup reasons

2014-03-13 Thread Ruchi Kandoi
For power management diagnostic purposes, it is often useful to know
what interrupts are frequently waking the system from low power
suspend mode, especially on battery-powered consumer electronics
devices that are expected to spend much of their time in low-power
suspend while not in active use.  For example, reduced battery life on
a mobile phone may be caused in part by frequent wakeups by broadcast
traffic on a busy wireless LAN even while the screen is off and the
phone not in active use.

Add an API, log_wakeup_reason(), and expose its log to userspace via the sysfs
path /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
from the platform-specific code, or from the driver for the interrupt controller,
when the system resumes because of an IRQ. It logs the reasons which caused
the system to wake up from the low-power mode.

This information can be collected by userspace as part of the accounting
kept on power consumption.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
Signed-off-by: Greg Hackmann <ghackm...@google.com>
---
space added after prefix in the pr_fmt()
---
 include/linux/wakeup_reason.h |  23 +++
 kernel/power/Makefile |   2 +-
 kernel/power/wakeup_reason.c  | 141 ++
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/wakeup_reason.h
 create mode 100644 kernel/power/wakeup_reason.c

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 000..7ce50f0
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,23 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+void log_wakeup_reason(int irq);
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bf..f98f021 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -5,7 +5,7 @@ obj-y   += qos.o
 obj-$(CONFIG_PM)   += main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
 obj-$(CONFIG_FREEZER)  += process.o
-obj-$(CONFIG_SUSPEND)  += suspend.o
+obj-$(CONFIG_SUSPEND)  += suspend.o wakeup_reason.o
 obj-$(CONFIG_PM_TEST_SUSPEND)  += suspend_test.o
 obj-$(CONFIG_HIBERNATION)  += hibernate.o snapshot.o swap.o user.o \
   block_io.o
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 000..c4ab205
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,141 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "wakeup_reason: " fmt
+
+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+   char *buf)
+{
+   int irq_no, buf_offset = 0;
+   struct irq_desc *desc;
+   spin_lock(&resume_reason_lock);
+   for (irq_no = 0; irq_no < irqcount; irq_no++) {
+   desc = irq_to_desc(irq_list[irq_no]);
+   if (desc && desc->action && desc->action->name)
+   buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+   irq_list[irq_no], desc->action->name);
+   else
+   buf_offset += sprintf(buf + buf_offset, "%d\n",
+   irq_list[irq_no

Re: [PATCH v3] power: add an API to log wakeup reasons

2014-03-13 Thread Ruchi Kandoi
This should be true most of the times.

But there might be cases otherwise too.

For instance, there was a bug earlier with wi-fi which would cause the
system to wake up but not get hold of a wakeup source because there
wasn't any work for it to do. In that case, the wakeup sources would
not log such an event.

Additionally, there could be a situation where an IRQ caused the
system to resume from suspend. And since the system was up, a driver
could take a wakeup source. In this case we would assume that the
driver would have woken the system, but in reality the driver held the
wakeup source only because the system was up and did not cause the
wake up to happen.

Regards,
Ruchi Kandoi

On Thu, Mar 13, 2014 at 3:18 PM, Rafael J. Wysocki <r...@rjwysocki.net> wrote:
> Hi,
>
> I saw the v4, but I don't have it handy, so replying here.
>
> On Wednesday, March 12, 2014 12:46:38 PM Ruchi Kandoi wrote:
>> For power management diagnostic purposes, it is often useful to know
>> what interrupts are frequently waking the system from low power
>> suspend mode, especially on battery-powered consumer electronics
>> devices that are expected to spend much of their time in low-power
>> suspend while not in active use.  For example, reduced battery life on
>> a mobile phone may be caused in part by frequent wakeups by broadcast
>> traffic on a busy wireless LAN even while the screen is off and the
>> phone not in active use.
>>
>> Add API log_wakeup_reason() exposes it to userspace via the sysfs path
>> /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
>> from the paltform specific, or from the driver for the interrupt controller,
>> when the system resumes because of an IRQ. It logs the reasons which caused
>> the system to wakeup from the low-power mode.
>
> So what exactly is wrong with using wakeup sources for this purpose?
>
> --
> I speak only for myself.
> Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3] power: add an API to log wakeup reasons

2014-03-13 Thread Ruchi Kandoi
True, we could create new wakeup sources specifically to
track this information, perhaps as needed once an IRQ is first
observed to trigger a wakeup.

We would want to know which wakeup sources were responsible for the
most recent wakeup, since we keep a timeline of suspend/resume events
with wakeup reasons and durations.  It may require some extra work to
keep track of this information.

Apart from Google, other vendors like Qualcomm and Nvidia have already
introduced similar kinds of logging in their respective interrupt
controller drivers. We would really like to make this a standardized
logging for debugging purposes.



On Thu, Mar 13, 2014 at 6:06 PM, Rafael J. Wysocki <r...@rjwysocki.net> wrote:
> On Thursday, March 13, 2014 05:43:20 PM Ruchi Kandoi wrote:
>> This should be true most of the times.
>>
>> But there might be cases otherwise too.
>>
>> For instance, there was a bug earlier with wi-fi which would cause the
>> system to wake up but not get hold of a wakeup source because there
>> wasn't any work for it to do. In that case, the wakeup sources would
>> not log such an event.
>>
>> Additionally, there could be a situation where an IRQ caused the
>> system to resume from suspend. And since the system was up, a driver
>> could take a wakeup source. In this case we would assume that the
>> driver would have woken the system, but in reality the driver held the
>> wakeup source only because the system was up and did not cause the
>> wake up to happen.
>
> But you can create special wakeup sources associated with interrupts (in
> addition to the existing ones) and use the statistics for those.
>
> It is possible to define wakeup sources that don't correspond to any
> devices.
>
> Rafael
>
>
>> On Thu, Mar 13, 2014 at 3:18 PM, Rafael J. Wysocki <r...@rjwysocki.net>
>> wrote:
>> > Hi,
>> >
>> > I saw the v4, but I don't have it handy, so replying here.
>> >
>> > On Wednesday, March 12, 2014 12:46:38 PM Ruchi Kandoi wrote:
>> >> For power management diagnostic purposes, it is often useful to know
>> >> what interrupts are frequently waking the system from low power
>> >> suspend mode, especially on battery-powered consumer electronics
>> >> devices that are expected to spend much of their time in low-power
>> >> suspend while not in active use.  For example, reduced battery life on
>> >> a mobile phone may be caused in part by frequent wakeups by broadcast
>> >> traffic on a busy wireless LAN even while the screen is off and the
>> >> phone not in active use.
>> >>
>> >> Add API log_wakeup_reason() exposes it to userspace via the sysfs path
>> >> /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
>> >> from the paltform specific, or from the driver for the interrupt
>> >> controller,
>> >> when the system resumes because of an IRQ. It logs the reasons which
>> >> caused
>> >> the system to wakeup from the low-power mode.
>> >
>> > So what exactly is wrong with using wakeup sources for this purpose?
>> >
>> > --
>> > I speak only for myself.
>> > Rafael J. Wysocki, Intel Open Source Technology Center.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>
> --
> I speak only for myself.
> Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3] power: add an API to log wakeup reasons

2014-03-12 Thread Ruchi Kandoi
For power management diagnostic purposes, it is often useful to know
what interrupts are frequently waking the system from low power
suspend mode, especially on battery-powered consumer electronics
devices that are expected to spend much of their time in low-power
suspend while not in active use.  For example, reduced battery life on
a mobile phone may be caused in part by frequent wakeups by broadcast
traffic on a busy wireless LAN even while the screen is off and the
phone not in active use.

Add an API, log_wakeup_reason(), and expose its log to userspace via the sysfs
path /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
from the platform-specific code, or from the driver for the interrupt controller,
when the system resumes because of an IRQ. It logs the reasons which caused
the system to wake up from the low-power mode.

This information can be collected by userspace as part of the accounting
kept on power consumption.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
Signed-off-by: Greg Hackmann <ghackm...@google.com>
---
commit message changed for clarity
---
 include/linux/wakeup_reason.h |  23 +++
 kernel/power/Makefile |   2 +-
 kernel/power/wakeup_reason.c  | 141 ++
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/wakeup_reason.h
 create mode 100644 kernel/power/wakeup_reason.c

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 000..7ce50f0
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,23 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+void log_wakeup_reason(int irq);
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bf..f98f021 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -5,7 +5,7 @@ obj-y   += qos.o
 obj-$(CONFIG_PM)   += main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
 obj-$(CONFIG_FREEZER)  += process.o
-obj-$(CONFIG_SUSPEND)  += suspend.o
+obj-$(CONFIG_SUSPEND)  += suspend.o wakeup_reason.o
 obj-$(CONFIG_PM_TEST_SUSPEND)  += suspend_test.o
 obj-$(CONFIG_HIBERNATION)  += hibernate.o snapshot.o swap.o user.o \
   block_io.o
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 000..a21c592
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,141 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "wakeup_reason:" fmt
+
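+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>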
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+   char *buf)
+{
+   int irq_no, buf_offset = 0;
+   struct irq_desc *desc;
+   spin_lock(&resume_reason_lock);
+   for (irq_no = 0; irq_no < irqcount; irq_no++) {
+   desc = irq_to_desc(irq_list[irq_no]);
+   if (desc && desc->action && desc->action->name)
+   buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+   irq_list[irq_no], desc->action->name);
+   else
+   buf_offset += sprintf(buf + buf_offset, "%d\n",
+   irq_list[irq_no]);
+   }
+   spin_unlock(&resume_reason_lock);
+   return buf_offset;
+}
+
+static struct kobj_attribute resume_reason = __ATTR(last_resume_re

[PATCH v3] power: add an API to log wakeup reasons

2014-03-12 Thread Ruchi Kandoi
For power management diagnostic purposes, it is often useful to know
what interrupts are frequently waking the system from low power
suspend mode, especially on battery-powered consumer electronics
devices that are expected to spend much of their time in low-power
suspend while not in active use.  For example, reduced battery life on
a mobile phone may be caused in part by frequent wakeups by broadcast
traffic on a busy wireless LAN even while the screen is off and the
phone not in active use.

Add an API, log_wakeup_reason(), and expose its log to userspace via the sysfs
path /sys/kernel/wakeup_reasons/last_resume_reason. This API would be called
from the platform-specific code, or from the driver for the interrupt controller,
when the system resumes because of an IRQ. It logs the reasons which caused
the system to wake up from the low-power mode.

This information can be collected by userspace as part of the accounting
kept on power consumption.

Signed-off-by: Ruchi Kandoi <kandoiru...@google.com>
Signed-off-by: Greg Hackmann <ghackm...@google.com>
---
commit message changed for clarity
---
 include/linux/wakeup_reason.h |  23 +++
 kernel/power/Makefile |   2 +-
 kernel/power/wakeup_reason.c  | 141 ++
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/wakeup_reason.h
 create mode 100644 kernel/power/wakeup_reason.c

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 000..7ce50f0
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,23 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+void log_wakeup_reason(int irq);
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bf..f98f021 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -5,7 +5,7 @@ obj-y   += qos.o
 obj-$(CONFIG_PM)   += main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
 obj-$(CONFIG_FREEZER)  += process.o
-obj-$(CONFIG_SUSPEND)  += suspend.o
+obj-$(CONFIG_SUSPEND)  += suspend.o wakeup_reason.o
 obj-$(CONFIG_PM_TEST_SUSPEND)  += suspend_test.o
 obj-$(CONFIG_HIBERNATION)  += hibernate.o snapshot.o swap.o user.o \
   block_io.o
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 000..a21c592
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,141 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#define pr_fmt(fmt) "wakeup_reason: " fmt
+
+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+   char *buf)
+{
+   int irq_no, buf_offset = 0;
+   struct irq_desc *desc;
+   spin_lock(&resume_reason_lock);
+   for (irq_no = 0; irq_no < irqcount; irq_no++) {
+   desc = irq_to_desc(irq_list[irq_no]);
+   if (desc && desc->action && desc->action->name)
+   buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+   irq_list[irq_no], desc->action->name);
+   else
+   buf_offset += sprintf(buf + buf_offset, "%d\n",
+   irq_list[irq_no

Re: [PATCH] power: add an API to log wakeup reasons

2014-03-11 Thread Ruchi Kandoi
This API would be called from the platform specific code, or the
driver for the interrupt controller, when the system resumes from the
suspend because of an IRQ.

We track the reasons for which systems wake up from the low power
suspend mode. This is especially important on battery-powered consumer
electronic devices.

Analyzing the data helps us figure out what caused the most wake-ups
and whether something can be done about them to improve battery
life.
For instance, it shows whether wi-fi network traffic or radio traffic
causes the system to wake up frequently from the low-power mode.

This is already in use on some Android devices. We are trying to make
this a generic API which could be called by other platforms as well,
standardizing the format in which the info is
logged in dmesg and the format of the info exported to userspace for
collecting power management statistics.

Thanking you,
Ruchi Kandoi

On Tue, Mar 11, 2014 at 12:32 PM, Rafael J. Wysocki  wrote:
> On Monday, March 10, 2014 07:02:02 PM Ruchi Kandoi wrote:
>> Add API log_wakeup_reason() and expose it to userspace via sysfs path
>> /sys/kernel/wakeup_reasons/last_resume_reason
>> This is useful for power management diagnostic purposes.
>
> What's the use case and how is it supposed to work?
>
>> Signed-off-by: Ruchi Kandoi 
>> Signed-off-by: Greg Hackmann 
>> ---
>>  include/linux/wakeup_reason.h |  23 +++
>>  kernel/power/Makefile |   2 +-
>>  kernel/power/wakeup_reason.c  | 140 
>> ++
>>  3 files changed, 164 insertions(+), 1 deletion(-)
>>  create mode 100644 include/linux/wakeup_reason.h
>>  create mode 100644 kernel/power/wakeup_reason.c
>>
>> diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
>> new file mode 100644
>> index 000..7ce50f0
>> --- /dev/null
>> +++ b/include/linux/wakeup_reason.h
>> @@ -0,0 +1,23 @@
>> +/*
>> + * include/linux/wakeup_reason.h
>> + *
>> + * Logs the reason which caused the kernel to resume
>> + * from the suspend mode.
>> + *
>> + * Copyright (C) 2014 Google, Inc.
>> + * This software is licensed under the terms of the GNU General Public
>> + * License version 2, as published by the Free Software Foundation, and
>> + * may be copied, distributed, and modified under those terms.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#ifndef _LINUX_WAKEUP_REASON_H
>> +#define _LINUX_WAKEUP_REASON_H
>> +
>> +void log_wakeup_reason(int irq);
>> +
>> +#endif /* _LINUX_WAKEUP_REASON_H */
>> diff --git a/kernel/power/Makefile b/kernel/power/Makefile
>> index 29472bf..f98f021 100644
>> --- a/kernel/power/Makefile
>> +++ b/kernel/power/Makefile
>> @@ -5,7 +5,7 @@ obj-y += qos.o
>>  obj-$(CONFIG_PM) += main.o
>>  obj-$(CONFIG_VT_CONSOLE_SLEEP)   += console.o
>>  obj-$(CONFIG_FREEZER)+= process.o
>> -obj-$(CONFIG_SUSPEND)+= suspend.o
>> +obj-$(CONFIG_SUSPEND)+= suspend.o wakeup_reason.o
>>  obj-$(CONFIG_PM_TEST_SUSPEND)+= suspend_test.o
>>  obj-$(CONFIG_HIBERNATION)+= hibernate.o snapshot.o swap.o user.o \
>>  block_io.o
>> diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
>> new file mode 100644
>> index 000..188a6bf
>> --- /dev/null
>> +++ b/kernel/power/wakeup_reason.c
>> @@ -0,0 +1,140 @@
>> +/*
>> + * kernel/power/wakeup_reason.c
>> + *
>> + * Logs the reasons which caused the kernel to resume from
>> + * the suspend mode.
>> + *
>> + * Copyright (C) 2014 Google, Inc.
>> + * This software is licensed under the terms of the GNU General Public
>> + * License version 2, as published by the Free Software Foundation, and
>> + * may be copied, distributed, and modified under those terms.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +

Re: [PATCH] power: add an API to log wakeup reasons

2014-03-11 Thread Ruchi Kandoi
This API would be called from the platform specific code, or the
driver for the interrupt controller, when the system resumes from the
suspend because of an IRQ.

We track the reasons for which systems wake up from the low power
suspend mode. This is especially important on battery-powered consumer
electronic devices.

Analyzing the data helps us figure out what caused the most wake-ups
and whether something can be done about them to improve battery
life.
For instance, it shows whether wi-fi network traffic or radio traffic
causes the system to wake up frequently from the low-power mode.

This is already in use on some Android devices. We are trying to make
this a generic API which could be called by other platforms as well,
standardizing the format in which the info is
logged in dmesg and the format of the info exported to userspace for
collecting power management statistics.

Thanking you,
Ruchi Kandoi

On Tue, Mar 11, 2014 at 12:32 PM, Rafael J. Wysocki r...@rjwysocki.net wrote:
 On Monday, March 10, 2014 07:02:02 PM Ruchi Kandoi wrote:
 Add API log_wakeup_reason() and expose it to userspace via sysfs path
 /sys/kernel/wakeup_reasons/last_resume_reason
 This is useful for power management diagnostic purposes.

 What's the use case and how is it supposed to work?

 Signed-off-by: Ruchi Kandoi kandoiru...@google.com
 Signed-off-by: Greg Hackmann ghackm...@google.com
 ---
  include/linux/wakeup_reason.h |  23 +++
  kernel/power/Makefile |   2 +-
  kernel/power/wakeup_reason.c  | 140 
 ++
  3 files changed, 164 insertions(+), 1 deletion(-)
  create mode 100644 include/linux/wakeup_reason.h
  create mode 100644 kernel/power/wakeup_reason.c

 diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
 new file mode 100644
 index 000..7ce50f0
 --- /dev/null
 +++ b/include/linux/wakeup_reason.h
 @@ -0,0 +1,23 @@
 +/*
 + * include/linux/wakeup_reason.h
 + *
 + * Logs the reason which caused the kernel to resume
 + * from the suspend mode.
 + *
 + * Copyright (C) 2014 Google, Inc.
 + * This software is licensed under the terms of the GNU General Public
 + * License version 2, as published by the Free Software Foundation, and
 + * may be copied, distributed, and modified under those terms.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + */
 +
 +#ifndef _LINUX_WAKEUP_REASON_H
 +#define _LINUX_WAKEUP_REASON_H
 +
 +void log_wakeup_reason(int irq);
 +
 +#endif /* _LINUX_WAKEUP_REASON_H */
 diff --git a/kernel/power/Makefile b/kernel/power/Makefile
 index 29472bf..f98f021 100644
 --- a/kernel/power/Makefile
 +++ b/kernel/power/Makefile
 @@ -5,7 +5,7 @@ obj-y += qos.o
  obj-$(CONFIG_PM) += main.o
  obj-$(CONFIG_VT_CONSOLE_SLEEP)   += console.o
  obj-$(CONFIG_FREEZER)+= process.o
 -obj-$(CONFIG_SUSPEND)+= suspend.o
 +obj-$(CONFIG_SUSPEND)+= suspend.o wakeup_reason.o
  obj-$(CONFIG_PM_TEST_SUSPEND)+= suspend_test.o
  obj-$(CONFIG_HIBERNATION)+= hibernate.o snapshot.o swap.o user.o \
  block_io.o
 diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
 new file mode 100644
 index 000..188a6bf
 --- /dev/null
 +++ b/kernel/power/wakeup_reason.c
 @@ -0,0 +1,140 @@
 +/*
 + * kernel/power/wakeup_reason.c
 + *
 + * Logs the reasons which caused the kernel to resume from
 + * the suspend mode.
 + *
 + * Copyright (C) 2014 Google, Inc.
 + * This software is licensed under the terms of the GNU General Public
 + * License version 2, as published by the Free Software Foundation, and
 + * may be copied, distributed, and modified under those terms.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + */
 +
 +#include linux/wakeup_reason.h
 +#include linux/kernel.h
 +#include linux/irq.h
 +#include linux/interrupt.h
 +#include linux/io.h
 +#include linux/kobject.h
 +#include linux/sysfs.h
 +#include linux/init.h
 +#include linux/spinlock.h
 +#include linux/notifier.h
 +#include linux/suspend.h
 +
 +
 +#define MAX_WAKEUP_REASON_IRQS 32
 +static int irq_list[MAX_WAKEUP_REASON_IRQS];
 +static int irqcount;
 +static struct kobject *wakeup_reason;
 +static spinlock_t resume_reason_lock;
 +
 +static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute 
 *attr,
 + char *buf)
 +{
 + int irq_no, buf_offset = 0;
 + struct irq_desc *desc;
 + spin_lock(resume_reason_lock);
 + for (irq_no = 0; irq_no  irqcount; irq_no++) {
 + desc = irq_to_desc(irq_list[irq_no

[PATCH] power: add an API to log wakeup reasons

2014-03-10 Thread Ruchi Kandoi
Add API log_wakeup_reason() and expose it to userspace via sysfs path
/sys/kernel/wakeup_reasons/last_resume_reason
This is useful for power management diagnostic purposes.

Signed-off-by: Ruchi Kandoi 
Signed-off-by: Greg Hackmann 
---
 include/linux/wakeup_reason.h |  23 +++
 kernel/power/Makefile |   2 +-
 kernel/power/wakeup_reason.c  | 140 ++
 3 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/wakeup_reason.h
 create mode 100644 kernel/power/wakeup_reason.c

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 000..7ce50f0
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,23 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+void log_wakeup_reason(int irq);
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bf..f98f021 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -5,7 +5,7 @@ obj-y   += qos.o
 obj-$(CONFIG_PM)   += main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
 obj-$(CONFIG_FREEZER)  += process.o
-obj-$(CONFIG_SUSPEND)  += suspend.o
+obj-$(CONFIG_SUSPEND)  += suspend.o wakeup_reason.o
 obj-$(CONFIG_PM_TEST_SUSPEND)  += suspend_test.o
 obj-$(CONFIG_HIBERNATION)  += hibernate.o snapshot.o swap.o user.o \
   block_io.o
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 000..188a6bf
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,140 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
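+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>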
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+   char *buf)
+{
+   int irq_no, buf_offset = 0;
+   struct irq_desc *desc;
+   spin_lock(&resume_reason_lock);
+   for (irq_no = 0; irq_no < irqcount; irq_no++) {
+   desc = irq_to_desc(irq_list[irq_no]);
+   if (desc && desc->action && desc->action->name)
+   buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+   irq_list[irq_no], desc->action->name);
+   else
+   buf_offset += sprintf(buf + buf_offset, "%d\n",
+   irq_list[irq_no]);
+   }
+   spin_unlock(&resume_reason_lock);
+   return buf_offset;
+}
+
+static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0666,
+   reason_show, NULL);
+
+static struct attribute *attrs[] = {
+   &resume_reason.attr,
+   NULL,
+};
+static struct attribute_group attr_group = {
+   .attrs = attrs,
+};
+
+/*
+ * logs all the wake up reasons to the kernel
+ * stores the irqs to expose them to the userspace via sysfs
+ */
+void log_wakeup_reason(int irq)
+{
+   struct irq_desc *desc;
+   desc = irq_to_desc(irq);
+   if (desc && desc->action && desc->action->name)
+   printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq,
+   desc->action->name);
+   else
+   printk(KERN_INFO "Resume caused by IRQ %d\n", irq);
+
+   spin_lock(&resume_reason_lock);
+   if (irqcount == MAX_WAKEUP_REASON_IRQS) {
+  

[PATCH] power: add an API to log wakeup reasons

2014-03-10 Thread Ruchi Kandoi
Add API log_wakeup_reason() and expose it to userspace via sysfs path
/sys/kernel/wakeup_reasons/last_resume_reason
This is useful for power management diagnostic purposes.

Signed-off-by: Ruchi Kandoi kandoiru...@google.com
Signed-off-by: Greg Hackmann ghackm...@google.com
---
 include/linux/wakeup_reason.h |  23 +++
 kernel/power/Makefile |   2 +-
 kernel/power/wakeup_reason.c  | 140 ++
 3 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/wakeup_reason.h
 create mode 100644 kernel/power/wakeup_reason.c

diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 000..7ce50f0
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,23 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+void log_wakeup_reason(int irq);
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bf..f98f021 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -5,7 +5,7 @@ obj-y   += qos.o
 obj-$(CONFIG_PM)   += main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
 obj-$(CONFIG_FREEZER)  += process.o
-obj-$(CONFIG_SUSPEND)  += suspend.o
+obj-$(CONFIG_SUSPEND)  += suspend.o wakeup_reason.o
 obj-$(CONFIG_PM_TEST_SUSPEND)  += suspend_test.o
 obj-$(CONFIG_HIBERNATION)  += hibernate.o snapshot.o swap.o user.o \
   block_io.o
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 000..188a6bf
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,140 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static struct kobject *wakeup_reason;
+static spinlock_t resume_reason_lock;
+
+static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+   char *buf)
+{
+   int irq_no, buf_offset = 0;
+   struct irq_desc *desc;
+   spin_lock(&resume_reason_lock);
+   for (irq_no = 0; irq_no < irqcount; irq_no++) {
+   desc = irq_to_desc(irq_list[irq_no]);
+   if (desc && desc->action && desc->action->name)
+   buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+   irq_list[irq_no], desc->action->name);
+   else
+   buf_offset += sprintf(buf + buf_offset, "%d\n",
+   irq_list[irq_no]);
+   }
+   spin_unlock(&resume_reason_lock);
+   return buf_offset;
+}
+
+static struct kobj_attribute resume_reason = __ATTR(last_resume_reason, 0666,
+   reason_show, NULL);
+
+static struct attribute *attrs[] = {
+   &resume_reason.attr,
+   NULL,
+};
+static struct attribute_group attr_group = {
+   .attrs = attrs,
+};
+
+/*
+ * logs all the wake up reasons to the kernel
+ * stores the irqs to expose them to the userspace via sysfs
+ */
+void log_wakeup_reason(int irq)
+{
+   struct irq_desc *desc;
+   desc = irq_to_desc(irq);
+   if (desc && desc->action && desc->action->name)
+   printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq,
+   desc->action->name);
+   else
+   printk(KERN_INFO "Resume caused by IRQ %d\n", irq);
+
+   spin_lock(resume_reason_lock