Hi, I am trying to duplicate the purpose of /proc/pid/io for a homework to understand how procfs / seq_file works. Although there is no mandate to use seq_file but it seems very interesting to me.
The aim is to print an output similar to /proc/pid/io but for all processes at once using a custom proc file name "/proc/proc_io". The project is organized as follows: main.c -> It is responsible for kernel module init / deinit sequence.c -> This is where I have put in the processing logic utils.h -> bunch of macros for debugging The debugging logs come as follows: ========= [ +4.814129] <pl_open:15> [ +0.000017] <pl_seq_start:53> [ +0.000003] <pl_seq_start:55> init_task: [ffffffff81e11500] pos: [0] [ +0.000001] <pl_seq_show:103> [ +0.000004] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a28000] [ +0.000002] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a28dc0] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a29b80] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a2b700] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a2d280] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a2e040] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a2ee00] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225a78000] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225a7ee00] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aa8000] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225aa8dc0] [ +0.000001] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225aaa940] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aab700] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225aac4c0] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aad280] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225aaee00] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225048000] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff880225048dc0] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff880225049b80] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff88022504b700] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff88022504c4c0] [ +0.000001] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff88022504d280] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff88022504e040] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f0000] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f0dc0] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f1b80] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f2940] [ +0.000001] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f44c0] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f5280] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802250f6040] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802250f6e00] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_next:72> [ +0.000001] <pl_seq_next:79> n_tsk: [ffff8802251a0dc0] [ +0.000002] <pl_seq_show:103> [ +0.000002] <pl_seq_next:72> [ +0.000002] <pl_seq_next:79> n_tsk: [ffff8802251a1b80] [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_stop:92> [ +0.000276] <pl_seq_start:53> [ +0.000002] <pl_seq_start:55> init_task: [ffffffff81e11500] pos: [1] [ +0.000001] <pl_seq_stop:92> ========= Towards the end, the sequence is as follows: [ +0.000001] <pl_seq_show:103> [ +0.000003] <pl_seq_stop:92> i.e. stop() is called after show(). This is making my output truncated to only first few processes. Although I always return 0 from show(), I fail to understand why stop() is being invoked just after show(). Can you please help me or redirect me somewhere to understand why this is happening? Thank You. Gaurav
#pragma once
#include <linux/kernel.h>
#define DEBUG_ENABLE /* uncomment to enable debugging logs */
#ifdef DEBUG_ENABLE
#define dbg(fmt,args...) \
do { \
printk(KERN_DEBUG "<%s:%d> " fmt "\n", __func__, __LINE__, ##args); \
} while (0)
#else
#define dbg(fmt,args...)
#endif
#define err(fmt,args...) \
do { \
printk(KERN_ERR "<%s:%d> " fmt "\n", __func__, __LINE__, ##args); \
} while (0)
#define info(fmt,args...) \
do { \
printk(KERN_INFO "<%s:%d> " fmt "\n", __func__, __LINE__, ##args); \
} while (0)
#pragma once #include <linux/seq_file.h> /* return sequence operations */ inline struct seq_operations * get_sequence_ops(void);
#include "sequence.h"
#include "utils.h"
#include <linux/module.h>
#include <linux/proc_fs.h>
#define PROC_NAME "proc_io"
static struct proc_dir_entry *pl;
static int
pl_open(struct inode *inode, struct file *file)
{
struct seq_operations *sops;
dbg("");
/* initialize sequential file, register operations
* Ref: https://www.kernel.org/doc/htmldocs/filesystems/API-seq-open.html
*/
sops = get_sequence_ops();
return seq_open(file, sops);
}
/* file operations */
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = pl_open,
/* read method for sequential files */
.read = seq_read,
/* llseek method for sequential files */
.llseek = seq_lseek,
/* free the structures associated with sequential file */
.release = seq_release,
};
static void
_pl_module_exit(void)
{
dbg("");
if (pl != NULL)
proc_remove(pl);
}
static int __init
pl_module_init(void)
{
dbg("");
/* create /proc/proc_io */
pl = proc_create(PROC_NAME, 0, NULL, &fops);
if (pl == NULL) {
err("Failed to create proc_io");
goto error;
}
return 0;
error:
_pl_module_exit();
return -1;
}
static void __exit
pl_module_exit(void)
{
_pl_module_exit();
}
module_init(pl_module_init);
module_exit(pl_module_exit);
MODULE_AUTHOR("Gaurav Kalra");
MODULE_DESCRIPTION("PR02 per Process I/O Usage");
MODULE_LICENSE("GPL");
Makefile
Description: Binary data
#include "sequence.h"
#include "utils.h"
#include <linux/sched.h>
#include <linux/task_io_accounting_ops.h>
/*
Organization of task information in kernel:
struct task_struct {
...
pid_t pid;
...
struct list_head tasks;
...
char comm[TASK_COMM_LEN];
...
struct task_io_accounting ioac;
};
struct list_head {
struct list_head *next, *prev;
};
struct task_io_accounting {
#ifdef CONFIG_TASK_XACCT
u64 rchar; //bytes read
u64 wchar; //bytes written
u64 syscr; //# of read syscalls
u64 syscw; //# of write syscalls
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
//The number of bytes which this task has caused to be read from storage.
u64 read_bytes;
//The number of bytes which this task has caused, or shall cause to be written to disk.
u64 write_bytes;
//A task can cause "negative" IO too. If this task truncates some
//dirty pagecache, some IO which another task has been accounted for
//(in its write_bytes) will not be happening. We _could_ just
//subtract that from the truncating task's write_bytes, but there is
//information loss in doing that.
u64 cancelled_write_bytes;
#endif
};
*/
static void *
pl_seq_start(struct seq_file *m, loff_t *pos)
{
dbg("");
dbg("init_task: [%p] pos: [%lld]", &init_task, *pos);
/* new sequence, return init_task */
if (*pos == 0) {
return &init_task;
}
/* sequence end, terminate */
else {
*pos = 0;
return NULL;
}
}
static void *
pl_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
struct task_struct *n_tsk, *c_tsk;
dbg("");
/* set current task */
c_tsk = v;
/* return next task */
if ((n_tsk = next_task(c_tsk)) != &init_task) {
dbg("n_tsk: [%p]", n_tsk);
return n_tsk;
}
dbg("init_task: [%p] c_tsk: [%p] n_tsk: [%p] pos: [%lld]", &init_task, c_tsk, n_tsk, *pos);
/* if next task == init_task, terminate */
return NULL;
}
static void
pl_seq_stop(struct seq_file *m, void *v)
{
dbg("");
return; /* nop */
}
static int
pl_seq_show(struct seq_file *m, void *v)
{
struct task_struct *tsk = v, *t;
char buf[TASK_COMM_LEN];
struct task_io_accounting acct = tsk->ioac; /* initialize accounting data */
dbg("");
/* account each thread
* Ref: https://github.com/torvalds/linux/blob/master/fs/proc/base.c
* Function: do_io_accounting()
*/
t = tsk;
task_io_accounting_add(&acct, &tsk->signal->ioac);
while_each_thread(tsk, t)
task_io_accounting_add(&acct, &t->ioac);
/* print information */
seq_printf(m, "%s [PID: %u]\n"
"\trchar: %llu\n"
"\twchar: %llu\n"
"\tsyscr: %llu\n"
"\tsyscw: %llu\n"
"\tread_bytes: %llu\n"
"\twrite_bytes: %llu\n"
"\tcancelled_write_bytes: %llu\n\n",
get_task_comm(buf, tsk),
task_pid_nr(tsk),
(unsigned long long)acct.rchar,
(unsigned long long)acct.wchar,
(unsigned long long)acct.syscr,
(unsigned long long)acct.syscw,
(unsigned long long)acct.read_bytes,
(unsigned long long)acct.write_bytes,
(unsigned long long)acct.cancelled_write_bytes);
/* return success */
return 0;
}
/* sequence operations */
static struct seq_operations sops = {
/* sets the iterator up and returns the first element of sequence */
.start = pl_seq_start,
/* returns the next element of sequence */
.next = pl_seq_next,
/* shuts it down */
.stop = pl_seq_stop,
/* prints element into the buffer */
.show = pl_seq_show,
};
inline struct seq_operations *
get_sequence_ops(void)
{
return &sops;
}_______________________________________________ Kernelnewbies mailing list [email protected] https://lists.kernelnewbies.org/mailman/listinfo/kernelnewbies
