From: José Bollo <[email protected]> Hi all,
I'd like to get your feedback on the idea presented in this draft. Should I continue or stop immediately? Is there already some existing work? What do you think of the approach taken? BR - José Bollo [RFC DRAFT] Adds PUI: process unique identifier The name 'pui' is chosen -instead of puid- to avoid confusion with both pid and uid. It is intended to uniquely identify each activated task item, according to namespaces. 64 bits seems to be a good size to begin with. The number of seconds in a year is less than 2^25. So if a huge machine is able to create 2^31 processes per second (e.g. 2^7 cores, each creating 2^24 processes per second -a nightmare-), then the unique ids are exhausted in 2^8 years. Far more than what a regular system upgrade needs. The pui is represented as a hexadecimal value because it is much more efficient. Signed-off-by: José Bollo <[email protected]> --- fs/proc/array.c | 21 +++++ include/linux/pid.h | 5 ++ include/linux/pid_namespace.h | 4 + include/linux/pui.h | 55 +++++++++++++ include/uapi/asm-generic/socket.h | 2 + init/Kconfig | 7 ++ kernel/Makefile | 1 + kernel/pid.c | 12 +++ kernel/pid_namespace.c | 3 + kernel/pui.c | 168 ++++++++++++++++++++++++++++++++++++++ net/core/sock.c | 20 +++++ 11 files changed, 298 insertions(+) create mode 100644 include/linux/pui.h create mode 100644 kernel/pui.c diff --git a/fs/proc/array.c b/fs/proc/array.c index 51a4213..6350a1e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -153,6 +153,16 @@ static inline int get_task_umask(struct task_struct *tsk) return umask; } +#ifdef CONFIG_PUI +static inline void put_pui(struct seq_file *m, pui_t pui) +{ + pui_str_t str; + + pui_to_str(pui, str); + seq_puts(m, str); +} +#endif + static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { @@ -226,6 +236,17 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, for (g = ns->level; g <= pid->level; g++) seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, 
pid->numbers[g].ns)); #endif +#ifdef CONFIG_PUI + seq_puts(m, "\nPui:\t"); + put_pui(m, pid->numbers[ns->level].pui); +#ifdef CONFIG_PID_NS + seq_puts(m, "\nNSpui:"); + for (g = ns->level; g <= pid->level; g++) { + seq_putc(m, '\t'); + put_pui(m, pid->numbers[g].pui); + } +#endif +#endif seq_putc(m, '\n'); } diff --git a/include/linux/pid.h b/include/linux/pid.h index 23705a5..9bb131c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -2,6 +2,7 @@ #define _LINUX_PID_H #include <linux/rcupdate.h> +#include <linux/pui.h> enum pid_type { @@ -52,6 +53,10 @@ struct upid { int nr; struct pid_namespace *ns; struct hlist_node pid_chain; +#ifdef CONFIG_PUI + struct hlist_node pui_chain; + pui_t pui; +#endif }; struct pid diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 34cce96..cd4844c 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -9,6 +9,7 @@ #include <linux/nsproxy.h> #include <linux/kref.h> #include <linux/ns_common.h> +#include <linux/pui.h> struct pidmap { atomic_t nr_free; @@ -26,6 +27,9 @@ struct pid_namespace { struct pidmap pidmap[PIDMAP_ENTRIES]; struct rcu_head rcu; int last_pid; +#ifdef CONFIG_PUI + pui_gen_t pui_generator; +#endif unsigned int nr_hashed; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; diff --git a/include/linux/pui.h b/include/linux/pui.h new file mode 100644 index 0000000..fee67e2 --- /dev/null +++ b/include/linux/pui.h @@ -0,0 +1,55 @@ +#ifndef _LINUX_PUI_H +#define _LINUX_PUI_H + +#ifdef CONFIG_PUI + +#include <linux/atomic.h> + +typedef __u64 pui_t; +typedef char pui_str_t[17]; +typedef atomic64_t pui_gen_t; + +struct pid; +struct upid; +struct task_struct; +struct pid_namespace; + +#define PUI_INVALID 0 +#define PUI_GEN_INIT ATOMIC64_INIT(0) /* pui_gen_t is atomic64_t, so use the 64-bit initializer */ + +/* + * look up a PUI in the hash table. Must be called with the tasklist_lock + * or rcu_read_lock() held. 
+ * + * find_pui_ns() finds the pui in the namespace specified + * find_vpui() finds the pui by its virtual id, i.e. in the current namespace + */ +extern struct pid *find_pui_ns(pui_t pui, struct pid_namespace *ns); +extern struct pid *find_vpui(pui_t pui); + +/* + * find a task by its PUI + * + * find_task_by_pui_ns(): + * finds a task by its pui in the specified namespace + * find_task_by_vpui(): + * finds a task by its virtual pui + */ +extern struct task_struct *find_task_by_pui_ns(pui_t pui, struct pid_namespace *ns); +extern struct task_struct *find_task_by_vpui(pui_t pui); + +extern pui_t pui_nr_ns(struct pid *pid, struct pid_namespace *ns); +extern pui_t pui_vnr(struct pid *pid); + +extern void pui_init_generator(pui_gen_t *generator); + +extern void pui_make(struct upid *upid); +extern void pui_add(struct upid *upid); +extern void pui_del(struct upid *upid); + +extern int pui_to_str(pui_t pui, pui_str_t str); +extern pui_t pui_from_str(const char *str); + +#endif + +#endif /* _LINUX_PUI_H */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 2c748dd..143bcbe 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -94,4 +94,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_PEERPUI 55 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/init/Kconfig b/init/Kconfig index 223b734..ff4dde2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -306,6 +306,12 @@ config USELIB earlier, you may need to enable this syscall. Current systems running glibc can safely disable this. +config PUI + bool "Enable Process Unique Identifier" + default n + help + Provides a 64-bit unique identifier for processes and threads + config AUDIT bool "Auditing support" depends on NET @@ -2158,3 +2164,4 @@ config ASN1 functions to call on what tags. 
source "kernel/Kconfig.locks" + diff --git a/kernel/Makefile b/kernel/Makefile index 12c679f..ebd1cd8 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o obj-$(CONFIG_PID_NS) += pid_namespace.o +obj-$(CONFIG_PUI) += pui.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_SMP) += stop_machine.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o diff --git a/kernel/pid.c b/kernel/pid.c index f66162f..24b76fe 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -75,6 +75,9 @@ struct pid_namespace init_pid_ns = { [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }, .last_pid = 0, +#ifdef CONFIG_PUI + .pui_generator = PUI_GEN_INIT, +#endif .nr_hashed = PIDNS_HASH_ADDING, .level = 0, .child_reaper = &init_task, @@ -267,6 +270,9 @@ void free_pid(struct pid *pid) struct upid *upid = pid->numbers + i; struct pid_namespace *ns = upid->ns; hlist_del_rcu(&upid->pid_chain); +#ifdef CONFIG_PUI + pui_del(upid); +#endif switch(--ns->nr_hashed) { case 2: case 1: @@ -318,6 +324,9 @@ struct pid *alloc_pid(struct pid_namespace *ns) pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; +#ifdef CONFIG_PUI + pui_make(&pid->numbers[i]); +#endif tmp = tmp->parent; } @@ -338,6 +347,9 @@ struct pid *alloc_pid(struct pid_namespace *ns) for ( ; upid >= pid->numbers; --upid) { hlist_add_head_rcu(&upid->pid_chain, &pid_hash[pid_hashfn(upid->nr, upid->ns)]); +#ifdef CONFIG_PUI + pui_add(upid); +#endif upid->ns->nr_hashed++; } spin_unlock_irq(&pidmap_lock); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index df9e8e9..ed17097 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -130,6 +130,9 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->ucounts = ucounts; ns->nr_hashed = PIDNS_HASH_ADDING; INIT_WORK(&ns->proc_work, proc_cleanup_work); +#ifdef CONFIG_PUI + pui_init_generator(&ns->pui_generator); 
+#endif set_bit(0, ns->pidmap[0].page); atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); diff --git a/kernel/pui.c b/kernel/pui.c new file mode 100644 index 0000000..d9ab6bb --- /dev/null +++ b/kernel/pui.c @@ -0,0 +1,168 @@ +#ifdef CONFIG_PUI + +/* +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/rculist.h> +#include <linux/bootmem.h> +#include <linux/hash.h> +#include <linux/init_task.h> +#include <linux/syscalls.h> +#include <linux/proc_ns.h> +#include <linux/proc_fs.h> +*/ +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/rculist.h> +#include <linux/hash.h> +#include <linux/pui.h> +#include <linux/pid.h> +#include <linux/pid_namespace.h> + +#define pui_hashfn(pui, ns) \ + hash_long((unsigned long)pui + (unsigned long)ns, puihash_shift) + +static struct hlist_head *pui_hash; +static unsigned int puihash_shift = 4; + +void __init puihash_init(void) /* NOTE(review): no prototype or caller for puihash_init() appears in this patch, and pui_hash is only assigned here - confirm it runs before the first pui_add() */ +{ + unsigned int i, pidhash_size; + + pui_hash = alloc_large_system_hash("PUI", sizeof(*pui_hash), 0, 18, + HASH_EARLY | HASH_SMALL, + &puihash_shift, NULL, + 0, 4096); + pidhash_size = 1U << puihash_shift; + + for (i = 0; i < pidhash_size; i++) + INIT_HLIST_HEAD(&pui_hash[i]); +} + +struct pid *find_pui_ns(pui_t pui, struct pid_namespace *ns) +{ + struct upid *pnr; + + hlist_for_each_entry_rcu(pnr, + &pui_hash[pui_hashfn(pui, ns)], pui_chain) + if (pnr->pui == pui && pnr->ns == ns) + return container_of(pnr, struct pid, + numbers[ns->level]); + + return NULL; +} + +struct task_struct *find_task_by_pui_ns(pui_t pui, struct pid_namespace *ns) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "find_task_by_pui_ns() needs rcu_read_lock() protection"); + return pid_task(find_pui_ns(pui, ns), PIDTYPE_PID); +} + +struct pid *find_vpui(pui_t pui) +{ + return find_pui_ns(pui, task_active_pid_ns(current)); +} + +struct task_struct *find_task_by_vpui(pui_t pui) +{ + return find_task_by_pui_ns(pui, task_active_pid_ns(current)); +} + +pui_t 
pui_nr_ns(struct pid *pid, struct pid_namespace *ns) +{ + struct upid *upid; + pui_t result = PUI_INVALID; + + if (pid && ns->level <= pid->level) { + upid = &pid->numbers[ns->level]; + if (upid->ns == ns) + result = upid->pui; + } + return result; +} + +pui_t pui_vnr(struct pid *pid) +{ + return pui_nr_ns(pid, task_active_pid_ns(current)); +} + +void pui_init_generator(pui_gen_t *generator) +{ + atomic64_set((atomic64_t*)generator, 0); +} + +void pui_del(struct upid *upid) +{ + hlist_del_rcu(&upid->pui_chain); +} + +void pui_add(struct upid *upid) +{ + hlist_add_head_rcu(&upid->pui_chain, + &pui_hash[pui_hashfn(upid->pui, upid->ns)]); +} + +static inline pui_t pui_new(pui_gen_t *generator) +{ + pui_t result; + do { result = atomic64_inc_return(generator); } while(result == PUI_INVALID); /* bump the namespace counter itself (not the local pointer); retry when the result is the reserved PUI_INVALID */ + return result; +} + +void pui_make(struct upid *upid) +{ + upid->pui = pui_new(&upid->ns->pui_generator); +} + +int pui_to_str(pui_t pui, pui_str_t str) +{ + char c; + int i, j, r; + + i = 0; + do { + c = (char)(pui & 15); + pui >>= 4; + str[i++] = (char)(c + (c > 9 ? 
'a' - 10 : '0')); + } while(pui); + str[r = i] = 0; + j = 0; + while(j < --i) { + c = str[i]; + str[i] = str[j]; + str[j++] = c; + } + return r; +} + +pui_t pui_from_str(const char *str) +{ + char c; + pui_t result = 0; + + c = *str; + if (!c) + return PUI_INVALID; + do { + if ('0' <= c && c <= '9') + c = c - '0'; + else if ('a' <= c && c <= 'f') + c = c - 'a' + 10; + else if ('A' <= c && c <= 'F') + c = c - 'A' + 10; + else + return PUI_INVALID; + if (result >> 60) + return PUI_INVALID; + result = (result << 4) | c; + c = *++str; + } while(c); + return result; +} + + +#endif + diff --git a/net/core/sock.c b/net/core/sock.c index f560e08..65f5dbb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1271,6 +1271,26 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_incoming_cpu; break; +#ifdef CONFIG_PUI + case SO_PEERPUI: + { + pui_str_t str; + pui_t pui; + int l; + + pui = pui_vnr(sk->sk_peer_pid); + if (pui == PUI_INVALID) + return -ENOTCONN; + l = pui_to_str(pui, str); + if (len <= l) + return -EINVAL; + len = l + 1; /* includes the trailing NUL terminator */ + if (copy_to_user(optval, str, len)) + return -EFAULT; + goto lenout; + } +#endif + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). -- 2.9.3

