Re: kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-17 Thread Christopher Lameter
On Tue, 16 Jan 2018, Matthew Wilcox wrote:

> On Tue, Jan 16, 2018 at 12:17:01PM -0600, Christopher Lameter wrote:
> > Draft patch of how the data structs could change. kmem_cache_attr is read
> > only.
>
> Looks good.  Although I would add Kees' user feature:

Sure I tried to do this quickly so that the basic struct changes are
visible.

> And I'd start with
> +struct kmem_cache *kmem_cache_create_attr(const kmem_cache_attr *);
>
> leaving the old kmem_cache_create to kmalloc a kmem_cache_attr and
> initialise it.

Well at some point we should convert the callers by putting the
definitions into const kmem_cache_attr initializations. That way
the callbacks function pointers are safe.

> Can we also do something like this?
>
> -#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
> - sizeof(struct __struct), __alignof__(struct __struct),\
> - (__flags), NULL)
> +#define KMEM_CACHE(__struct, __flags) ({ \
> + const struct kmem_cache_attr kca ## __stringify(__struct) = {   \
> + .name = #__struct,  \
> + .size = sizeof(struct __struct),\
> + .align = __alignof__(struct __struct),  \
> + .flags = (__flags), \
> + };  \
> + kmem_cache_create_attr( ## __stringify(__struct));  \
> +})
>
> That way we won't need to convert any of those users.

Yep thats what I was planning.



Re: kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-16 Thread Matthew Wilcox
On Tue, Jan 16, 2018 at 12:17:01PM -0600, Christopher Lameter wrote:
> Draft patch of how the data structs could change. kmem_cache_attr is read
> only.

Looks good.  Although I would add Kees' user feature:

struct kmem_cache_attr {
char name[16];
unsigned int size;
unsigned int align;
+   unsigned int useroffset;
+   unsigned int usersize;
slab_flags_t flags;
kmem_cache_ctor ctor;
}

And I'd start with 
+struct kmem_cache *kmem_cache_create_attr(const kmem_cache_attr *);

leaving the old kmem_cache_create to kmalloc a kmem_cache_attr and
initialise it.

Can we also do something like this?

-#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
-   sizeof(struct __struct), __alignof__(struct __struct),\
-   (__flags), NULL)
+#define KMEM_CACHE(__struct, __flags) ({   \
+   const struct kmem_cache_attr kca ## __stringify(__struct) = {   \
+   .name = #__struct,  \
+   .size = sizeof(struct __struct),\
+   .align = __alignof__(struct __struct),  \
+   .flags = (__flags), \
+   };  \
+   kmem_cache_create_attr( ## __stringify(__struct));  \
+})

That way we won't need to convert any of those users.



Re: kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-16 Thread Christopher Lameter
Draft patch of how the data structs could change. kmem_cache_attr is read
only.

Index: linux/include/linux/slab.h
===
--- linux.orig/include/linux/slab.h
+++ linux/include/linux/slab.h
@@ -135,9 +135,17 @@ struct mem_cgroup;
 void __init kmem_cache_init(void);
 bool slab_is_available(void);

-struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
-   slab_flags_t,
-   void (*)(void *));
+typedef kmem_cache_ctor void (*ctor)(void *);
+
+struct kmem_cache_attr {
+   char name[16];
+   unsigned int size;
+   unsigned int align;
+   slab_flags_t flags;
+   kmem_cache_ctor ctor;
+}
+
+struct kmem_cache *kmem_cache_create(const kmem_cache_attr *);
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);

Index: linux/include/linux/slab_def.h
===
--- linux.orig/include/linux/slab_def.h
+++ linux/include/linux/slab_def.h
@@ -10,6 +10,7 @@

 struct kmem_cache {
struct array_cache __percpu *cpu_cache;
+   struct kmem_cache_attr *attr;

 /* 1) Cache tunables. Protected by slab_mutex */
unsigned int batchcount;
@@ -35,14 +36,9 @@ struct kmem_cache {
struct kmem_cache *freelist_cache;
unsigned int freelist_size;

-   /* constructor func */
-   void (*ctor)(void *obj);
-
 /* 4) cache creation/removal */
-   const char *name;
struct list_head list;
int refcount;
-   int object_size;
int align;

 /* 5) statistics */
Index: linux/include/linux/slub_def.h
===
--- linux.orig/include/linux/slub_def.h
+++ linux/include/linux/slub_def.h
@@ -83,9 +83,9 @@ struct kmem_cache {
struct kmem_cache_cpu __percpu *cpu_slab;
/* Used for retriving partial slabs etc */
slab_flags_t flags;
+   struct kmem_cache_attr *attr;
unsigned long min_partial;
int size;   /* The size of an object including meta data */
-   int object_size;/* The size of an object without meta data */
int offset; /* Free pointer offset. */
 #ifdef CONFIG_SLUB_CPU_PARTIAL
int cpu_partial;/* Number of per cpu partial objects to keep 
around */
@@ -97,12 +97,10 @@ struct kmem_cache {
struct kmem_cache_order_objects min;
gfp_t allocflags;   /* gfp flags to use on each alloc */
int refcount;   /* Refcount for slab cache destroy */
-   void (*ctor)(void *);
int inuse;  /* Offset to metadata */
int align;  /* Alignment */
int reserved;   /* Reserved bytes at the end of slabs */
int red_left_pad;   /* Left redzone padding size */
-   const char *name;   /* Name (only for display!) */
struct list_head list;  /* List of slab caches */
 #ifdef CONFIG_SYSFS
struct kobject kobj;/* For sysfs */
Index: linux/mm/slab.h
===
--- linux.orig/mm/slab.h
+++ linux/mm/slab.h
@@ -18,13 +18,11 @@
  * SLUB is no longer needed.
  */
 struct kmem_cache {
-   unsigned int object_size;/* The original size of the object */
+   struct kmem_cache_attr *attr
unsigned int size;  /* The aligned/padded/added on size  */
unsigned int align; /* Alignment as calculated */
slab_flags_t flags; /* Active flags on the slab */
-   const char *name;   /* Slab name for sysfs */
int refcount;   /* Use counter */
-   void (*ctor)(void *);   /* Called on object slot creation */
struct list_head list;  /* List of all slab caches on the system */
 };



Re: kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-16 Thread Christopher Lameter
On Tue, 16 Jan 2018, Matthew Wilcox wrote:

> > Sure this data is never changed. It can be const.
>
> It's changed at initialisation.  Look:
>
> kmem_cache_create(const char *name, size_t size, size_t align,
>   slab_flags_t flags, void (*ctor)(void *))
> s = create_cache(cache_name, size, size,
>  calculate_alignment(flags, align, size),
>  flags, ctor, NULL, NULL);
>
> The 'align' that ends up in s->align, is not the user-specified align.
> It's also dependent on runtime information (cache_line_size()), so it
> can't be calculated at compile time.

Then we would need another align field in struct kmem_cache that takes the
changes value?

> 'flags' also gets mangled:
> flags &= CACHE_CREATE_MASK;

Well ok then that also belongs into kmem_cache and the original value
stays in kmem_cache_attr.

> unsigned int would be my preference.

Great.



Re: kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-16 Thread Matthew Wilcox
On Tue, Jan 16, 2018 at 10:54:27AM -0600, Christopher Lameter wrote:
> On Tue, 16 Jan 2018, Matthew Wilcox wrote:
> 
> > I think that's a good thing!  /proc/slabinfo really starts to get grotty
> > above 16 bytes.  I'd like to chop off "_cache" from the name of every
> > single slab!  If ext4_allocation_context has to become ext4_alloc_ctx,
> > I don't think we're going to lose any valuable information.
> 
> Ok so we are going to cut off at 16 charaacters? Sounds good to me.

Excellent!

> > > struct kmem_cache_attr {
> > >   char *name;
> > >   size_t size;
> > >   size_t align;
> > >   slab_flags_t flags;
> > >   unsigned int useroffset;
> > >   unsinged int usersize;
> > >   void (*ctor)(void *);
> > >   kmem_isolate_func *isolate;
> > >   kmem_migrate_func *migrate;
> > >   ...
> > > }
> >
> > In these slightly-more-security-conscious days, it's considered poor
> > practice to have function pointers in writable memory.  That was why
> > I wanted to make the kmem_cache_attr const.
> 
> Sure this data is never changed. It can be const.

It's changed at initialisation.  Look:

kmem_cache_create(const char *name, size_t size, size_t align,
  slab_flags_t flags, void (*ctor)(void *))
s = create_cache(cache_name, size, size,
 calculate_alignment(flags, align, size),
 flags, ctor, NULL, NULL);

The 'align' that ends up in s->align, is not the user-specified align.
It's also dependent on runtime information (cache_line_size()), so it
can't be calculated at compile time.

'flags' also gets mangled:
flags &= CACHE_CREATE_MASK;


> I am not married to either way of specifying the sizes. unsigned int would
> be fine with me. SLUB falls back to the page allocator anyways for
> anything above 2* PAGE_SIZE and I think we can do the same for the other
> allocators as well. Zeroing or initializing such a large memory chunk is
> much more expensive than the allocation so it does not make much sense to
> have that directly supported in the slab allocators.

The only slabs larger than 4kB on my system right now are:
kvm_vcpu   0  0  1913618 : tunables840 : 
slabdata  0  0  0
net_namespace  1  1   608012 : tunables840 : 
slabdata  1  1  0

(other than the fake slabs for kmalloc)

> Some platforms support 64K page size and I could envision a 2M page size
> at some point. So I think we cannot use 16 bits there.
> 
> If no one objects then I can use unsigned int there again.

unsigned int would be my preference.


Re: kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-16 Thread Christopher Lameter
On Tue, 16 Jan 2018, Matthew Wilcox wrote:

> I think that's a good thing!  /proc/slabinfo really starts to get grotty
> above 16 bytes.  I'd like to chop off "_cache" from the name of every
> single slab!  If ext4_allocation_context has to become ext4_alloc_ctx,
> I don't think we're going to lose any valuable information.

Ok so we are going to cut off at 16 charaacters? Sounds good to me.

> > struct kmem_cache_attr {
> > char *name;
> > size_t size;
> > size_t align;
> > slab_flags_t flags;
> > unsigned int useroffset;
> > unsinged int usersize;
> > void (*ctor)(void *);
> > kmem_isolate_func *isolate;
> > kmem_migrate_func *migrate;
> > ...
> > }
>
> In these slightly-more-security-conscious days, it's considered poor
> practice to have function pointers in writable memory.  That was why
> I wanted to make the kmem_cache_attr const.

Sure this data is never changed. It can be const.

> Also, there's no need for 'size' and 'align' to be size_t.  Slab should
> never support allocations above 4GB in size.  I'm not even keen on seeing
> allocations above 64kB, but I see my laptop has six 512kB allocations (!),
> three 256kB allocations and seven 128kB allocations, so I must reluctantly
> concede that using an unsigned int is necessary.  If I were really into
> bitshaving, I might force all allocations to be a multiple of 32-bytes
> in size, and then we could use 16 bits to represent an allocation between
> 32 and 2MB, but I think that tips us beyond the complexity boundary.

I am not married to either way of specifying the sizes. unsigned int would
be fine with me. SLUB falls back to the page allocator anyways for
anything above 2* PAGE_SIZE and I think we can do the same for the other
allocators as well. Zeroing or initializing such a large memory chunk is
much more expensive than the allocation so it does not make much sense to
have that directly supported in the slab allocators.

Some platforms support 64K page size and I could envision a 2M page size
at some point. So I think we cannot use 16 bits there.

If no one objects then I can use unsigned int there again.



Re: kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-16 Thread Matthew Wilcox
On Tue, Jan 16, 2018 at 09:21:30AM -0600, Christopher Lameter wrote:
> > struct kmem_cache_attr {
> > const char name[32];
> 
> Want to avoid the string reference mess that occurred in the past?
> Is that really necessary? But it would limit the size of the name.

I think that's a good thing!  /proc/slabinfo really starts to get grotty
above 16 bytes.  I'd like to chop off "_cache" from the name of every
single slab!  If ext4_allocation_context has to become ext4_alloc_ctx,
I don't think we're going to lose any valuable information.

My real intent was to reduce the number of allocations; if we can make
it not necessary to kstrdup the name, I think that'd be appreciated by
our CONFIG_TINY friends.

> > (my rationale is that everything in attr should be const, but size, align
> > and flags all get modified by the slab code).
> 
> Thought about putting all the parameters into the kmem_cache_attr struct.
> 
> So
> 
> struct kmem_cache_attr {
>   char *name;
>   size_t size;
>   size_t align;
>   slab_flags_t flags;
>   unsigned int useroffset;
>   unsinged int usersize;
>   void (*ctor)(void *);
>   kmem_isolate_func *isolate;
>   kmem_migrate_func *migrate;
>   ...
> }

In these slightly-more-security-conscious days, it's considered poor
practice to have function pointers in writable memory.  That was why
I wanted to make the kmem_cache_attr const.

Also, there's no need for 'size' and 'align' to be size_t.  Slab should
never support allocations above 4GB in size.  I'm not even keen on seeing
allocations above 64kB, but I see my laptop has six 512kB allocations (!),
three 256kB allocations and seven 128kB allocations, so I must reluctantly
concede that using an unsigned int is necessary.  If I were really into
bitshaving, I might force all allocations to be a multiple of 32-bytes
in size, and then we could use 16 bits to represent an allocation between
32 and 2MB, but I think that tips us beyond the complexity boundary.



kmem_cache_attr (was Re: [PATCH 04/36] usercopy: Prepare for usercopy whitelisting)

2018-01-16 Thread Christopher Lameter
On Sun, 14 Jan 2018, Matthew Wilcox wrote:

> > Hmmm... At some point we should switch kmem_cache_create to pass a struct
> > containing all the parameters. Otherwise the API will blow up with
> > additional functions.
>
> Obviously I agree with you.  I'm inclined to not let that delay Kees'
> patches; we can fix the few places that use this API later.  At this
> point, my proposal for the ultimate form would be:

Right. Thats why I said "at some point"

>
> struct kmem_cache_attr {
>   const char name[32];

Want to avoid the string reference mess that occurred in the past?
Is that really necessary? But it would limit the size of the name.

>   void (*ctor)(void *);
>   unsigned int useroffset;
>   unsigned int user_size;
> };
>
> kmem_create_cache_attr(const struct kmem_cache_attr *attr, unsigned int size,
>   unsigned int align, slab_flags_t flags)
>
> (my rationale is that everything in attr should be const, but size, align
> and flags all get modified by the slab code).

Thought about putting all the parameters into the kmem_cache_attr struct.

So

struct kmem_cache_attr {
char *name;
size_t size;
size_t align;
slab_flags_t flags;
unsigned int useroffset;
unsinged int usersize;
void (*ctor)(void *);
kmem_isolate_func *isolate;
kmem_migrate_func *migrate;
...
}