Re: [PATCH 1/5] kfifo: remove unnecessary type check

2013-01-09 Thread Stefani Seibold
Am Mittwoch, den 09.01.2013, 10:35 +0800 schrieb Yuanhan Liu:
> On Tue, Jan 08, 2013 at 10:51:04PM +0100, Stefani Seibold wrote:
> > Am Dienstag, den 08.01.2013, 22:57 +0800 schrieb Yuanhan Liu:
> > > Firstly, this kind of type check doesn't work. It does something similar
> > > to the following:
> > >   void * __dummy = NULL;
> > >   __buf = __dummy;
> > > 
> > > __dummy is defined as void *. Thus it will not trigger warnings as
> > > expected.
> > > 
> > > Second, we don't need that kind of check. Since the prototype
> > > of __kfifo_out is:
> > >   unsigned int __kfifo_out(struct __kfifo *fifo,  void *buf, unsigned int 
> > > len)
> > > 
> > > buf is defined as void *, so we don't need to do the type check. Remove it.
> > > 
> > 
> > That's wrong.
> > 
> > First, the type checking is used in kfifo_put() and kfifo_in() for
> > const types, to check whether the passed data type can be converted to
> > the fifo element type.
> 
> Hi Stefani,
> 
> Yes, I see now. After rechecking the code, I found that this kind of
> type checking only works for kfifos statically defined by
> DECLARE/DEFINE_KFIFO, as the ptrtype is the same as the data type:
> 
> /* the 4th argument "type" is "ptrtype" */
> #define STRUCT_KFIFO(type, size) struct __STRUCT_KFIFO(type, size, 0, 
> type)
> 
> #define DECLARE_KFIFO(fifo, type, size) STRUCT_KFIFO(type, size) fifo
> 
> For dynamically allocated kfifos, however, the type checking will not
> work as expected, as ptrtype is always "void":
> 
> struct kfifo __STRUCT_KFIFO_PTR(unsigned char, 0, void);
> 

You should avoid using struct kfifo; as you can read in kfifo.h, it exists
only for compatibility reasons.

Use the macros DECLARE_KFIFO_PTR(), DECLARE_KFIFO() or
DEFINE_KFIFO() instead.

Have a look at the example files in the samples/kfifo directory.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kfifo: remove unnecessary type check

2012-10-25 Thread Stefani Seibold
Am Freitag, den 26.10.2012, 09:46 +0800 schrieb Yuanhan Liu:
> From: Yuanhan Liu 
> 
> Firstly, this kind of type check doesn't work. It does something similar
> to the following:
>   void * __dummy = NULL;
>   __buf = __dummy;
> 
> __dummy is defined as void *. Thus it will not trigger warnings as
> expected.
> 
> Second, we don't need that kind of check. Since the prototype
> of __kfifo_out is:
>   unsigned int __kfifo_out(struct __kfifo *fifo,  void *buf, unsigned int 
> len)
> 
> buf is defined as void *, so we don't need to do the type check. Remove it.
> 
> LINK: https://lkml.org/lkml/2012/10/25/386
> LINK: https://lkml.org/lkml/2012/10/25/584
> 
> Cc: Andrew Morton 
> Cc: Wei Yang 
> Cc: Stefani Seibold 
> Cc: Fengguang Wu 
> Cc: Stephen Rothwell 
> Signed-off-by: Yuanhan Liu 
> ---
>  include/linux/kfifo.h | 20 
>  1 file changed, 20 deletions(-)
> 
> diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
> index 10308c6..b8c1d03 100644
> --- a/include/linux/kfifo.h
> +++ b/include/linux/kfifo.h
> @@ -390,10 +390,6 @@ __kfifo_int_must_check_helper( \
>   unsigned int __ret; \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \
> - __dummy = (typeof(__val))NULL; \
> - } \
>   if (__recsize) \
>   __ret = __kfifo_in_r(__kfifo, __val, sizeof(*__val), \
>   __recsize); \
> @@ -432,8 +428,6 @@ __kfifo_uint_must_check_helper( \
>   unsigned int __ret; \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) \
> - __val = (typeof(__tmp->ptr))0; \
>   if (__recsize) \
>   __ret = __kfifo_out_r(__kfifo, __val, sizeof(*__val), \
>   __recsize); \
> @@ -473,8 +467,6 @@ __kfifo_uint_must_check_helper( \
>   unsigned int __ret; \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) \
> - __val = (typeof(__tmp->ptr))NULL; \
>   if (__recsize) \
>   __ret = __kfifo_out_peek_r(__kfifo, __val, sizeof(*__val), \
>   __recsize); \
> @@ -512,10 +504,6 @@ __kfifo_uint_must_check_helper( \
>   unsigned long __n = (n); \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \
> - __dummy = (typeof(__buf))NULL; \
> - } \
>   (__recsize) ?\
>   __kfifo_in_r(__kfifo, __buf, __n, __recsize) : \
>   __kfifo_in(__kfifo, __buf, __n); \
> @@ -565,10 +553,6 @@ __kfifo_uint_must_check_helper( \
>   unsigned long __n = (n); \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr) __dummy = NULL; \
> - __buf = __dummy; \
> - } \
>   (__recsize) ?\
>   __kfifo_out_r(__kfifo, __buf, __n, __recsize) : \
>   __kfifo_out(__kfifo, __buf, __n); \
> @@ -777,10 +761,6 @@ __kfifo_uint_must_check_helper( \
>   unsigned long __n = (n); \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr) __dummy __attribute__ ((unused)) = NULL; \
> - __buf = __dummy; \
> - } \
>   (__recsize) ? \
>   __kfifo_out_peek_r(__kfifo, __buf, __n, __recsize) : \
>   __kfifo_out_peek(__kfifo, __buf, __n); \

Did you try to compile the whole kernel, including all the drivers, with
your patch?


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux-next: build warnings after merge of the akpm tree

2012-10-25 Thread Stefani Seibold
Am Freitag, den 26.10.2012, 06:36 +0800 schrieb Richard Yang:

> >
> >And holy cow that code is hard to read :( Why was kfifo_in()
> >implemented as a macro, anyway?  AFAICT all its args have a known type,
> >so we could have used a proper C interface, which would have fixed all
> >this nicely.
> 

That's simply for performance reasons: the compiler removes most of the
code during the compile stage, so no runtime checks are necessary. And
it is the only way, since C does not provide templates like C++.
 
> Hmm, move the definition of kfifo_in()/kfifo_out() into the kfifo.c?
> 

Don't do it. This will result in a performance degradation. Look at the
disassembled code after each change and compare it with the
previous one. I don't believe that you can produce better code.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kfifo: remove unnecessary type check

2012-10-26 Thread Stefani Seibold
Am Freitag, den 26.10.2012, 14:11 +0800 schrieb Yuanhan Liu:
> On Fri, Oct 26, 2012 at 07:38:31AM +0200, Stefani Seibold wrote:
> > Am Freitag, den 26.10.2012, 09:46 +0800 schrieb Yuanhan Liu:
> > > From: Yuanhan Liu 
> > > 
> > > > Firstly, this kind of type check doesn't work. It does something similar
> > > > to the following:
> > >   void * __dummy = NULL;
> > >   __buf = __dummy;
> > > 
> > > __dummy is defined as void *. Thus it will not trigger warnings as
> > > expected.
> > > 
> > > Second, we don't need that kind of check. Since the prototype
> > > of __kfifo_out is:
> > >   unsigned int __kfifo_out(struct __kfifo *fifo,  void *buf, unsigned int 
> > > len)
> > > 
> > > buf is defined as void *, so we don't need do the type check. Remove it.
> > > 
> > > LINK: https://lkml.org/lkml/2012/10/25/386
> > > LINK: https://lkml.org/lkml/2012/10/25/584
> > > 
> > > Cc: Andrew Morton 
> > > Cc: Wei Yang 
> > > Cc: Stefani Seibold 
> > > Cc: Fengguang Wu 
> > > Cc: Stephen Rothwell 
> > > Signed-off-by: Yuanhan Liu 
> > > ---

> > 
> > Did you tried to compile the whole kernel including all the drivers with
> > your patch?
> 
> Hi Stefani,
> 
> I did a build test; it didn't introduce any new compile errors or
> warnings. However, I haven't tried make allmodconfig yet. Does this patch
> seem wrong to you?
> 
> Thanks,
> Yuanhan Liu

Hi Liu,

No, the patch does not seem wrong to me. But as you saw with the previous
patch, it is not easy to predict the side effects.

An allmodconfig build together with C=2 is necessary to check that there are
no side effects on current users of the kfifo API. That is exactly what
I did again and again as I developed the kfifo API.

Also, you have to build the kfifo samples, since this example code uses
all features of the kfifo API.

And again: The kfifo is designed to do as many things as possible at compile
time, not at runtime. If you modify the code, you have to check the compiler's
assembler output for degradation, especially in kfifo_put, kfifo_get,
kfifo_in, kfifo_out, __kfifo_in and __kfifo_out. Avoid runtime checks
if you can do them at compile time. This is the basic reason for doing it in
macros.

Greetings,
Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kfifo: remove unnecessary type check

2012-10-26 Thread Stefani Seibold
Am Freitag, den 26.10.2012, 15:17 +0800 schrieb Yuanhan Liu:
> On Fri, Oct 26, 2012 at 08:51:06AM +0200, Stefani Seibold wrote:
> > Am Freitag, den 26.10.2012, 14:11 +0800 schrieb Yuanhan Liu:
> > > On Fri, Oct 26, 2012 at 07:38:31AM +0200, Stefani Seibold wrote:
> > > > Am Freitag, den 26.10.2012, 09:46 +0800 schrieb Yuanhan Liu:
> > > > > From: Yuanhan Liu 
> > > > > 
> > > > > > Firstly, this kind of type check doesn't work. It does something
> > > > > > similar
> > > > > > to the following:
> > > > >   void * __dummy = NULL;
> > > > >   __buf = __dummy;
> > > > > 
> > > > > __dummy is defined as void *. Thus it will not trigger warnings as
> > > > > expected.
> > > > > 
> > > > > Second, we don't need that kind of check. Since the prototype
> > > > > of __kfifo_out is:
> > > > >   unsigned int __kfifo_out(struct __kfifo *fifo,  void *buf, 
> > > > > unsigned int len)
> > > > > 
> > > > > buf is defined as void *, so we don't need do the type check. Remove 
> > > > > it.
> > > > > 
> > > > > LINK: https://lkml.org/lkml/2012/10/25/386
> > > > > LINK: https://lkml.org/lkml/2012/10/25/584
> > > > > 
> > > > > Cc: Andrew Morton 
> > > > > Cc: Wei Yang 
> > > > > Cc: Stefani Seibold 
> > > > > Cc: Fengguang Wu 
> > > > > Cc: Stephen Rothwell 
> > > > > Signed-off-by: Yuanhan Liu 
> > > > > ---
> > 
> > > > 
> > > > Did you tried to compile the whole kernel including all the drivers with
> > > > your patch?
> > > 
> > > Hi Stefani,
> > > 
> > > I did a build test, it did't introduce any new compile errors and
> > > warnings. While, I haven't tried make allmodconfig then. Does this patch
> > > seems wrong to you?
> > > 
> > > Thanks,
> > > Yuanhan Liu
> > 
> > Hi Liu,
> > 
> > no the patch seems not wrong to me. But as you see with the previous
> > patch it is not easy to predict the side effects.
> > 
> > An allmodconfig together with C=2 is necessary to check if there is no
> > side effects which current users of the kfifo API.
> 
> Hi Stefani,
> 
> Building with C=2 produces tons of warnings, so it is hard to tell whether
> the patch introduces new ones. I built some drivers that use kfifo, and the
> samples as you suggested, with C=2 and found no new warnings. I will build
> all drivers that use kfifo with C=2 later, and will post the result here.
> 

That will be great...

> > 
> > Also you have to build the kfifo samples, since this example code use
> > all features of the kfifo API.
> > 
> > And again: The kfifo is designed to do the many things at compile time,
> > not at runtime. If you modify the code, you have to check the compiler
> > assembler output for no degradation, especially in kfifo_put, kfifo_get,
> > kfifo_in, kfifo_out, __kfifo_in and __kfifo_out. Prevent runtime checks
> > if you can do it at compile time. This is the basic reasons to do it in
> > macros.
> 
> Is it enough to check kernel/kfifo.o only? I built that file with
> and without this patch, and then dumped it with objdump -D kernel/kfifo.o to
> /tmp/kfifo.dump.with and /tmp/kfifo.dump.without, respectively. The
> two dump files are exactly the same.
> 

No, since most of the code is inlined for performance reasons, you have
to check the output code of the kfifo examples for regressions and code
size increase.
 
Greetings,
Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] kfifo: round up the fifo size power of 2

2012-10-26 Thread Stefani Seibold
Am Freitag, den 26.10.2012, 15:56 +0800 schrieb Yuanhan Liu:
> Say, if we want to allocate a fifo with a size of 6 bytes, it would be safer
> to allocate 8 bytes instead of 4 bytes.
> 
> I know it works with rounddown_pow_of_two as well, since size is maintained
> in the kfifo internal part. But, I'm quite curious why Stefani chose
> rounddown_pow_of_two. To reduce memory?
> 

Yes, exactly. If a user does the wrong thing, then the user will also get
a wrong result, and no memory is wasted.

But anyway, if the majority like this patch it is okay for me.

> Thanks,
> Yuanhan Liu
> -
> 
> Cc: Stefani Seibold 
> Cc: Andrew Morton 
> Signed-off-by: Yuanhan Liu 
> ---
>  kernel/kfifo.c |6 +++---
>  1 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/kfifo.c b/kernel/kfifo.c
> index 59dcf5b..0f78378 100644
> --- a/kernel/kfifo.c
> +++ b/kernel/kfifo.c
> @@ -39,11 +39,11 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
>   size_t esize, gfp_t gfp_mask)
>  {
>   /*
> -  * round down to the next power of 2, since our 'let the indices
> +  * round up to the next power of 2, since our 'let the indices
>* wrap' technique works only in this case.
>*/
>   if (!is_power_of_2(size))
> - size = rounddown_pow_of_two(size);
> + size = roundup_pow_of_two(size);
>  
>   fifo->in = 0;
>   fifo->out = 0;
> @@ -84,7 +84,7 @@ int __kfifo_init(struct __kfifo *fifo, void *buffer,
>   size /= esize;
>  
>   if (!is_power_of_2(size))
> - size = rounddown_pow_of_two(size);
> + size = roundup_pow_of_two(size);
>  
>   fifo->in = 0;
>   fifo->out = 0;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] kfifo: round up the fifo size power of 2

2012-10-26 Thread Stefani Seibold
Am Freitag, den 26.10.2012, 20:33 +0800 schrieb Yuanhan Liu:
> On Fri, Oct 26, 2012 at 11:30:27AM +0200, Stefani Seibold wrote:
> > Am Freitag, den 26.10.2012, 15:56 +0800 schrieb Yuanhan Liu:
> > > Say, if we want to allocate a filo with size of 6 bytes, it would be safer
> > > to allocate 8 bytes instead of 4 bytes.
> > > 
> > > I know it works with rounddown_pow_of_two as well, since size is 
> > > maintained
> > > in the kfifo internal part. But, I'm quite curious why Stefani chose
> > > rounddown_pow_of_two. To reduce memory?
> > > 
> > 
> > Yes, exactly, if a user do the wrong thing, than the user will get also
> > a wrong result, and did not waste memory.
> 
> But, isn't it better to 'correct' it? ;-)

Both are wrong. It depends on the point of view. For me it is better to get
less and not waste space. For example: requesting 1025 will yield, in your
case, a fifo with 2048 elements, which requires double the memory
expected.

> 
> > 
> > But anyway, if the majority like this patch it is okay for me.
> 
> Sorry, do you mean you are OK with this patch?
> 

It does not depend on me; ask for a democratic decision.

Greetings,
Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kfifo: remove unnecessary type check

2012-10-26 Thread Stefani Seibold
Am Freitag, den 26.10.2012, 21:04 +0800 schrieb Yuanhan Liu:
> On Fri, Oct 26, 2012 at 11:26:31AM +0200, Stefani Seibold wrote:
> > Am Freitag, den 26.10.2012, 15:17 +0800 schrieb Yuanhan Liu:
> > > On Fri, Oct 26, 2012 at 08:51:06AM +0200, Stefani Seibold wrote:
> > > > Am Freitag, den 26.10.2012, 14:11 +0800 schrieb Yuanhan Liu:
> > > > > On Fri, Oct 26, 2012 at 07:38:31AM +0200, Stefani Seibold wrote:
> > > > > > Am Freitag, den 26.10.2012, 09:46 +0800 schrieb Yuanhan Liu:
> > > > > > > From: Yuanhan Liu 
> > > > > > > 
> > > > > > > > Firstly, this kind of type check doesn't work. It does something
> > > > > > > > similar
> > > > > > > > to the following:
> > > > > > >   void * __dummy = NULL;
> > > > > > >   __buf = __dummy;
> > > > > > > 
> > > > > > > __dummy is defined as void *. Thus it will not trigger warnings as
> > > > > > > expected.
> > > > > > > 
> > > > > > > Second, we don't need that kind of check. Since the prototype
> > > > > > > of __kfifo_out is:
> > > > > > >   unsigned int __kfifo_out(struct __kfifo *fifo,  void *buf, 
> > > > > > > unsigned int len)
> > > > > > > 
> > > > > > > buf is defined as void *, so we don't need do the type check. 
> > > > > > > Remove it.
> > > > > > > 
> > > > > > > LINK: https://lkml.org/lkml/2012/10/25/386
> > > > > > > LINK: https://lkml.org/lkml/2012/10/25/584
> > > > > > > 
> > > > > > > Cc: Andrew Morton 
> > > > > > > Cc: Wei Yang 
> > > > > > > Cc: Stefani Seibold 
> > > > > > > Cc: Fengguang Wu 
> > > > > > > Cc: Stephen Rothwell 
> > > > > > > Signed-off-by: Yuanhan Liu 
> > > > > > > ---
> > > > 
> 
> [snip]...
> 
> > > > 
> > > > Also you have to build the kfifo samples, since this example code use
> > > > all features of the kfifo API.
> > > > 
> > > > And again: The kfifo is designed to do the many things at compile time,
> > > > not at runtime. If you modify the code, you have to check the compiler
> > > > assembler output for no degradation, especially in kfifo_put, kfifo_get,
> > > > kfifo_in, kfifo_out, __kfifo_in and __kfifo_out. Prevent runtime checks
> > > > if you can do it at compile time. This is the basic reasons to do it in
> > > > macros.
> > > 
> > > Is it enought to check kernel/kfifo.o only? I build that file with
> > > and without this patch. And then  dump it by objdump -D kernel/fifo.o to
> > > /tmp/kfifo.dump.with and /tmp/kfifo.dump.without, respectively. And the
> > > two dump file are exactly same.
> > > 
> > 
> > No, since most of the code is inlined due performace reasons, you have
> > to hack the kfifo examples output code for regressions and code
> > increase.
> 
> In my test, this patch doesn't change anything. Here are some data to
> prove that:
> 
> $ make samples/kfifo/
> $ cp samples/kfifo/*.o /tmp/before/
> 
> $ git am this-patch
> $ make samples/kfifo/
> $ cp samples/kfifo/*.o /tmp/after/
> 
> $ for i in /tmp/before/*.o; do size $i /tmp/after/`basename $i`; done
>    text    data     bss     dec     hex filename
>    1939     464     456    2859     b2b /tmp/before/bytestream-example.o
>    1939     464     456    2859     b2b /tmp/after/bytestream-example.o
>    text    data     bss     dec     hex filename
>    1423     112     296    1831     727 /tmp/before/dma-example.o
>    1423     112     296    1831     727 /tmp/after/dma-example.o
>    text    data     bss     dec     hex filename
>    1864     624     376    2864     b30 /tmp/before/inttype-example.o
>    1864     624     376    2864     b30 /tmp/after/inttype-example.o
>    text    data     bss     dec     hex filename
>    1916     464     472    2852     b24 /tmp/before/record-example.o
>    1916     464     472    2852     b24 /tmp/after/record-example.o
> # You will see that it changed nothing.
> 
> 
> $ objdump -d /tmp/before/bytestream-example.o >/tmp/bytestream-example.before
> $ objdump -d /tmp/after/bytestream-example.o >/tmp/bytestream-example.after
> $ diff /tmp/bytestream.before /tmp/bytestream.after -urN
> --- bytestream.before   2012-10-26 20:55:33.645578668 +0800
> +++ bytestream.after    2012-10-26 20:55:26.520578669 +0800
> @@ -1,5 +1,5 @@
> 
> -/tmp/bytestream-example.o: file format elf64-x86-64
> +/tmp/bytestream-example.o: file format elf64-x86-64
> 
> # So, as you can see, except for the filename, they are the same.
> 
> 
> So, Stefani, is this what you want? Does this look OK to you?

Perfect. It looks okay to me, and I hope to you too ;-)

Acked by stef...@seibold.net

Greetings,
Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] kfifo: round up the fifo size power of 2

2012-11-08 Thread Stefani Seibold
Am Donnerstag, den 08.11.2012, 20:24 +0800 schrieb Yuanhan Liu:
> On Tue, Oct 30, 2012 at 11:52:10PM -0700, Andrew Morton wrote:
> > On Wed, 31 Oct 2012 07:30:33 +0100 Stefani Seibold  
> > wrote:
> > 
> > > > Yes, and I guess the same to give them a 64-element one.
> > > > 
> > > > > 
> > > > > If there's absolutely no prospect that the kfifo code will ever 
> > > > > support
> > > > > 100-byte fifos then I guess we should rework the API so that the 
> > > > > caller
> > > > > has to pass in log2 of the size, not the size itself.  That way there
> > > > > will be no surprises and no mistakes.
> > > > > 
> > > > > That being said, the power-of-2 limitation isn't at all intrinsic to a
> > > > > fifo, so we shouldn't do this.  Ideally, we'd change the kfifo
> > > > > implementation so it does what the caller asked it to do!
> > > > 
> > > > I'm fine with removing the power-of-2 limitation. Stefani, what's your
> > > > comment on that?
> > > > 
> > > 
> > > You can't remove the power-of-2-limitation, since this would result in a
> > > performance decrease (bit wise and vs. modulo operation).
> > 
> > Probably an insignificant change in performance.
> > 
> > It could be made much smaller by just never doing the modulus operation
> > - instead do
> > 
> > if (++index == max)
> > index = 0;
> > 
> > this does introduce one problem: it's no longer possible to distinguish
> > the "full" and "empty" states by comparing the head and tail indices. 
> > But that is soluble.
> 
> Hi Andrew,
> 
> Yes, it is soluble. How about the following solution?
> 
> Add 2 more fields(in_off and out_off) in __kfifo structure, so that in
> and out will keep increasing each time, while in_off and out_off will be
> wrapped to head if goes to the end of fifo buffer.
> 
> So, we can use in and out for counting unused space, and distinguish the
> "full" and "empty" state, and also, of course no need for locking.
> 
> Stefani, sorry for quite late reply. I checked all the code used kfifo_alloc
> and kfifo_init. Firstly, there are a lot of users ;-)
> 
> And secondly, I did find some examples used kfifo as it supports
> none-power-of-2 kfifo. Say, the one at drivers/hid/hid-logitech-dj.c:
>if (kfifo_alloc(_dev->notif_fifo,
>DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct dj_report),
>GFP_KERNEL)) {
> 
> which means it wants to allocate a kfifo buffer which can store
> DJ_MAX_NUMBER_NOTIFICATIONS(8 here) dj_report(each 15 bytes) at once.
> 
> And DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct dj_report) = 8 * 15 = 120.
> The current code would then allocate rounddown_pow_of_two(120) =
> 64 bytes, which can hold only 4 dj_reports at once, half of what was expected.
> 

This will go away with a log API.

> There are few more examples like this.
> 
> And, kfifo_init used a pre-allocated buffer, it would be a little strange
> to ask user to pre-allocate a power of 2 size aligned buffer.
> 
> So, I guess it's would be good to support none-power-of-2 kfifo?
> 
> I know you care the performance a lot. Well, as Andrew said, it may
> introduce a little insignificant drop(no modulus, few more add/dec).
> Thus, do you have some benchmarks for that? I can have a test to check
> if it is a insignificant change on performance or not :)
> 

Dirty, Ugly, Hacky and this will produce a lot of overhead, especially
for kfifo_put and kfifo_get which are inlined code.

In the kernel world it has always been a regular use case to use
power-of-2 restricted APIs, e.g. the slab cache.

I see no benefit in a non-power-of-2 kfifo, only drawbacks.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] kfifo: round up the fifo size power of 2

2012-11-13 Thread Stefani Seibold
Am Freitag, den 09.11.2012, 10:32 +0800 schrieb Yuanhan Liu:
> On Thu, Nov 08, 2012 at 01:37:15PM +0100, Stefani Seibold wrote:
> > Am Donnerstag, den 08.11.2012, 20:24 +0800 schrieb Yuanhan Liu:

> Yes, it is. I will try log API then.
> 
> Stefani, I found an issue while rework to current API. Say the current
> code of __kfifo_init:
> int __kfifo_init(struct __kfifo *fifo, void *buffer,
> unsigned int size, size_t esize)
> {
> size /= esize;
> 
> if (!is_power_of_2(size))
> size = rounddown_pow_of_two(size);
> 
> }
> 
> Even though I changed the API to something like:
> int __kfifo_init(struct __kfifo *fifo, void *buffer,
> int size_order, size_t esize)
> {
>   unsigned int size = 1 << size_order;
> 
>   size /= esize;
>   ...
> }
> 
> See? There is still a division, and we can't make sure that the result will
> be a power of 2 after it.
> 
> So, I came up with 2 proposals to fix this.
> 
> 1. refactor the meaning of 'size' argument first.
> 
>'size' means the size of pre-allocated buffer. We can refactor it to
>meaning of 'the number of fifo elements' just like __kfifo_alloc, so
>that we don't need do the size /= esize stuff.
> 
> 2. remove kfifo_init
> 
>As we can't make sure that kfifo will do exactly what users asked(in
>the way of fifo size). It would be safe and good to maintain buffer
>and buffer size inside kfifo. So, I propose to remove it and use
>kfifo_alloc instead.
> 
>git grep 'kfifo_init\>' shows that we currently have 2 users only.
> 
> 
> The first way is hacky, and it doesn't make much sense to me. Since
> buffer is pre-allocated by user but not kfifo. User has to calculate
> element size and the number of elements, which is not friendly.
> 
> The second way does make more sense to me.

kfifo_init() was requested by some kernel developers; I never liked it.
If you have a better and cleaner solution, then do it; otherwise kick it
out if you like.

- Stefani



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] remove untouched code in kfifo_in

2012-10-18 Thread Stefani Seibold

Am Freitag, den 19.10.2012, 00:37 +0200 schrieb richard -rw- weinberger:
> On Thu, Oct 18, 2012 at 3:59 PM, Wei Yang  wrote:
> > In the kfifo_in macro, one piece of code is surrounded by if(0). This code
> > was introduced by Stefani Seibold  to suppress a compiler
> > warning. This warning is no longer there after the gcc version upgrade.
> >
> > This patch just removes this code.
> 
> Are you sure?
> This code fragment looks like a compiler bomb to detect type mismatch to me...
> 

Yes, you are great! That was the reason why I wrote this piece of code.
So don't remove it!

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Replace the type check code with typecheck() in kfifo_in

2012-10-23 Thread Stefani Seibold
Am Mittwoch, den 24.10.2012, 11:41 +0800 schrieb Wei Yang:
> In the kfifo_in macro, one piece of code, surrounded by if(0), checks the
> types of __tmp->ptr_const and __buf. If they are of different types, a
> warning is output during compilation. This piece of code is not
> self-explanatory and is a little bit hard to understand.
> 
> Based on Andrew Morton's suggestion, this patch replaces this with typecheck(),
> which is easier to understand.
> 
> In the same file, there are several places with the same code style. This
> patch changes them too.
> 
> Signed-off-by: Wei Yang 
> Reviewed-by: Andrew Morton 
> Reviewed-by: richard -rw- weinberger 
> ---
>  include/linux/kfifo.h |   22 +-
>  1 files changed, 5 insertions(+), 17 deletions(-)
> 
> diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
> index 10308c6..680c293 100644
> --- a/include/linux/kfifo.h
> +++ b/include/linux/kfifo.h
> @@ -390,10 +390,7 @@ __kfifo_int_must_check_helper( \
>   unsigned int __ret; \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \
> - __dummy = (typeof(__val))NULL; \
> - } \
> + typecheck(typeof(__tmp->ptr_const), __val); \
>   if (__recsize) \
>   __ret = __kfifo_in_r(__kfifo, __val, sizeof(*__val), \
>   __recsize); \
> @@ -433,7 +430,7 @@ __kfifo_uint_must_check_helper( \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
>   if (0) \
> - __val = (typeof(__tmp->ptr))0; \
> + __val = (typeof(__tmp->ptr))NULL; \
>   if (__recsize) \
>   __ret = __kfifo_out_r(__kfifo, __val, sizeof(*__val), \
>   __recsize); \
> @@ -512,10 +509,7 @@ __kfifo_uint_must_check_helper( \
>   unsigned long __n = (n); \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \
> - __dummy = (typeof(__buf))NULL; \
> - } \
> + typecheck(typeof(__tmp->ptr_const), __buf);\
>   (__recsize) ?\
>   __kfifo_in_r(__kfifo, __buf, __n, __recsize) : \
>   __kfifo_in(__kfifo, __buf, __n); \
> @@ -565,10 +559,7 @@ __kfifo_uint_must_check_helper( \
>   unsigned long __n = (n); \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr) __dummy = NULL; \
> - __buf = __dummy; \
> - } \
> + typecheck(typeof(__tmp->ptr), __buf); \
>   (__recsize) ?\
>   __kfifo_out_r(__kfifo, __buf, __n, __recsize) : \
>   __kfifo_out(__kfifo, __buf, __n); \
> @@ -777,10 +768,7 @@ __kfifo_uint_must_check_helper( \
>   unsigned long __n = (n); \
>   const size_t __recsize = sizeof(*__tmp->rectype); \
>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> - if (0) { \
> - typeof(__tmp->ptr) __dummy __attribute__ ((unused)) = NULL; \
> - __buf = __dummy; \
> - } \
> + typecheck(typeof(__tmp->ptr), __buf); \
>   (__recsize) ? \
>   __kfifo_out_peek_r(__kfifo, __buf, __n, __recsize) : \
>   __kfifo_out_peek(__kfifo, __buf, __n); \

Acked-by: Stefani Seibold 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] remove untouched code in kfifo_in

2012-10-17 Thread Stefani Seibold
This was introduced by me to suppress a compiler warning, so don't remove
it.

Am Mittwoch, den 17.10.2012, 16:05 +0200 schrieb Jiri Kosina:
> On Wed, 17 Oct 2012, Richard Yang wrote:
> 
> > >> This patch just remove this code.
> > >> ---
> > >>  include/linux/kfifo.h |4 
> > >>  1 files changed, 0 insertions(+), 4 deletions(-)
> > >> 
> > >> diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
> > >> index 10308c6..e7015bb 100644
> > >> --- a/include/linux/kfifo.h
> > >> +++ b/include/linux/kfifo.h
> > >> @@ -512,10 +512,6 @@ __kfifo_uint_must_check_helper( \
> > >>  unsigned long __n = (n); \
> > >>  const size_t __recsize = sizeof(*__tmp->rectype); \
> > >>  struct __kfifo *__kfifo = &__tmp->kfifo; \
> > >> -if (0) { \
> > >> -typeof(__tmp->ptr_const) __dummy __attribute__ 
> > >> ((unused)); \
> > >> -__dummy = (typeof(__buf))NULL; \
> > >> -} \
> > >>  (__recsize) ?\
> > >>  __kfifo_in_r(__kfifo, __buf, __n, __recsize) : \
> > >>  __kfifo_in(__kfifo, __buf, __n); \
> > >
> > >Frankly, I'd first like to understand what was the rationale for adding it 
> > >at the first place. Adding Stefani and lkml to CC.
> > 
> > Agree.
> > 
> > BTW, by git blame, I just see commit 2e956fb3 which is checked in by 
> > Stefani. 
> > While looking in the diff, the kfifo_in() is already there. Looks like this
> > commit move the definition from one file to another file. 
> > 
> > So I am not sure the if(0) code is added by Stefani. Any other method to 
> > trace
> > the ealier history?
> 
> git blame -- include/linux/kfifo-new.h 2e956fb3~1
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] remove untouched code in kfifo_in

2012-10-17 Thread Stefani Seibold
Am Donnerstag, den 18.10.2012, 09:59 +0800 schrieb Richard Yang:
> On Wed, Oct 17, 2012 at 09:51:49PM +0200, Stefani Seibold wrote:
> >This was introduce by me to suppress a compiler warning, so don't remove
> >it. 
> Which warning? I compile by removing this, but not find warning.
> I compile it on x86_64 platform.
> >

Sorry, I can't remember; that was three years and two major gcc releases
ago.

> >Am Mittwoch, den 17.10.2012, 16:05 +0200 schrieb Jiri Kosina:
> >> On Wed, 17 Oct 2012, Richard Yang wrote:
> >> 
> >> > >> This patch just remove this code.
> >> > >> ---
> >> > >>  include/linux/kfifo.h |4 
> >> > >>  1 files changed, 0 insertions(+), 4 deletions(-)
> >> > >> 
> >> > >> diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
> >> > >> index 10308c6..e7015bb 100644
> >> > >> --- a/include/linux/kfifo.h
> >> > >> +++ b/include/linux/kfifo.h
> >> > >> @@ -512,10 +512,6 @@ __kfifo_uint_must_check_helper( \
> >> > >>   unsigned long __n = (n); \
> >> > >>   const size_t __recsize = sizeof(*__tmp->rectype); \
> >> > >>   struct __kfifo *__kfifo = &__tmp->kfifo; \
> >> > >> - if (0) { \
> >> > >> - typeof(__tmp->ptr_const) __dummy __attribute__ 
> >> > >> ((unused)); \
> >> > >> - __dummy = (typeof(__buf))NULL; \
> >> > >> - } \
> >> > >>   (__recsize) ?\
> >> > >>   __kfifo_in_r(__kfifo, __buf, __n, __recsize) : \
> >> > >>   __kfifo_in(__kfifo, __buf, __n); \
> >> > >
> >> > >Frankly, I'd first like to understand what was the rationale for adding 
> >> > >it 
> >> > >at the first place. Adding Stefani and lkml to CC.
> >> > 
> >> > Agree.
> >> > 
> >> > BTW, by git blame, I just see commit 2e956fb3 which is checked in by 
> >> > Stefani. 
> >> > While looking in the diff, the kfifo_in() is already there. Looks like 
> >> > this
> >> > commit move the definition from one file to another file. 
> >> > 
> >> > So I am not sure the if(0) code is added by Stefani. Any other method to 
> >> > trace
> >> > the ealier history?
> >> 
> >> git blame -- include/linux/kfifo-new.h 2e956fb3~1
> >> 
> >
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


losetup kernel crash in drivers/block/loop.c kernel 3.4.11

2012-10-03 Thread Stefani Seibold
Hi,

I am faced with a strange kernel crash while removing a loopback device
with losetup during a software update of my embedded device. The bug was
introduced between 3.0 and 3.4. All other kernels I have used (2.6.39,
2.6.35, 2.6.33, 2.6.29, 2.6.27 and 2.6.20) work well.

BUG: unable to handle kernel NULL pointer dereference at 0041
IP: [] invalidate_bdev+0x4/0x26
*pde = 
Ooops:  I#11 PREEMNT SMP
Modules linked in: vfat fat i915 drm_kms_helper drm intel_agp i2c_algo_bit 
intel_gtt agpgart video backlight e1000e usb_storage

Pid: 869, comm: losetup Tainted G8.3.4
EIP: 0060:[] EFLAGS: 00010282 CPU: 1
EIP is at invalidate_bdev+0x4/0x26
EAX: 0029 EBX: f63c1c00 ECX:  EDX: f63c1e20
ESI: f5c6bc80 EDI: f63c1c60 EBP: f596e500 ESP: f5053e54
 DS: 007b ES: 007b FS: 00d8 GS:  SS: 0068
CR0: 8005003b CR2: 0041 CR3: 324ae000 CR4: 000407d0
DR0:  DR1:  DR2:  DR3: 
DR6: 0ff0 DR7: 0400
Process losetup (pid: 869, ti=f5052000 task=f616c0c0 task.ti=f5052000)
Stack:
 f63c1c00 c0277449 000200da f63c1c00 ffe7 4c01 f5c39900 c02784d0
 f5d750a4  f5053efc f5d750a4 f5269900 c017dda6 001d 8000
 f63c1cfc c027897b ffe7 4c01 f5053f10 c0202021  f5c39900
Call Trace:
 [] ? loop_clr_fd+0x11/0x1d6
 [] ? lo_ioctl+0x455/0x62b
 [] ? do_last.clone.32+0x55b/0x5d5
 [] ? loop_switch.clone.13+0x67/0x67
 [] ? __blkdev_driver_ioctl+0x1d/0x25
 [] ? blkdev_ioctl+0x6a3/0x6c2
 [] ? handle_pte_fault+0x21d/0x7ad
 [] ? do_file_open+0x21/0x5d
 [] ? block_ioctl+0x2f/0x34
 [] ? block_ioctl+0x2f/0x34
 [] ? bd_set_size+0x60/0x60
 [] ? do_vfs_ioctl+0x455/0x492
 [] ? do_page_fault+0x30f/0x32c
 [] ? fd_install+0x1e/0x3d
 [] ? do_sys_open+0x17e/0x188
 [] ? sys_ioctl+0x2d/0x47
 [] ? syscall+0x7/0xb
Code: 00 89 f0 5b 5e 5f c3 53 8b 40 08 8b 58 18 83 7b 3c 00 74 11 e8 3f b9 ff 
ff 89 d8 31 d2 31 c9 5b e9 ba 8e fc ff 5b c3 53 8b 40 08 (8b) 58 18 83 7b 3c 00 
74 17 e8  1f b9 ff ff e8 4e 88 fc ff 89 d8
EIP: [] invalidate_bdev+0x4/0x26 SS:ESP 0068:f5053e54
CR2: 0041

This dump was copied by hand from a smartphone screenshot; I hope there
are no typos.

It is not possible to write a demo program which reproduces this bug due
to the complexity, so I will explain what is going on.

First, boot a kernel which includes an initramfs doing the following:

/bin/mount -t proc none /proc
/bin/mount -o rw,data=journal,barrier=1,errors=remount-ro /dev/sda3 /mnt
/bin/mount -o loop /mnt/rootfs.squashfs /rootfs
/bin/mount -o loop modules.squashfs /rootfs/lib/modules
/bin/mount -o move /mnt /rootfs/rw
/bin/umount /proc
exec /rootfs/bin/sh -c 'exec /sbin/switch_root -c /dev/console /rootfs 
/sbin/init'
exec /bin/sh

The squashfs image will be mounted and become the new root filesystem;
the file system of /dev/sda3 will then be mounted under /rw.

The reason for doing this is that it is very easy to exchange the root
filesystem, since it is only a plain image file. There is also no extra
partition necessary, which could turn out to be too small in the future.

The kernel modules are also a squashfs image, which is part of the
initramfs. This makes it safe to exchange the kernel, because it will
change together with the modules.

After starting the new init process of rootfs.squashfs, the firmware
image optfs.squashfs will also be mounted via a loopback block device
at /opt.

When the user decides to do an update, a new rootfs.squashfs will be
copied into a ramdisk and the following script (snippet) will be
executed:

cat /dev/console
umount /init/opt
umount -l -r /init/rw
umount -l -r /init
umount /etc 
rm -rf /tmp/etc
sync
for i in /dev/loop*
do
losetup -d $i 2>/dev/null
done
rm \$0
exec /tmp/update.sh "$1" "$2"
reboot -f
EOF
chmod a+x /tmp/init

echo "::restart:/tmp/init" >/tmp/etc/inittab

mount -o ro /dev/ramdisk /mnt
cd /mnt
/sbin/pivot_root . init

mount -o move /init/tmp /tmp
mount -o move /init/proc /proc
mount -o move /init/sys /sys
mount -o move /init/dev/pts /dev/pts
mount -o move /init/dev/shm /dev/shm
mount -o bind /tmp/etc /etc

init -q 
sleep 1
kill -SIGQUIT 1
exit

Now the update.sh script has control over the system; no applications
or daemons will be running any more and all mass storage devices should
be unmounted.

Up to this point everything works fine. Then update.sh will execute
the following code:

rm -f /rw/optfs.squashfs

for i in /dev/loop*
do
losetup -d $i 2>/dev/null
done

This will remove the old firmware and all possible loopback devices.
Executing the losetup will crash the kernel and will produce the Oops
above.

This is independent of the underlying file system and the processor
architecture; it happens on x86 and ppc, and with ext3fs and yaffs2 as
well.

Any idea?

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org

Re: [PATCH 6/6] Add 32 bit VDSO support for 32 and 64 bit kernels

2012-12-18 Thread Stefani Seibold
Am Dienstag, den 18.12.2012, 08:17 -0800 schrieb H. Peter Anvin:
> On 12/18/2012 01:40 AM, stef...@seibold.net wrote:
> > From: Stefani Seibold 
> > 
> > This patch adds support for 32 bit VDSO.
> > 
> > For 32 bit programs running on a 32 bit kernel, the same mechanism is
> > used as for 64 bit programs running on a 64 bit kernel.
> > 
> > For 32 bit programs running under a 64 bit IA32_EMULATION, it is a
> > little bit more tricky. In this case the VVAR and HPET will be mapped
> > into the 32 bit address space, by cutting of the upper 32 bit. So the
> > address for this will not changed in the view of the 32 bit VDSO. The
> > HPET will be mapped in this case at 0xff5fe000 and the VVAR at 0xff5ff000.
> > 
> > The transformation between the in 64 bit kernel representation and the 32 
> > bit
> > abi will be also provided.
> > 
> > So we have one VDSO Source for all.
> > 
> 
> You seem to once again have disregarded the request to make the vvar and
> hpet part of the vdso address space proper.  If this is not possible or
> highly impractical, please extend a technical motivation why that is so,
> rather than simply ignoring it.
> 

Pardon, I never disregarded this, nor have I agreed that this is going to
be part of the VDSO. I also currently have no idea how to do this, and I
see no need to do this revamp at the moment. The 64 bit VDSO has lived
with this kind of implementation for more than 6 years.

You asked me to do the VDSO 32 bit stuff for IA32_EMULATION before
it is ready for inclusion into the kernel. That's exactly what I did. I
spent the whole weekend of my spare time on this implementation. Now
we have them all.

The patch works perfectly, all issues are solved:

- Calling conventions
- ABI transformations
- System call gateway for X86 32 bit
- Mapping of the FIXMAP and HPET into the lower 32 bit address space for
IA32_EMULATION
- Support for 32 bit programs in 32 kernel and 64 bit kernel
- One VDSO source for all

If you prefer another solution, that's okay. There are many ways to code
things. But for now I think it is a good step ahead. That is what I
can currently provide.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6] Add 32 bit VDSO support for 32 and 64 bit kernels

2012-12-18 Thread Stefani Seibold
Am Dienstag, den 18.12.2012, 10:44 -0800 schrieb H. Peter Anvin:
> On 12/18/2012 08:52 AM, Stefani Seibold wrote:
> > 
> > Pardon, i never disregarded nor i have agreed that this is going to be a
> > part of the VDSO. I currently have also no idea how to do this and i see
> > no need at the moment to do this revamp. The 64 bit VDSO lives since
> > more than 6 years with this kind of implementation.
> > 
> 
> It was part of this discussion thread, about how to best manage the
> address space.  Fixed addresses are a major problem, and introducing new
> ones are extremely undesirable.
> 

No new fixed address is introduced. They are already there for
x86_64. If this is currently not a major problem on that architecture,
then it will not be one for x86_32 either.

> Hence I wrote:
> 
> > IMO it seems this is making it way more complicated than it is. Just
> > make sure you have a section in the vdso where you can map in a data
> > page with the symbols in the right offsets. Extra points for doing
> > magic so that it is at the beginning or end, but I think that might
> > be harder than necessary.
> 
> Basically, make the vvar and hpet pages part of the vdso page list.
> Optionally they can be mapped without the MAYWRITE option -- in fact, we
> could easily split the vdso into an executable area which gets MAYWRITE
> to be able to set breakpoints and a data area which doesn't -- but that
> is a minor tweak IMO.
> 

I see the benefits, but it will not work under all circumstances. The
VDSO compat mode for x86_32 requires a fixed address and there is no room
behind it. So since this must be preserved, I see no real gain in this.

> > You asked me to do the VDSO 32 bit stuff for the IA32_EMULATION, before
> > it is ready for inclusion into the kernel. Thats exactly what i did. I
> > spend the whole weekend of my spare time to do this implementation. Now
> > we have them all.
> > 
> > The patch works perfectly, all issues are solved:
> > 
> > - Calling conventions
> > - ABI transformations
> > - System call gateway for X86 32 bit
> > - Mapping of the FIXMAP and HPET into the lower 32 bit address space for
> > IA32_EMULATION
> > - Support for 32 bit programs in 32 kernel and 64 bit kernel
> > - One VDSO source for all
> > 
> > If you prefer an other solutions, its okay. There are many ways to code
> > things. But for now i think it is a good step ahead. That is what i
> > currently can provide.
> 
> This is good.  We have some time anyway to get this ready for the 3.9
> merge window.
> 

What does this mean? Do you accept my patch or drop it? I see no real
technical reason to drop it. It makes things better, not worse. Maybe
there is a better solution, but this is the next step.

I advocate applying this patch, because I spent a lot of time writing it
and it is a good base for continuing the work.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6] Add 32 bit VDSO support for 32 and 64 bit kernels

2012-12-19 Thread Stefani Seibold
Am Dienstag, den 18.12.2012, 12:37 -0800 schrieb Andy Lutomirski:
> On Tue, Dec 18, 2012 at 12:32 PM, Stefani Seibold  wrote:
> > Am Dienstag, den 18.12.2012, 10:44 -0800 schrieb H. Peter Anvin:
> >> On 12/18/2012 08:52 AM, Stefani Seibold wrote:
> >> >
> >> > Pardon, i never disregarded nor i have agreed that this is going to be a
> >> > part of the VDSO. I currently have also no idea how to do this and i see
> >> > no need at the moment to do this revamp. The 64 bit VDSO lives since
> >> > more than 6 years with this kind of implementation.
> >> >
> >>
> >> It was part of this discussion thread, about how to best manage the
> >> address space.  Fixed addresses are a major problem, and introducing new
> >> ones are extremely undesirable.
> >>
> >
> > There is no introduce of new fix address. There are still there for
> > x86_64. If this will currently not a major problem on this architecture
> > than it will not for x86_32 too.
> 
> Not necessarily true.  On x86-64 (non-compat) the fixmap address is in
> kernel space (high bit set), so it can't conflict with anything in
> user space.  On true 32-bit mode, the same applies.  In compat mode,
> the fixed address is in *user* space and might conflict with existing
> uses.
> 
> >
> >> Hence I wrote:
> >>
> >> > IMO it seems this is making it way more complicated than it is. Just
> >> > make sure you have a section in the vdso where you can map in a data
> >> > page with the symbols in the right offsets. Extra points for doing
> >> > magic so that it is at the beginning or end, but I think that might
> >> > be harder than necessary.
> >>
> >> Basically, make the vvar and hpet pages part of the vdso page list.
> >> Optionally they can be mapped without the MAYWRITE option -- in fact, we
> >> could easily split the vdso into an executable area which gets MAYWRITE
> >> to be able to set breakpoints and a data area which doesn't -- but that
> >> is a minor tweak IMO.
> >>
> >
> > I see the benefits, but it will not work under all circumstance. The
> > VDSO compat mode for x86_32 requires a fix address and there is no room
> > behind this. So since this must preserved, i see no real gain for this.
> 
> Not true.  It can be mapped with the vdso at a variable address using
> GOTOFF addressing.  See my earlier email with
> __attribute__((visibility("hidden")).
> 
> --Andy

I am not sure that we are talking about the same thing.

In a 32 bit kernel the VDSO can be mapped at a fixed address, VDSO_HIGH_BASE
(kernel parameter vdso32=2 or CONFIG_COMPAT_VDSO), which is 0xffffe000.
There is no available page before or after this page, so there is no
space for a multi-page VDSO.

The only way I see to do this is a test in the vdso_...() functions for
running at this address, and then accessing the old fixmap addresses for
VVAR and HPET. This can be done, for example, by tweaking a macro.

If this is okay, I can do it. Otherwise I have no idea how to do it.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6] Add 32 bit VDSO support for 32 and 64 bit kernels

2012-12-20 Thread Stefani Seibold
Am Donnerstag, den 20.12.2012, 08:17 -0800 schrieb H. Peter Anvin:
> The whole point is to avoid fixed addresses.  We already install a list of 
> pages as the vdso; the vvar and hpet pages can simply be part of that list.
> 

It would be great if you could give me an answer to my question. Again:

In a 32 bit kernel the VDSO can be mapped at a fixed address, VDSO_HIGH_BASE
(kernel parameter vdso32=2 or CONFIG_COMPAT_VDSO), which is 0xffffe000.
There is no available page BEFORE or AFTER this page, so it is not
possible to have VVAR or HPET as part of that list.

> Stefani Seibold  wrote:
> 
> >Am Dienstag, den 18.12.2012, 12:37 -0800 schrieb Andy Lutomirski:
> >> On Tue, Dec 18, 2012 at 12:32 PM, Stefani Seibold
> > wrote:
> >> > Am Dienstag, den 18.12.2012, 10:44 -0800 schrieb H. Peter Anvin:
> >> >> On 12/18/2012 08:52 AM, Stefani Seibold wrote:
> >> >> >
> >> >> > Pardon, i never disregarded nor i have agreed that this is going
> >to be a
> >> >> > part of the VDSO. I currently have also no idea how to do this
> >and i see
> >> >> > no need at the moment to do this revamp. The 64 bit VDSO lives
> >since
> >> >> > more than 6 years with this kind of implementation.
> >> >> >
> >> >>
> >> >> It was part of this discussion thread, about how to best manage
> >the
> >> >> address space.  Fixed addresses are a major problem, and
> >introducing new
> >> >> ones are extremely undesirable.
> >> >>
> >> >
> >> > There is no introduce of new fix address. There are still there for
> >> > x86_64. If this will currently not a major problem on this
> >architecture
> >> > than it will not for x86_32 too.
> >> 
> >> Not necessarily true.  On x86-64 (non-compat) the fixmap address is
> >in
> >> kernel space (high bit set), so it can't conflict with anything in
> >> user space.  On true 32-bit mode, the same applies.  In compat mode,
> >> the fixed address is in *user* space and might conflict with existing
> >> uses.
> >> 
> >> >
> >> >> Hence I wrote:
> >> >>
> >> >> > IMO it seems this is making it way more complicated than it is.
> >Just
> >> >> > make sure you have a section in the vdso where you can map in a
> >data
> >> >> > page with the symbols in the right offsets. Extra points for
> >doing
> >> >> > magic so that it is at the beginning or end, but I think that
> >might
> >> >> > be harder than necessary.
> >> >>
> >> >> Basically, make the vvar and hpet pages part of the vdso page
> >list.
> >> >> Optionally they can be mapped without the MAYWRITE option -- in
> >fact, we
> >> >> could easily split the vdso into an executable area which gets
> >MAYWRITE
> >> >> to be able to set breakpoints and a data area which doesn't -- but
> >that
> >> >> is a minor tweak IMO.
> >> >>
> >> >
> >> > I see the benefits, but it will not work under all circumstance.
> >The
> >> > VDSO compat mode for x86_32 requires a fix address and there is no
> >room
> >> > behind this. So since this must preserved, i see no real gain for
> >this.
> >> 
> >> Not true.  It can be mapped with the vdso at a variable address using
> >> GOTOFF addressing.  See my earlier email with
> >> __attribute__((visibility("hidden")).
> >> 
> >> --Andy
> >
> >I am not sure that we talking about the same.
> >
> >In a 32 bit kernel a VDSO can mapped on an fix address VDSO_HIGH_BASE
> >(kernel parameter vdso32=2 or CONFIG_COMPAT_VDSO), which is 0xe000.
> >There is no available page before nor after this page, so there is no
> >space for a multi page VDSO.
> >
> >The only way i see is to do this is a test in the vdso_...() functions
> >for running on this address and than access the old fixmap addresses
> >for
> >VVAR and HPET. This can be done for example by a tweaking macro.
> >
> >If this is okay, i can do it. Otherwise i have no idea how to.
> >
> >- Stefani
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Add VDSO time function support for x86 32-bit kernel

2012-12-12 Thread Stefani Seibold
Am Mittwoch, den 12.12.2012, 15:34 -0800 schrieb H. Peter Anvin:
> On 12/12/2012 12:19 PM, stef...@seibold.net wrote:
> > diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c 
> > b/arch/x86/vdso/vdso32/vclock_gettime.c
> > new file mode 100644
> > index 000..c9a1909
> > --- /dev/null
> > +++ b/arch/x86/vdso/vdso32/vclock_gettime.c
> > @@ -0,0 +1,7 @@
> > +/*
> > + * since vgtod layout differs between X86_64 and x86_32, it is not 
> > possible to
> > + * provide a 32 bit vclock with a 64 bit kernel
> > + */
> > +#ifdef CONFIG_X86_32
> > +#include "../vclock_gettime.c"
> > +#endif
> 
> This is where this goes fail.  Sorry, it is not acceptable to introduce 
> an ABI on x86-32 without providing it also on x86-64 in compatibility mode.
> 
> I also don't believe it is not possible... it might require some more 
> cleverness; perhaps we need to do the 32-bit vgtod in such a way that it 
> *is* compatible with 64 bits.
> 

The comment is ambiguous:

Since the vgtod layout differs between X86_64 and x86_32 AND the vgtod is
not inside the accessible address space of a 32 bit program, it is
CURRENTLY not possible to provide a 32 bit vclock with a 64 bit kernel.

As I understand it, the VDSO is an alternative path, so if there is no
support for it, the program or library that tries to use it must fall
back to the system call interface.

So there is no drawback for 32 bit programs running on a 64 bit kernel. 

I think this feature is not so important and can be implemented in a
subsequent patch, because a 64 bit kernel system mostly runs 64 bit
programs. Let us fix these things step by step.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Add VDSO time function support for x86 32-bit kernel

2012-12-12 Thread Stefani Seibold
Am Mittwoch, den 12.12.2012, 22:14 -0800 schrieb H. Peter Anvin:
> This is too late for 3.8 anyway, so there is time to make it work correctly 
> before the 3.9 merge window anyway.  After this merge window is over I may 
> pull this into a testing branch, but compat support is a precondition.
> 
> The vdso is only optional if you build in backwards compatibility anyway, and 
> software has a right to expect a specific numeric kernel version to export a 
> single ABI.
> 

Any idea or clean solution for how I can map the 64 bit vgtod into the 32
bit address space? That's the only problem I see.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Add VDSO time function support for x86 32-bit kernel

2012-12-12 Thread Stefani Seibold
Am Mittwoch, den 12.12.2012, 22:47 -0800 schrieb H. Peter Anvin:
> Should be a simple matter of sharing pages.  Look perhaps at the x32 vdso for 
> a hint.
> 


> >
> >Any idea or clean solution how i can map the 64 bit vgtod into the 32
> >bit address space? Thats the only problem i see.
> >

No, I see no special handling in the x32 vdso to do this. I am not sure
whether the x32 vdso can access the 64 bit address space of
vsyscall_gtod_data. I can't test this due to the lack of an x32 ABI system.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [CRIU] [PATCH] Add VDSO time function support for x86 32-bit kernel

2012-12-14 Thread Stefani Seibold
Am Freitag, den 14.12.2012, 14:46 -0800 schrieb H. Peter Anvin:
> On 12/14/2012 12:34 AM, Pavel Emelyanov wrote:
> > On 12/14/2012 06:20 AM, Andy Lutomirski wrote:
> >> On Thu, Dec 13, 2012 at 6:18 PM, H. Peter Anvin  wrote:
> >>> Wouldn't the vdso get mapped already and could be mremap()'d.  If we
> >> really need more control I'd almost push for a device/filesystem node
> >> that could be mmapped the usual way.
> >>
> >> Hmm.  That may work, but it'll still break ABI.  I'm not sure that
> >> criu is stable enough yet that we should care.  Criu people?
> > 
> > It's not yet, but we'd still appreciate the criu-friendly vdso redesign.
> > 
> >> (In brief summary: how annoying would it be if the vdso was no longer
> >> just a bunch of constant bytes that lived somewhere?)
> > 
> > It depends on what vdso is going to be. In the perfect case it should
> > a) be mremap-able to any address (or be at fixed address _forever_, but
> >I assume this is not feasible);
> > b) have entry points at fixed (or somehow movable) places.
> > 
> > I admit that I didn't understand your question properly, if I did,
> > please correct me.
> > 
> 
> Either way... criu on the side, we should proceed with this vdso
> redesign and get support for the 32-bit entry points including compat
> mode on x86-64.
> 
>   -hpa
> 
> 

Sorry for not following the discussion, but I am currently trying to
compile vclock_gettime.c as a 32 bit object. Most of the (clever) work
is done.

After this the next step is to map the needed fixmaps into the 32 bit
address space. Maybe this can be done with install_special_mapping().

I think i will do this job in the next days.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] samples: Fix `echo 1 > /proc/int-fifo` never return error

2015-02-04 Thread Stefani Seibold
The example is intended for int types, not for strings. So it is not a
bug, it's a feature ;-) But anyway, if you prefer to handle strings,
that is okay with me.

Am Dienstag, den 03.02.2015, 11:51 + schrieb Wang Long:
> echo 99 > /proc/int-fifo   > Never return
> echo 1000 > /proc/int-fifo > Never return
> 
> this patch fix it.
> 
> Signed-off-by: Wang Long 
> ---
>  samples/kfifo/inttype-example.c | 51 
> -
>  1 file changed, 40 insertions(+), 11 deletions(-)
> 
> diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
> index 8dc3c2e..cc0db5f 100644
> --- a/samples/kfifo/inttype-example.c
> +++ b/samples/kfifo/inttype-example.c
> @@ -6,6 +6,7 @@
>   * Released under the GPL version 2 only.
>   *
>   */
> +#include 
>  
>  #include 
>  #include 
> @@ -23,6 +24,9 @@
>  /* name of the proc entry */
>  #define  PROC_FIFO   "int-fifo"
>  
> +/* Worst case buffer size needed for holding an integer. */
> +#define PROC_NUMBUF 13
> +
>  /* lock for procfs read access */
>  static DEFINE_MUTEX(read_lock);
>  
> @@ -108,33 +112,58 @@ static int __init testfunc(void)
>  static ssize_t fifo_write(struct file *file, const char __user *buf,
>   size_t count, loff_t *ppos)
>  {
> - int ret;
> - unsigned int copied;
> + char buffer[PROC_NUMBUF];
> + int value;
> + int err;
>  
> - if (mutex_lock_interruptible(_lock))
> - return -ERESTARTSYS;
> + memset(buffer, 0, sizeof(buffer));
>  
> - ret = kfifo_from_user(, buf, count, );
> + if (count > sizeof(buffer) - 1)
> + count = sizeof(buffer) - 1;
> + if (copy_from_user(buffer, buf, count)) {
> + err = -EFAULT;
> + goto out;
> + }
>  
> - mutex_unlock(_lock);
> + err = kstrtoint(strstrip(buffer), 0, );
> + if (err)
> + goto out;
> +
> + if (kfifo_is_full()) {
> + err = -EINVAL;
> + goto out;
> + }
>  
> - return ret ? ret : copied;
> + if (mutex_lock_interruptible(_lock))
> + return -ERESTARTSYS;
> + kfifo_put(, value);
> + mutex_unlock(_lock);
> +out:
> + return err < 0 ? err : count;
>  }
>  
>  static ssize_t fifo_read(struct file *file, char __user *buf,
>   size_t count, loff_t *ppos)
>  {
> - int ret;
> - unsigned int copied;
> + char buffer[PROC_NUMBUF * FIFO_SIZE];
> + int value;
> + size_t  len = 0;
> + ssize_t ret = -1;
> +
> + memset(buffer, 0, sizeof(buffer));
>  
>   if (mutex_lock_interruptible(_lock))
>   return -ERESTARTSYS;
>  
> - ret = kfifo_to_user(, buf, count, );
> + while (!kfifo_is_empty()){
> + ret = kfifo_get(, );
> + len = snprintf(buffer, sizeof(buffer), "%s%d\n", buffer, value);
> + }
>  
>   mutex_unlock(_lock);
> + ret = copy_to_user(buf, buffer, len);
>  
> - return ret ? ret : copied;
> + return ret ? ret : len;
>  }
>  
>  static const struct file_operations fifo_fops = {


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] external references for device tree overlays

2017-06-08 Thread Stefani Seibold
Hi Pantelis,

On Wed, 2017-06-07 at 11:11 +0300, Pantelis Antoniou wrote:
> Hi Stefani,
> 
> On Tue, 2017-06-06 at 21:17 +0200, Stefani Seibold wrote:
> > Hi Pantelis,
> > 
> > thanks for the suggestion. This feature is not very well
> > documented. I
> > tried this on my rasp1 running 4.12.0-rc3 and it doesn't work. My
> > source is:
> > 
> > // rapsi example
> > /dts-v1/;
> > /plugin/;
> > 
> > / {
> > compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";
> > 
> > fragment@0 {
> > target-path = "/soc/i2s@7e203000";
> > __overlay__ {
> > #address-cells = <0x0001>;
> > #size-cells = <0x0001>;
> > test = "test";
> > timer = <&{/soc/timer@7e003}>;
> > };
> > };
> > };
> > 
> > 
> > The resulting overlay is (decompiled with fdtdump):
> > 
> > /dts-v1/;
> > // magic:   0xd00dfeed
> > // totalsize:   0x19a (410)
> > // off_dt_struct:   0x38
> > // off_dt_strings:  0x148
> > // off_mem_rsvmap:  0x28
> > // version: 17
> > // last_comp_version:   16
> > // boot_cpuid_phys: 0x0
> > // size_dt_strings: 0x52
> > // size_dt_struct:  0x110
> > 
> > / {
> > compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";
> > fragment@0 {
> > target-path = "/soc/i2s@7e203000";
> > __overlay__ {
> > #address-cells = <0x0001>;
> > #size-cells = <0x0001>;
> > test = "test";
> > timer = <0xdeadbeef>;
> > };
> > };
> > __fixups__ {
> > /soc/timer@7e003 = "/fragment@0/__overlay__:timer:0";
> > };
> > };
> > 
> > But this will not apply:
> > 
> > OF: resolver: overlay phandle fixup failed: -22
> > create_overlay: Failed to resolve tree
> > 
> > 
> 
> Yes, it will not work as it is; my point is that you don't need the
> magic __*__ node.
> 

The magic __fixups__ node was inserted by the device tree compiler. I
use the dtc from https://github.com/pantoniou/dtc at commit
d990b8013889b816ec054c7e07a77db59c56c400.

> You will need to modify the overlay application code to live insert a
> phandle (if it doesn't exist) when it encounters a /path fixup.
> 

That is part of my patch!

> > Anyway, the reason for my patch is that i can reference to nodes
> > which
> > lacks a phandle. The phandle will be created on the fly and also
> > destroyed when the overlay is unloaded.
> > 
> > I have a real use case for this patch:
> > 
> > I have a BIOS on some ARM64 servers which provides broken device
> > tree.
> > It also lacks some devices in this tree which needs references to
> > other
> > devices which lacks a phandle.
> > 
> > Since the BIOSes are closed source i need a way to work arround
> > this
> > problem without patching all the drivers involved to this devices.
> > 
> > Hope this helps to understand the reason for this patch.
> > 
> 
> FWIW your problem seems like something that would happen on the
> field.
> We can berate the vendor of not providing the correct device tree,
> but
> in the end workarounds for broken vendor things are common in the
> kernel.
> 

Yes, that is the way Linux does things. Linux has a long history
of working around bugs in BIOSes, ACPI or broken devices.

Greetings,
Stefani



Re: [PATCH] external references for device tree overlays

2017-06-08 Thread Stefani Seibold
On Wed, 2017-06-07 at 17:19 -0500, Rob Herring wrote:
> On Wed, Jun 7, 2017 at 3:11 AM, Pantelis Antoniou
>  wrote:
> > Hi Stefani,
> > 
> > On Tue, 2017-06-06 at 21:17 +0200, Stefani Seibold wrote:
> > > Hi Pantelis,
> > > 
> > > thanks for the suggestion. This feature is not very well
> > > documented. I
> > > tried this on my rasp1 running 4.12.0-rc3 and it doesn't work. My
> > > source is:
> > > 
> > > // rapsi example
> > > /dts-v1/;
> > > /plugin/;
> > > 
> > > / {
> > > compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";
> > > 
> > > fragment@0 {
> > > target-path = "/soc/i2s@7e203000";
> > > __overlay__ {
> > > #address-cells = <0x0001>;
> > > #size-cells = <0x0001>;
> > > test = "test";
> > > timer = <&{/soc/timer@7e003}>;
> > > };
> > > };
> > > };
> > > 
> > > 
> > > The resulting overlay is (decompiled with fdtdump):
> > > 
> > > /dts-v1/;
> > > // magic: 0xd00dfeed
> > > // totalsize: 0x19a (410)
> > > // off_dt_struct: 0x38
> > > // off_dt_strings:0x148
> > > // off_mem_rsvmap:0x28
> > > // version:   17
> > > // last_comp_version: 16
> > > // boot_cpuid_phys:   0x0
> > > // size_dt_strings:   0x52
> > > // size_dt_struct:0x110
> > > 
> > > / {
> > > compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";
> > > fragment@0 {
> > > target-path = "/soc/i2s@7e203000";
> > > __overlay__ {
> > > #address-cells = <0x0001>;
> > > #size-cells = <0x0001>;
> > > test = "test";
> > > timer = <0xdeadbeef>;
> > > };
> > > };
> > > __fixups__ {
> > > /soc/timer@7e003 = "/fragment@0/__overlay__:timer:0";
> > > };
> > > };
> > > 
> > > But this will not apply:
> > > 
> > > OF: resolver: overlay phandle fixup failed: -22
> > > create_overlay: Failed to resolve tree
> > > 
> > > 
> > 
> > Yes, it will not work as it is; my point is that you don't need the
> > magic __*__ node.
> > 
> > You will need to modify the overlay application code to live insert
> > a
> > phandle (if it doesn't exist) when it encounters a /path fixup.
> 
> phandles only exist if something in the base tree refers to that
> node.
> Adding them when they don't exist should definitely be something we
> support for overlays. But don't call that a broken DT. That would be
> a
> separate issue.
> 

Believe me, it is broken. Due to an NDA I am not able to give you more
details about the vendor. But they forgot to provide a device node
which must refer to the attached network and interrupt controller.

- Stefani



[PATCH] external references for device tree overlays

2017-06-05 Thread Stefani Seibold
From: Stefani Seibold 

This patch enables external references for symbols which are not
exported by the current device tree. For example

// RASPI example (only for testing)
/dts-v1/;
/plugin/;

/ {
compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";

fragment@0 {
target-path = "/soc/i2s@7e203000";
__overlay__ {
#address-cells = <0x0001>;
#size-cells = <0x0001>;
test = "test";
timer = <&timer>;
};
};

__external_symbols__ {
timer = "/soc/timer@7e003000";
};
};

The "timer" symbol is not exported by the RASPI device tree, because it is
missing in the __symbols__ section of the device tree.

In the case of the RASPI device tree this could simply be fixed by modifying
the device tree source, but when the device tree is provided by a closed
source BIOS this kind of missing symbol cannot be fixed.

An additional benefit is the ability to override a (possibly broken) symbol
exported by the current live device tree.

The patch is based and tested on linux 4.12-rc3.

Signed-off-by: Stefani Seibold 
Signed-off-by: Stefani Seibold 
---
 drivers/of/overlay.c  | 19 +++
 drivers/of/resolver.c | 27 ++-
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 7827786718d8..de6516ea0fcd 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -50,6 +50,7 @@ struct of_overlay {
int id;
struct list_head node;
int count;
+   struct device_node *tree;
struct of_overlay_info *ovinfo_tab;
struct of_changeset cset;
 };
@@ -422,6 +423,8 @@ int of_overlay_create(struct device_node *tree)
/* add to the tail of the overlay list */
list_add_tail(&ov->node, &ov_list);
 
+   ov->tree = tree;
+
of_overlay_notify(ov, OF_OVERLAY_POST_APPLY);
 
mutex_unlock(&of_mutex);
@@ -524,6 +527,7 @@ int of_overlay_destroy(int id)
 {
struct of_overlay *ov;
int err;
+   phandle phandle;
 
mutex_lock(&of_mutex);
 
@@ -540,6 +544,8 @@ int of_overlay_destroy(int id)
goto out;
}
 
+   phandle = ov->tree->phandle;
+
of_overlay_notify(ov, OF_OVERLAY_PRE_REMOVE);
list_del(&ov->node);
__of_changeset_revert(&ov->cset);
@@ -549,6 +555,19 @@ int of_overlay_destroy(int id)
of_changeset_destroy(&ov->cset);
kfree(ov);
 
+   if (phandle) {
+   struct device_node *node;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&devtree_lock, flags);
+   for_each_of_allnodes(node) {
+   if (node->phandle >= phandle)
+   node->phandle = 0;
+   }
+   raw_spin_unlock_irqrestore(&devtree_lock, flags);
+   }
+
+
err = 0;
 
 out:
diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 771f4844c781..31b5f32c9b27 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -286,13 +286,14 @@ static int adjust_local_phandle_references(struct 
device_node *local_fixups,
 int of_resolve_phandles(struct device_node *overlay)
 {
struct device_node *child, *local_fixups, *refnode;
-   struct device_node *tree_symbols, *overlay_fixups;
+   struct device_node *tree_symbols, *ext_symbols, *overlay_fixups;
struct property *prop;
const char *refpath;
phandle phandle, phandle_delta;
int err;
 
tree_symbols = NULL;
+   ext_symbols = NULL;
 
if (!overlay) {
pr_err("null overlay\n");
@@ -321,6 +322,9 @@ int of_resolve_phandles(struct device_node *overlay)
for_each_child_of_node(overlay, child) {
if (!of_node_cmp(child->name, "__fixups__"))
overlay_fixups = child;
+   else
+   if (!of_node_cmp(child->name, "__external_symbols__"))
+   ext_symbols = child;
}
 
if (!overlay_fixups) {
@@ -329,20 +333,30 @@ int of_resolve_phandles(struct device_node *overlay)
}
 
tree_symbols = of_find_node_by_path("/__symbols__");
-   if (!tree_symbols) {
-   pr_err("no symbols in root of device tree.\n");
+   if (!tree_symbols && !ext_symbols) {
+   pr_err("no symbols for resolve in device tree.\n");
err = -EINVAL;
goto out;
}
 
+   phandle_delta = live_tree_max_phandle() + 1;
+
for_each_property_of_node(overlay_fixups, prop) {
 
/* skip properties added automatically */
if (!of_prop_cmp(prop->name, "name"))
continue;
 
-   err = of_property_read_string(tree_symbols,
- 

Re: [PATCH] external references for device tree overlays

2017-06-06 Thread Stefani Seibold
Hi Frank,

On 06.06.2017, 00:20 -0700 Frank Rowand wrote::
> On 06/05/17 05:59, Stefani Seibold wrote:
> > From: Stefani Seibold 
> > 
> > This patch enables external references for symbols which are not
> > exported by the current device tree. For example
> > 
> > // RASPI example (only for testing)
> > /dts-v1/;
> > /plugin/;
> > 
> > / {
> > compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";
> > 
> > fragment@0 {
> > target-path = "/soc/i2s@7e203000";
> > __overlay__ {
> > #address-cells = <0x0001>;
> > #size-cells = <0x0001>;
> > test = "test";
> > timer = <>;
> > };
> > };
> > 
> > __external_symbols__ {
> > timer = "/soc/timer@7e003000";
> > };
> > };
> 
> My hope is that the dtc compiler will stop supporting specification
> of the
> __symbols__ node in dts source, and only generate it automatically in
> the dtb.
> That change to dtc would not allow any node name specified in a dts
> to begin
> with an underscore.  Thus node __external_symbols__ would not be
> allowed.
> 

The name is not so important to me, only the solution.

> > In case of the RASPI device tree this could be simple fixed by
> > modifing
> > the device tree source, but when the device tree is provided by a
> > closed
> > source BIOS this kind of missing symbol could not be fixed.
> 
> Is there a real example of this issue, or is this a theoretical
> concern?
> If this is a real example, we should be discouraging such behavior.
> 

Yes, I have a BIOS on some ARM64 servers which provides a broken device tree. It 
also lacks some devices in this tree which need references to other devices 
which lack a phandle.


> The suggestion by Pantelis should work, but that is just a hack to
> get
> you out of a bad situation, not a good practice.
> 

I tried it, but it doesn't work. Look at my post to Pantelis.

- Stefani


Re: [PATCH] external references for device tree overlays

2017-06-06 Thread Stefani Seibold
Hi Pantelis,

thanks for the suggestion. This feature is not very well documented. I
tried this on my rasp1 running 4.12.0-rc3 and it doesn't work. My
source is:

// rapsi example
/dts-v1/;
/plugin/;

/ {
compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";

fragment@0 {
target-path = "/soc/i2s@7e203000";
__overlay__ {
#address-cells = <0x0001>;
#size-cells = <0x0001>;
test = "test";
timer = <&{/soc/timer@7e003}>;
};
};
};


The resulting overlay is (decompiled with fdtdump):

/dts-v1/;
// magic:   0xd00dfeed
// totalsize:   0x19a (410)
// off_dt_struct:   0x38
// off_dt_strings:  0x148
// off_mem_rsvmap:  0x28
// version: 17
// last_comp_version:   16
// boot_cpuid_phys: 0x0
// size_dt_strings: 0x52
// size_dt_struct:  0x110

/ {
compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";
fragment@0 {
target-path = "/soc/i2s@7e203000";
__overlay__ {
#address-cells = <0x0001>;
#size-cells = <0x0001>;
test = "test";
timer = <0xdeadbeef>;
};
};
__fixups__ {
/soc/timer@7e003 = "/fragment@0/__overlay__:timer:0";
};
};

But this will not apply:

OF: resolver: overlay phandle fixup failed: -22
create_overlay: Failed to resolve tree


Anyway, the reason for my patch is that I can reference nodes which
lack a phandle. The phandle will be created on the fly and also
destroyed when the overlay is unloaded.

I have a real use case for this patch:

I have a BIOS on some ARM64 servers which provides a broken device tree.
It also lacks some devices in this tree which need references to other
devices which lack a phandle.

Since the BIOSes are closed source I need a way to work around this
problem without patching all the drivers involved with these devices.

Hope this helps to understand the reason for this patch.

- Stefani

Am Montag, den 05.06.2017, 21:43 +0300 schrieb Pantelis Antoniou:
> Hi Stefani,
> 
> On Mon, 2017-06-05 at 14:59 +0200, Stefani Seibold wrote:
> > From: Stefani Seibold 
> > 
> > This patch enables external references for symbols which are not
> > exported by the current device tree. For example
> > 
> > // RASPI example (only for testing)
> > /dts-v1/;
> > /plugin/;
> > 
> > / {
> > compatible = "brcm,bcm2835", "brcm,bcm2708", "brcm,bcm2709";
> > 
> > fragment@0 {
> > target-path = "/soc/i2s@7e203000";
> > __overlay__ {
> > #address-cells = <0x0001>;
> > #size-cells = <0x0001>;
> > test = "test";
> > timer = <>;
> > };
> > };
> > 
> > __external_symbols__ {
> > timer = "/soc/timer@7e003000";
> > };
> > };
> > 
> 
> I understand the problem. I am just not fond of the
> __external_symbols__
> solution.
> 
> There's a facility in the DT source language that allows to declare
> pathspec labels.
> 
> The 'timer = <&timer>;' statement could be rewritten as 
> 'timer = <&{/soc/timer@7e003}>;'
> 
> Internally you can 'catch' that this refers to a symbol in the base
> tree
> and then do the same symbol insertion as the patch you've submitted.
> 
> The benefit to the above is that you don't introduce manually edited
> special nodes.
> 
> Regards
> 
> -- Pantelis
> 
> > The "timer" symbol is not exported by the RASPI device tree,
> > because it is
> > missing in the __symbols__ section of the device tree.
> > 
> > In case of the RASPI device tree this could be simple fixed by
> > modifing
> > the device tree source, but when the device tree is provided by a
> > closed
> > source BIOS this kind of missing symbol could not be fixed.
> > 
> > An additional benefit is to override a (possible broken) symbol
> > exported
> > by the currect live device tree.
> > 
> > The patch is based and tested on linux 4.12-rc3.
> > 
> > Signed-off-by: Stefani Seibold 
> > Signed-off-by: Stefani Seibold 
> > ---
> >  drivers/of/overlay.c  | 19 +++
> >  drivers/of/resolver.c | 27 ++-
> >  2 files changed, 41 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
> > index 7827786718d8..de6516ea0fcd 100644
> > --- a/drivers/of/overlay.c
&g

OHCI unplug kernel crash in kernel 4.3, 4.4 and 4.5

2016-02-28 Thread Stefani Seibold
I already reported this bug 6 weeks ago... and I have now checked it with the
current kernel 4.5.0-rc5. The bug is still not fixed.

Unplugging a USB 1.0 OHCI controller express card will result in a
kernel crash. The express card is attached via Thunderbolt and a Sonnet
express card to Thunderbolt adapter. The computer hangs after the
unplug; only a power down fixes the situation.

This is the kernel log of a kernel 4.4 via netconsole:

pciehp :06:03.0:pcie24: Card not present on Slot(3)
pciehp :06:03.0:pcie24: slot(3): Link Down event
pciehp :06:03.0:pcie24: Link Down event ignored on slot(3): already
powering off
ehci-pci :0b:00.2: HC died; cleaning up
ehci-pci :0b:00.2: remove, state 4
usb usb5: USB disconnect, device number 1
pciehp :00:1c.4:pcie04: Card not present on Slot(4)
pciehp :00:1c.4:pcie04: slot(4): Link Down event
ehci-pci :0b:00.2: USB bus 5 deregistered
ohci-pci :0b:00.1: HC died; cleaning up
ohci-pci :0b:00.1: remove, state 4
usb usb7: USB disconnect, device number 1
pciehp :00:1c.4:pcie04: Link Down event ignored on slot(4): already
powering off
ohci-pci :0b:00.1: USB bus 7 deregistered
ohci-pci :0b:00.0: HC died; cleaning up
ohci-pci :0b:00.0: remove, state 4
usb usb6: USB disconnect, device number 1
[ cut here ]
kernel BUG at drivers/iommu/intel-iommu.c:3592!
invalid opcode:  [#1] PREEMPT SMP 
Modules linked in: ohci_pci ohci_hcd binfmt_misc netconsole configfs
bbswitch(O) iwlmvm iwlwifi vboxnetadp(O) vboxnetflt(O) vboxdrv(O)
nvidia(PO) vhost_net tun vhost kvm_intel kvm irqbypass dell_smm_hwmon
[last unloaded: netconsole]
CPU: 0 PID: 4857 Comm: kworker/0:3 Tainted: P   O4.4.0-
gentoo #1
Hardware name: Dell Inc. XPS 2720/05R2TK  , BIOS A12 09/21/2015
Workqueue: pciehp-3 pciehp_power_thread
task: 8804070a2300 ti: 8803e4658000 task.ti: 8803e4658000
RIP: 0010:[]  []
intel_unmap+0x1c4/0x1d0
RSP: 0018:8803e465bb98  EFLAGS: 00010246
RAX:  RBX: 8804225fb098 RCX: c000
RDX:  RSI: c000 RDI: 8804225fb098
RBP: 8803e465bbd0 R08:  R09: 
R10: 88046cc000c0 R11:  R12: 
R13: c000 R14: 880468caa400 R15: e8c13800
FS:  () GS:88047f20()
knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7f2515680324 CR3: 0120a000 CR4: 001406f0
Stack:
 8804101e4a78 8803e465bbf0 ea000f915840 
 812a3ec0 880468caa400 e8c13800 8803e465bbf0
 8061ee8a 88045e68f000 8804225fb098 8803e465bc28
Call Trace:
 [] intel_free_coherent+0x5a/0xa0
 [] ohci_stop+0x144/0x1c0 [ohci_hcd]
 [] usb_remove_hcd+0xe4/0x1a0
 [] usb_hcd_pci_remove+0x63/0x130
 [] pci_device_remove+0x39/0xc0
 [] __device_release_driver+0x96/0x130
 [] device_release_driver+0x23/0x30
 [] pci_stop_bus_device+0x8a/0xa0
 [] pci_stop_bus_device+0x31/0xa0
 [] pci_stop_bus_device+0x31/0xa0
 [] pci_stop_bus_device+0x31/0xa0
 [] pci_stop_and_remove_bus_device+0x12/0x20
 [] pciehp_unconfigure_device+0x9b/0x180
 [] pciehp_disable_slot+0x43/0xb0
 [] pciehp_power_thread+0x8d/0xb0
 [] process_one_work+0x144/0x3c0
 [] worker_thread+0x4b/0x440
 [] ? process_one_work+0x3c0/0x3c0
 [] ? process_one_work+0x3c0/0x3c0
 [] kthread+0xc9/0xe0
 [] ? kthread_create_on_node+0x180/0x180
 [] ret_from_fork+0x3f/0x70
 [] ? kthread_create_on_node+0x180/0x180
Code: f9 48 89 de e8 fe cd ff ff 4c 89 e6 4c 89 f7 e8 23 92 ff ff 4c 89
ef e8 0b cf ff ff 48 83 c4 10 5b 41 5c 41 5d 41 5e 41 5f 5d c3 <0f> 0b
e8 45 cf ff ff e9 f3 fe ff ff 0f 1f 44 00 00 55 48 8b 76 
RIP  [] intel_unmap+0x1c4/0x1d0
 RSP 
---[ end trace ad0596f59dc3d9e0 ]---
BUG: unable to handle kernel paging request at ffd8
IP: [] kthread_data+0x11/0x20
PGD 120b067 PUD 120d067 PMD 0 
Oops:  [#2] PREEMPT SMP 
Modules linked in: ohci_pci ohci_hcd binfmt_misc netconsole configfs
bbswitch(O) iwlmvm iwlwifi vboxnetadp(O) vboxnetflt(O) vboxdrv(O)
nvidia(PO) vhost_net tun vhost kvm_intel kvm irqbypass dell_smm_hwmon
[last unloaded: netconsole]
CPU: 0 PID: 4857 Comm: kworker/0:3 Tainted: P  DO4.4.0-
gentoo #1
Hardware name: Dell Inc. XPS 2720/05R2TK  , BIOS A12 09/21/2015
task: 8804070a2300 ti: 8803e4658000 task.ti: 8803e4658000
RIP: 0010:[]  []
kthread_data+0x11/0x20
RSP: 0018:8803e465b878  EFLAGS: 00010002
RAX:  RBX:  RCX: 814bd980
RDX:  RSI:  RDI: 8804070a2300
RBP: 8803e465b888 R08:  R09: 880468dfe901
R10: 2800 R11: 001a R12: 
R13: 000154c0 R14: 8804070a2300 R15: 88047f2154c0
FS:  () GS:88047f20()
knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 0028 CR3: 0120a000 CR4: 

[PATCH] kfifo: fix sparse complains

2016-02-21 Thread Stefani Seibold
This patch fixes complaints from the sparse tool when using kfifo_put() with
non-scalar types like structures (i.e. drivers/iio/industrialio-event.c).

Taking a pointer to the value, casting that pointer and dereferencing it,
instead of directly casting the value, fixes this.

The generated code is identical.

Signed-off-by: Stefani Seibold 
---
 include/linux/kfifo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 473b436..41eb6fd 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -401,7 +401,7 @@ __kfifo_int_must_check_helper( \
((typeof(__tmp->type))__kfifo->data) : \
(__tmp->buf) \
)[__kfifo->in & __tmp->kfifo.mask] = \
-   (typeof(*__tmp->type))__val; \
+   *(typeof(__tmp->type))&__val; \
smp_wmb(); \
__kfifo->in++; \
} \
-- 
2.7.1



Re: [x86, vdso] BUG: unable to handle kernel paging request at d34bd000

2014-03-10 Thread Stefani Seibold
Am Montag, den 10.03.2014, 10:12 -0700 schrieb Andy Lutomirski:
> On Mon, Mar 10, 2014 at 8:11 AM, Linus Torvalds
>  wrote:
> >
> > On Mar 10, 2014 8:01 AM, "H. Peter Anvin"  wrote:
> >>
> >> I have mentioned in the past wanting to move the fixmap to the low part
> >> of the kernel space, because the top isn't really fixed...
> >
> > How about the high part of the user address space, just above the stack?
> > Leave a unmapped page in between, or something. The stack is already
> > randomized, isn't it?
> 
> For the !compat_vdso case, I don't like it -- this will put the vdso
> (which is executable) at a constant offset from the stack, which will
> make it much easier to use the vdso to defeat ASLR.
> 
> For the compat_vdso case, this only works if the address is *not*
> random, unless we're going to start giving each process its very own
> relocated vdso.
> 
> >
> > That would actually be preferable in a few ways, notably not having to mark
> > page directories user accessible in the kennel space area.
> 
> Is that where the rabid pte dogs live?
> 
> We can already avoid making fixmap pages user-accessible in the
> !compat_vdso case for 32-bit tasks -- the vdso lives in a couple of
> more-or-less ordinary vmas.
> 

What is the next step now? Kick out the compat VDSO? Or should I
implement the dual VDSO? And what is now the preferred way to map the
VDSO into user space? Using install_special_mapping() or mapping it
beyond the user stack?

The easiest and fastest way to get a working result is to do the
non-compat VDSO-only mapping using install_special_mapping(). The dual VDSO
would take a little bit more time.

It would be great to first have a consensus about the design before I
start to implement ;-)

- Stefani

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] x86: Remove compat vdso support

2014-03-12 Thread Stefani Seibold
Am Dienstag, den 11.03.2014, 10:09 -0700 schrieb Linus Torvalds:
> On Tue, Mar 11, 2014 at 9:50 AM, Andy Lutomirski  wrote:
> > Looking forward, would it be reasonable to have an extensible set of
> > flags that live in the ELF interpreter's headers somewhere
> 
> No. Not reasonable. The whole "32-bit x86" and "looking forward"
> combination makes absolutely zero sense.
> 
> I can pretty much guarantee that even *phones* will be 64-bit if/when
> x86 ever gets there. They'll need it just for ARM emulation, I bet.
> 
> So 32-bit x86 is dead, dead, dead. There's absolutely no future to it.
> We're not adding new stuff to "future-proof" it.
> 

Quite frankly, this sounds like the mad scientist in an old Marvel
comic: "dead, dead, dead".

Is it possible to calm down and have a more technical discussion rather
than blaming and threats not to accept patches?

Can we also stop using harsh words like "WTF"? I don't like this style, and
neither do other developers, especially women.

32-bit is not dead. I think 98 percent of all computers running Linux
are embedded devices and a lot of them are not capable of 64 bit
support. So it's your opinion, but there are also developers who do not
share it.

As for me, I still work with old Celeron Pentium III devices, and the
lifetime of these devices will end in 7 years.

A lot of people (also main kernel hackers) asked me to do this patch
because the time functions in 32 bit kernel mode are so slow compared to
a 64 bit Linux. And as far as I can see, most of the involved kernel
developers are not opposed to this patch.

The other side is that many embedded developers use hand crafted time
functions using TSC or similar to get fast time functions, but do not
know the pitfalls (C- and P-States) to handle this in the right way. So a
reliable way is to use the kernel functions, because the kernel knows
the state of the CPU and always returns the correct time. But this will
result in a slowdown of the application, which generates latency.

We have used this kind of patch for a long time and it has noticeably
decreased the latency of our applications.

The current solution is quite clean, but there was an issue with the size
of the vDSO, which does not fit into one page with some kernel configurations.

A solution for this is to #undef CONFIG_OPTIMIZE_INLINING and
CONFIG_X86_PPRO_FENCE in arch/x86/vdso/vdso32/vclock_gettime.c.

To prevent issues with future kernel releases, we now have two ideas to
solve this:

One is Andy's kick-out of the compat VDSO. There is already a patch
for this.

And the other one is (thanks to Andy's archaeological investigations) to
increase the size of the vDSO fixmap space, which according to Andy has
no side effects. This can be done in a very clean and easy way. The code
is still there, since the fixmap area is not fixed:

Lguest, Xen, OLPC and the reservetop option will move the fixmap during boot,
so we can easily get additional space by fixing __FIXADDR_TOP.

I will write a patch for the latter one.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] x86, vdso32: handle 32 bit vDSO larger one page

2014-03-12 Thread Stefani Seibold
This patch enables 32 bit vDSOs which are larger than a page. Currently
two pages are reserved; this should be enough for future improvements.

Signed-off-by: Stefani Seibold 
---
 arch/x86/include/asm/fixmap.h |  4 +++-
 arch/x86/vdso/vdso32-setup.c  | 29 +++--
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 094d0cc..f513f14 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -43,6 +43,8 @@ extern unsigned long __FIXADDR_TOP;
 
 #define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
 #define FIXADDR_USER_END   __fix_to_virt(FIX_VDSO - 1)
+
+#define MAX_VDSO_PAGES 2
 #else
 #define FIXADDR_TOP(VSYSCALL_END-PAGE_SIZE)
 
@@ -74,7 +76,7 @@ extern unsigned long __FIXADDR_TOP;
 enum fixed_addresses {
 #ifdef CONFIG_X86_32
FIX_HOLE,
-   FIX_VDSO,
+   FIX_VDSO = MAX_VDSO_PAGES,
VVAR_PAGE,
VSYSCALL_HPET,
 #else
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 1a9f8c3..aa785f0 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -190,7 +191,8 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
}
 }
 
-static struct page *vdso32_pages[VDSO_PAGES];
+static struct page *vdso32_pages[MAX_VDSO_PAGES];
+static unsigned int vdso32_size;
 
 #ifdef CONFIG_X86_64
 
@@ -256,6 +258,7 @@ static int __init gate_vma_init(void)
 static void map_compat_vdso(int map)
 {
static int vdso_mapped;
+   unsigned int i;
 
if (map == vdso_mapped)
return;
@@ -267,7 +270,8 @@ static void map_compat_vdso(int map)
 
__set_fixmap(VVAR_PAGE, __pa_symbol(&__vvar_page), PAGE_KERNEL_VVAR);
 
-   __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
+   for(i = 0; i != vdso32_size; ++i)
+   __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[i]) << 
PAGE_SHIFT,
 map ? PAGE_READONLY_EXEC : PAGE_NONE);
 
/* flush stray tlbs */
@@ -278,11 +282,10 @@ static void map_compat_vdso(int map)
 
 int __init sysenter_setup(void)
 {
-   void *vdso_page = (void *)get_zeroed_page(GFP_ATOMIC);
+   void *vdso_pages;
const void *vdso;
size_t vdso_len;
-
-   vdso32_pages[0] = virt_to_page(vdso_page);
+   unsigned int i;
 
 #ifdef CONFIG_X86_32
gate_vma_init();
@@ -299,9 +302,15 @@ int __init sysenter_setup(void)
vdso_len = _int80_end - _int80_start;
}
 
-   memcpy(vdso_page, vdso, vdso_len);
-   patch_vdso32(vdso_page, vdso_len);
-   relocate_vdso(vdso_page);
+   vdso32_size = (vdso_len + PAGE_SIZE - 1) / PAGE_SIZE;
+   vdso_pages = kmalloc(VDSO_OFFSET(vdso32_size), GFP_ATOMIC);
+
+   for(i = 0; i != vdso32_size; ++i)
+   vdso32_pages[i] = virt_to_page(vdso_pages + VDSO_OFFSET(i));
+
+   memcpy(vdso_pages, vdso, vdso_len);
+   patch_vdso32(vdso_pages, vdso_len);
+   relocate_vdso(vdso_pages);
 
return 0;
 }
@@ -334,7 +343,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
if (compat)
addr = VDSO_HIGH_BASE;
else {
-   addr = get_unmapped_area(NULL, 0, VDSO_OFFSET(VDSO_PAGES), 0, 
0);
+   addr = get_unmapped_area(NULL, 0, VDSO_OFFSET(VDSO_PREV_PAGES + 
vdso32_size), 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
@@ -351,7 +360,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
 */
ret = install_special_mapping(mm,
addr,
-   VDSO_OFFSET(VDSO_PAGES - VDSO_PREV_PAGES),
+   VDSO_OFFSET(vdso32_size),
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso32_pages);
-- 
1.9.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/3] Improve 32 bit vDSO time

2014-03-12 Thread Stefani Seibold
This patch set brings some improvements to the 32 bit vDSO:

- undef some kernel configs which increase the code size of the VDSO
- remove vsyscall and prevent wrong conditional VVAR_PAGE mapping
- handle VDSO larger than a page

The patch set is against tip 7ed5ee279499a02bf35c77f0a91d657c24f6474e
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] x86, vdso32: undef CONFIG_OPTIMIZE_INLINING and CONFIG_X86_PPRO_FENCE

2014-03-12 Thread Stefani Seibold
In the case of a 32 bit vDSO build, the CONFIG_OPTIMIZE_INLINING and
CONFIG_X86_PPRO_FENCE options increase the size of the generated
vDSO so that it no longer fits in a page.

An #undef of these config options will solve this.

Signed-off-by: Stefani Seibold 
---
 arch/x86/vdso/vdso32/vclock_gettime.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c 
b/arch/x86/vdso/vdso32/vclock_gettime.c
index ab092f7..175cc72 100644
--- a/arch/x86/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/vdso/vdso32/vclock_gettime.c
@@ -1,5 +1,11 @@
 #define BUILD_VDSO32
 
+#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
+#undef CONFIG_OPTIMIZE_INLINING
+#endif
+
+#undef CONFIG_X86_PPRO_FENCE
+
 #ifdef CONFIG_X86_64
 
 /*
-- 
1.9.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] x86, vdso32: remove vsyscall_32.c

2014-03-12 Thread Stefani Seibold
vsyscall_32.c does not have much code in it, and its only function,
map_vsyscall(), was invoked prior to vdso_setup(), which results in a wrong
conditional mapping of VVAR_PAGE.

Moving the whole fixmap initialization into map_compat_vdso() will
resolve this call-order dependency.

Signed-off-by: Stefani Seibold 
---
 arch/x86/kernel/Makefile  |  1 -
 arch/x86/kernel/hpet.c|  6 --
 arch/x86/kernel/setup.c   |  2 ++
 arch/x86/kernel/vsyscall_32.c | 24 
 arch/x86/vdso/vdso32-setup.c  |  5 +
 5 files changed, 7 insertions(+), 31 deletions(-)
 delete mode 100644 arch/x86/kernel/vsyscall_32.c

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3282eda..f4d9600 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -27,7 +27,6 @@ obj-y += probe_roms.o
 obj-$(CONFIG_X86_32)   += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
 obj-y  += syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_X86_32)   += vsyscall_32.o
 obj-$(CONFIG_X86_64)   += vsyscall_64.o
 obj-$(CONFIG_X86_64)   += vsyscall_emu_64.o
 obj-$(CONFIG_SYSFS)+= ksysfs.o
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b99544b..1129f79 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,7 +12,6 @@
 #include 
 #include 
 
-#include 
 #include 
 #include 
 #include 
@@ -75,11 +74,6 @@ static inline void hpet_writel(unsigned int d, unsigned int 
a)
 static inline void hpet_set_mapping(void)
 {
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
-#ifdef CONFIG_X86_32
-   if (vdso_enabled != VDSO_COMPAT)
-   return;
-#endif
-   __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE);
 }
 
 static inline void hpet_clear_mapping(void)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 56ff330..ebf1c00 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1182,7 +1182,9 @@ void __init setup_arch(char **cmdline_p)
 
tboot_probe();
 
+#ifndef CONFIG_X86_32
map_vsyscall();
+#endif
 
generic_apic_probe();
 
diff --git a/arch/x86/kernel/vsyscall_32.c b/arch/x86/kernel/vsyscall_32.c
deleted file mode 100644
index 0cbf94b..000
--- a/arch/x86/kernel/vsyscall_32.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *  Copyright (C) 2001 Andrea Arcangeli  SuSE
- *  Copyright 2003 Andi Kleen, SuSE Labs.
- *
- *  Modified for x86 32 bit arch by Stefani Seibold 
- *
- *  Thanks to h...@transmeta.com for some useful hint.
- *  Special thanks to Ingo Molnar for his early experience with
- *  a different vsyscall implementation for Linux/IA32 and for the name.
- *
- */
-
-#include 
-#include 
-#include 
-#include 
-
-void __init map_vsyscall(void)
-{
-   if (vdso_enabled != VDSO_COMPAT)
-   return;
-
-   __set_fixmap(VVAR_PAGE, __pa_symbol(&__vvar_page), PAGE_KERNEL_VVAR);
-}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index b37aa1d..1a9f8c3 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -262,6 +262,11 @@ static void map_compat_vdso(int map)
 
vdso_mapped = map;
 
+   if (hpet_address)
+   __set_fixmap(VSYSCALL_HPET, hpet_address, 
PAGE_KERNEL_VVAR_NOCACHE);
+
+   __set_fixmap(VVAR_PAGE, __pa_symbol(&__vvar_page), PAGE_KERNEL_VVAR);
+
__set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
 map ? PAGE_READONLY_EXEC : PAGE_NONE);
 
-- 
1.9.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] Improve 32 bit vDSO time

2014-03-13 Thread Stefani Seibold
Am Mittwoch, den 12.03.2014, 20:48 -0700 schrieb H. Peter Anvin:
> On 03/12/2014 04:11 PM, stef...@seibold.net wrote:
> > 
> > I will do this when your patch is pulled into tip. For now we have the
> > choice, but i preferer our solution removing the compat vdso.
> > 
> 
> Sorry, that didn't parse from me.

I thought it is a good idea to wait until the "remove compat vdso
support" is settled and pulled into tip.

If there are no objections to this patch, I am happy to do the
job ;-)

BTW: Thanks to Andy for doing the work to get rid of this ugly compat
vDSO layer.

> Also, if you state a preference, could you please motivate it?

For the next three days I am very busy with an important project, so I
will rebase the vdso 32 bit time patch on Monday or Tuesday.

- Stefani

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] Improve 32 bit vDSO time

2014-03-14 Thread Stefani Seibold
Am Donnerstag, den 13.03.2014, 15:08 -0700 schrieb H. Peter Anvin:
> On 03/13/2014 01:11 AM, Stefani Seibold wrote:
> > Am Mittwoch, den 12.03.2014, 20:48 -0700 schrieb H. Peter Anvin:
> >> On 03/12/2014 04:11 PM, stef...@seibold.net wrote:
> >>>
> >>> I will do this when your patch is pulled into tip. For now we have the
> >>> choice, but i preferer our solution removing the compat vdso.
> >>>
> >>
> >> Sorry, that didn't parse from me.
> > 
> > I thought it is a good idea to wait until the "remove compat vdso
> > support" is settled and pulled into tip.
> > 
> > If there is no objections against this patch i am happy to do the
> > job ;-)
> > 
> > BTW: Thanks to Andy for doing this job to git rid off this ugly compat
> > vDSO layer.
> > 
> >> Also, if you state a preference, could you please motivate it?
> > 
> > For the next three days i am very busy with a important project, so i
> > will rebase the vdso 32 bit time patch on Monday or Tuesday. 
> > 
> 
> So when I get the updated patch from Andy I will pull it into tip,
> resetting the x86/vdso branch.  I'll then expect an upgraded and
> simplified patchset from you to put on top.
> 
> Sounds like a plan.
> 

I love it when a plan comes together.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


BUG: 32 Bit Kernel kexec hangs on P2020

2013-07-10 Thread Stefani Seibold
Hi,

I have tried to kexec a 32 bit kernel on a Freescale P2020 dual core CPU
(e500v2, revision 5.1 - pvr 8021 1051), but kexec hangs after the
"Bye!".

The host and the kexec kernel are the same, based on the current kernel
version 3.10.

I have tried it with kexec-tools 2.0.4.git released 30 June 2013.

Invoking kexec with

kexec --command-line "1 maxcpus=1 noirqdistrib reset_devices $(cat 
/proc/cmdline)" -t elf-ppc --dtb=rs2020.dtb \
--reuse-node="/cpus/PowerPC,P2020@0/timebase-frequency" \
--reuse-node="/cpus/PowerPC,P2020@0/bus-frequency" \
--reuse-node="/cpus/PowerPC,P2020@0/clock-frequency" \
--reuse-node="/cpus/PowerPC,P2020@0/next-level-cache" \
--reuse-node="/cpus/PowerPC,P2020@1/timebase-frequency" \
--reuse-node="/cpus/PowerPC,P2020@1/bus-frequency" \
--reuse-node="/cpus/PowerPC,P2020@1/clock-frequency" \
--reuse-node="/cpus/PowerPC,P2020@1/next-level-cache" \
--reuse-node="/cpus/PowerPC,P2020@1/cpu-release-addr" \
--reuse-node="/cpus/PowerPC,P2020@1/enable-method" \
--reuse-node="/soc@ffe0/bus-frequency" \
--reuse-node="/soc@ffe0/serial@4500/clock-frequency" \
--reuse-node="/soc@ffe0/ethernet@24000/local-mac-address" \
-d -l -x vmlinux
kexec -e

This will be the result output of the run:

kernel: 0x48032008 kernel_size: 54568c
-8000 : 0
get base memory ranges:1
sym:  .data info: 03 other: 00 shndx: 4 value: 0 size: 0
sym: .data value: 589da8 addr: 584012
sym:  .data info: 03 other: 00 shndx: 4 value: 0 size: 0
sym: .data value: 589da8 addr: 58401a
sym: sha256_starts info: 12 other: 00 shndx: 1 value: 99c size: e0
sym: sha256_starts value: 58499c addr: 584024
sym: sha256_update info: 12 other: 00 shndx: 1 value: 565c size: 1b0
sym: sha256_update value: 58965c addr: 584038
sym: sha256_finish info: 12 other: 00 shndx: 1 value: 580c size: 528
sym: sha256_finish value: 58980c addr: 584050
sym:  .data info: 03 other: 00 shndx: 4 value: 0 size: 0
sym: .data value: 589da8 addr: 584056
sym:  .data info: 03 other: 00 shndx: 4 value: 0 size: 0
sym: .data value: 589da8 addr: 58405a
sym: memcmp info: 12 other: 00 shndx: 1 value: 664 size: 40
sym: memcmp value: 584664 addr: 584068
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d34 addr: 58407a
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d64 addr: 58407e
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d34 addr: 584082
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 58408c
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d54 addr: 584092
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d54 addr: 584096
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 58409c
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d64 addr: 5840a6
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 5840b4
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d6c addr: 5840c2
sym:  .data info: 03 other: 00 shndx: 4 value: 0 size: 0
sym: .data value: 589da8 addr: 5840c6
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d6c addr: 5840ca
sym:  .data info: 03 other: 00 shndx: 4 value: 0 size: 0
sym: .data value: 589da8 addr: 5840ce
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 5840d4
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d70 addr: 5840da
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d70 addr: 5840de
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 5840e8
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 5840fc
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d6c addr: 58410a
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d6c addr: 58410e
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 584114
sym: _rest32gpr_29_x info: 12 other: 00 shndx: 1 value: 8fc size: 0
sym: _rest32gpr_29_x value: 5848fc addr: 584124
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d80 addr: 58412e
sym: .rodata.str1.4 info: 03 other: 00 shndx: 3 value: 0 size: 0
sym: .rodata.str1.4 value: 589d80 addr: 584136
sym: printf info: 12 other: 00 shndx: 1 value: 55c size: 68
sym: printf value: 58455c addr: 584140
sym: setup_arch info: 12 other: 00 shndx: 1 value: 98c size: 4
sym: setup_arch value: 58498c addr: 584144
sym: 

Re: BUG: 32 Bit Kernel kexec hangs on P2020

2013-07-10 Thread Stefani Seibold
Am Mittwoch, den 10.07.2013, 16:48 +0800 schrieb tiejun.chen:
> On 07/10/2013 04:39 PM, Stefani Seibold wrote:
> > Hi,
> >
> > i have tried to kexec a 32 bit kernel on a Freescale P2020 dual core CPU
> > (e500v2, revison 5.1 - pvr 8021 1051), but Kexec will hang after the
> > "Bye!".
> >
> > The host and the kexec kernel are the same, based on the current kernel
> > version 3.10.
> >
> > I have tried it with kexec-tools 2.0.4.git released 30 June 2013.
> >
> > Invoking kexec with
> >
> > kexec --command-line "1 maxcpus=1 noirqdistrib reset_devices $(cat 
> > /proc/cmdline)" -t elf-ppc --dtb=rs2020.dtb \
> > --reuse-node="/cpus/PowerPC,P2020@0/timebase-frequency" \
> > --reuse-node="/cpus/PowerPC,P2020@0/bus-frequency" \
> > --reuse-node="/cpus/PowerPC,P2020@0/clock-frequency" \
> > --reuse-node="/cpus/PowerPC,P2020@0/next-level-cache" \
> > --reuse-node="/cpus/PowerPC,P2020@1/timebase-frequency" \
> > --reuse-node="/cpus/PowerPC,P2020@1/bus-frequency" \
> > --reuse-node="/cpus/PowerPC,P2020@1/clock-frequency" \
> > --reuse-node="/cpus/PowerPC,P2020@1/next-level-cache" \
> > --reuse-node="/cpus/PowerPC,P2020@1/cpu-release-addr" \
> > --reuse-node="/cpus/PowerPC,P2020@1/enable-method" \
> > --reuse-node="/soc@ffe0/bus-frequency" \
> > --reuse-node="/soc@ffe0/serial@4500/clock-frequency" \
> > --reuse-node="/soc@ffe0/ethernet@24000/local-mac-address" \
> > -d -l -x vmlinux
> > kexec -e
> 
> Could you try again with one simple command like,
> 
> kexec -l vmlinux --append="`cat /proc/cmdline`";kexec -e
> 

Great, this works.

I did more tests, and the cause of the failure is the passing of the device
tree. But the device tree is exactly the same as the one previously used.

Is passing a new device tree not allowed?

Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: X86: Impossible select Enhanced Real Time Clock Support (legacy PC RTC driver)

2014-05-02 Thread Stefani Seibold
On Mon, 2014-04-28 at 20:20 +0200, Alessandro Zummo wrote:
> On Mon, 28 Apr 2014 11:16:37 -0700
> John Stultz  wrote:
> 
> > >> This breaks the API since there is no more misc device /dev/rtc
> > >> available without a udev rule or a link to /dev/rtc0.  
> > 
> > So yea.. I feel like that /dev/rtcN renaming (and API) break (which
> > was a huge and annoying pain) was back in the 2.6.18-ish era?
> 
>  It's very old and linking/renaming is around since then.
> 
> > But it sounds like the driver/char/rtc bit is dead code, and needs a
> > cleanup? Or is there some use of that code that you need that the
> > generic RTC layer doesn't have?
> 

The driver/char/rtc code does not work and should be a wrapper around the
first rtc device.

>  as far as I know, no recent distribution is using the old code
>  anymore and the new (well, not so new anymore) framework provides the same 
> functionality.
>  

Embedded distributions use it and depend on the /dev/rtc device. So
what is missing is the /dev/rtc misc device 10,135.

- Stefani

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


EHCI hotplug kernel crash in kernel 3.14 and 3.13

2014-04-13 Thread Stefani Seibold
Hot-plugging a USB 2.0 EHCI controller CardBus card results in a
kernel crash. This is the kernel log of a vanilla 3.14 x86_64 kernel. I
will attach my kernel config.

[   70.418181] pcmcia_socket pcmcia_socket0: pccard: CardBus card inserted into 
slot 0
[   70.418209] pci :04:00.0: [1033:0035] type 00 class 0x0c0310
[   70.418239] pci :04:00.0: reg 0x10: [mem 0x-0x0fff]
[   70.418359] pci :04:00.0: supports D1 D2
[   70.418362] pci :04:00.0: PME# supported from D0 D1 D2 D3hot
[   70.418500] pci :04:00.1: [1033:0035] type 00 class 0x0c0310
[   70.418529] pci :04:00.1: reg 0x10: [mem 0x-0x0fff]
[   70.418653] pci :04:00.1: supports D1 D2
[   70.418655] pci :04:00.1: PME# supported from D0 D1 D2 D3hot
[   70.418756] pci :04:00.2: [1033:00e0] type 00 class 0x0c0320
[   70.418784] pci :04:00.2: reg 0x10: [mem 0x-0x00ff]
[   70.418912] pci :04:00.2: supports D1 D2
[   70.418915] pci :04:00.2: PME# supported from D0 D1 D2 D3hot
[   70.419040] pci :04:00.0: BAR 0: assigned [mem 0xf140-0xf1400fff]
[   70.419051] pci :04:00.1: BAR 0: assigned [mem 0xf1401000-0xf1401fff]
[   70.419059] pci :04:00.2: BAR 0: assigned [mem 0xf1402000-0xf14020ff]
[   70.419112] pci :04:00.0: enabling device ( -> 0002)
[   70.419350] pci :04:00.1: enabling device ( -> 0002)
[   70.419508] pci :04:00.2: enabling device ( -> 0002)
[   70.419755] ehci-pci :04:00.2: EHCI Host Controller
[   70.419874] ehci-pci :04:00.2: new USB bus registered, assigned bus 
number 9
[   70.419980] ehci-pci :04:00.2: irq 19, io mem 0xf1402000
[   70.422796] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
[   70.424894] ohci-pci: OHCI PCI platform driver
[   70.425072] ehci-pci :04:00.2: USB 2.0 started, EHCI 0.95
[   70.425132] usb usb9: New USB device found, idVendor=1d6b, idProduct=0002
[   70.425135] usb usb9: New USB device strings: Mfr=3, Product=2, 
SerialNumber=1
[   70.425138] usb usb9: Product: EHCI Host Controller
[   70.425141] usb usb9: Manufacturer: Linux 3.14.0 ehci_hcd
[   70.425144] usb usb9: SerialNumber: :04:00.2
[   70.425332] hub 9-0:1.0: USB hub found
[   70.425344] hub 9-0:1.0: 5 ports detected
[   70.425556] BUG: unable to handle kernel NULL pointer dereference at 
0040
[   70.425560] IP: [] usb_set_configuration+0x1c/0x7d0
[   70.425568] PGD 30d571067 PUD 30d570067 PMD 0 
[   70.425572] Oops:  [#1] PREEMPT SMP 
[   70.425576] Modules linked in: ohci_pci(+) ohci_hcd rfcomm btusb ppdev 
intel_agp intel_gtt 8250 video parport_pc parport serial_core nvidia(PO) drm 
agpgart
[   70.425604] CPU: 3 PID: 83 Comm: pccardd Tainted: P   O 3.14.0 #1
[   70.425607] Hardware name: Dell Inc. Precision M6400 
/0G841G, BIOS A13 06/05/2013
[   70.425609] task: 88030eefe150 ti: 88030ef86000 task.ti: 
88030ef86000
[   70.425611] RIP: 0010:[]  [] 
usb_set_configuration+0x1c/0x7d0
[   70.425615] RSP: 0018:88030ef87ba0  EFLAGS: 00010286
[   70.425617] RAX: 88030c7ce800 RBX:  RCX: 88030ea57000
[   70.425619] RDX: 88030dcb6800 RSI: 0001 RDI: 
[   70.425620] RBP: 88030ef87c38 R08: 88030dfa6910 R09: 
[   70.425622] R10: 4e2e R11:  R12: 
[   70.425624] R13:  R14: 814a0b80 R15: 88030c7ce800
[   70.425626] FS:  () GS:88031fd8() 
knlGS:
[   70.425628] CS:  0010 DS:  ES:  CR0: 8005003b
[   70.425630] CR2: 0040 CR3: 00030c714000 CR4: 000407e0
[   70.425631] Stack:
[   70.425632]  81340110 814a0b80 88030ef87bc8 
817b3858
[   70.425636]  88030dcb6898 88030ef87c00 813d6daf 
88030f429000
[   70.425640]  00010ef87bf0 813d4737 88030ef87c00 
8133f19a
[   70.425644] Call Trace:
[   70.425650]  [] ? pci_do_find_bus+0x70/0x70
[   70.425653]  [] ? hcd_pci_suspend_noirq+0xa0/0xa0
[   70.425659]  [] ? klist_iter_exit+0x18/0x30
[   70.425663]  [] ? bus_find_device+0x7f/0xb0
[   70.425666]  [] ? put_device+0x17/0x20
[   70.425669]  [] ? pci_dev_put+0x1a/0x20
[   70.425672]  [] ? pci_get_dev_by_id+0x61/0x90
[   70.425675]  [] ? hcd_pci_suspend_noirq+0xa0/0xa0
[   70.425679]  [] ehci_post_add+0x4b/0x60
[   70.425682]  [] for_each_companion+0x80/0xa0
[   70.425685]  [] usb_hcd_pci_probe+0x474/0x4e0
[   70.425688]  [] pci_device_probe+0x84/0xe0
[   70.425692]  [] driver_probe_device+0x76/0x240
[   70.425695]  [] ? driver_probe_device+0x240/0x240
[   70.425698]  [] __device_attach+0x3b/0x40
[   70.425701]  [] bus_for_each_drv+0x63/0xa0
[   70.425704]  [] device_attach+0x88/0xa0
[   70.425710]  [] pci_bus_add_device+0x3d/0x60
[   70.425714]  [] pci_bus_add_devices+0x39/0xa0
[   70.425717]  [] cb_alloc+0xd5/0xf0
[   70.425722]  [] socket_insert+0xf0/0x110
[   70.425725]  [] 

X86: Impossible select Enhanced Real Time Clock Support (legacy PC RTC driver)

2014-04-13 Thread Stefani Seibold
For some kernel versions now, it has been impossible to select the Enhanced
Real Time Clock Support (legacy PC RTC driver), because RTC_LIB is set by
default in arch/x86/Kconfig, but the rule for selecting CONFIG_RTC is
RTC_LIB=n. So the code in driver/char/rtc is effectively unusable.

This breaks the API, since the misc device /dev/rtc is no longer
available without a udev rule or a link to /dev/rtc0.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Missing USB XHCI and EHCI reset for kexec

2014-04-13 Thread Stefani Seibold
When executing a kexec kernel on a PowerPC board, the newly started kernel
will not find already enumerated USB devices due to a missing reset of the
USB bus.

As a work around a

echo 1 >/sys/bus/pci/drivers/[ex]hci-pci/BUS-ADDRESS-OF-THE-HCD/reset

will solve this. But this is far from elegant.

The latest kernels I used without this issue were 2.6.39 for EHCI and
3.4 for XHCI, but I have no idea when exactly this behavior was
introduced.

For X86 all is fine.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


X86: kexec issues with i915 in 3.14

2014-04-13 Thread Stefani Seibold
Rebooting my vanilla 3.14 kernel fails with tons of kernel
log messages:

[0.262754] IOMMU: Setting identity map for device :00:1a.0 [0x7c45f000 
- 0x7c46bfff]
[0.262780] IOMMU: Setting identity map for device :00:14.0 [0x7c45f000 
- 0x7c46bfff]
[0.262798] IOMMU: Prepare 0-16MiB unity mapping for LPC
[0.262807] IOMMU: Setting identity map for device :00:1f.0 [0x0 - 
0xff]
[0.262948] PCI-DMA: Intel(R) Virtualization Technology for Directed I/O
[0.262948] dmar: DRHD: handling fault status reg 3
[0.262951] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
e000 
DMAR:[fault reason 05] PTE Write access is not set
[0.262955] dmar: DRHD: handling fault status reg 3
[0.262959] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
fff3c000 
DMAR:[fault reason 05] PTE Write access is not set
[0.262965] dmar: DRHD: handling fault status reg 3
[0.262968] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
ffe4a000 
DMAR:[fault reason 05] PTE Write access is not set
[0.262974] dmar: DRHD: handling fault status reg 3
[0.262976] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
fff6f000 
DMAR:[fault reason 05] PTE Write access is not set
[0.262983] dmar: DRHD: handling fault status reg 3
[0.262985] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
ffe8c000 
DMAR:[fault reason 05] PTE Write access is not set
[0.262991] dmar: DRHD: handling fault status reg 3
[0.262994] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
fffb3000 
DMAR:[fault reason 05] PTE Write access is not set
[0.263000] dmar: DRHD: handling fault status reg 3
[0.263002] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
ffecf000 
DMAR:[fault reason 05] PTE Write access is not set
[0.263009] dmar: DRHD: handling fault status reg 3
[0.263011] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
d000

This message repeats more than 21000 times. After this, the kernel
messages continue with

[0.683267] fbcon: inteldrmfb (fb0) is primary device
[0.864123] Console: switching to colour frame buffer device 320x90
[0.880630] i915 :00:02.0: fb0: inteldrmfb frame buffer device
[0.880632] i915 :00:02.0: registered panic notifier
[0.881077] ACPI Exception: AE_NOT_FOUND, Evaluating _DOD 
(20131218/video-1245)
[0.881081] ACPI: Video Device [PEGN] (multi-head: no  rom: yes  post: no)
[0.881134] input: Video Bus as 
/devices/LNXSYSTM:00/device:00/PNP0A08:00/device:10/LNXVIDEO:00/input/input2
[0.888055] ACPI: Video Device [GFX0] (multi-head: yes  rom: no  post: no)
[0.888266] input: Video Bus as 
/devices/LNXSYSTM:00/device:00/PNP0A08:00/LNXVIDEO:01/input/input3
[0.888289] [drm] Initialized i915 1.6.0 20080730 for :00:02.0 on minor 0
[0.888571] mei_me :00:16.0: irq 57 for MSI/MSI-X
[0.889545] rtsx_pci :3e:00.0: irq 58 for MSI/MSI-X
[0.889559] rtsx_pci :3e:00.0: rtsx_pci_acquire_irq: pcr->msi_en = 1, 
pci->irq = 58
[0.890098] ACPI Warning: SystemIO range 
0x1828-0x182f conflicts with OpRegion 
0x1800-0x187f (\PMIO) (20131218/utaddress-258)
[0.890104] ACPI: If an ACPI driver is available for this device, you should 
use it instead of the native driver
[0.890107] ACPI Warning: SystemIO range 
0x1c30-0x1c3f conflicts with OpRegion 
0x1c00-0x1c3f (\GPRL) (20131218/utaddress-258)
[0.890111] ACPI Warning: SystemIO range 
0x1c30-0x1c3f conflicts with OpRegion 
0x1c00-0x1fff (\GPR_) (20131218/utaddress-258)
[0.890114] ACPI: If an ACPI driver is available for this device, you should 
use it instead of the native driver
[0.890115] ACPI Warning: SystemIO range 
0x1c00-0x1c2f conflicts with OpRegion 
0x1c00-0x1c3f (\GPRL) (20131218/utaddress-258)
[0.890118] ACPI Warning: SystemIO range 
0x1c00-0x1c2f conflicts with OpRegion 
0x1c00-0x1fff (\GPR_) (20131218/utaddress-258)
[0.890122] ACPI: If an ACPI driver is available for this device, you should 
use it instead of the native driver
[0.890123] lpc_ich: Resource conflict(s) found affecting gpio_ich
[0.890215] ahci :00:1f.2: version 3.0

lspci gives me the following for device 00:02.0:

VGA compatible controller: Intel Corporation Xeon E3-1200 v3/4th Gen
Core Processor Integrated Graphics Controller (rev 06)

After this the system seems to be in a normal condition; X starts and I can
log on and use the machine. Any idea?

But most of the time the machine will lock up and I see only garbage on the
screen.

I will attach my kernel config.



kernel.config.gz
Description: application/gzip


Re: Missing USB XHCI and EHCI reset for kexec

2014-04-14 Thread Stefani Seibold
Am Montag, den 14.04.2014, 12:27 -0400 schrieb Alan Stern:
> On Mon, 14 Apr 2014 stef...@seibold.net wrote:
> 
> > Zitat von Alan Stern :
> > 
> > 
> > >> <6>[  167.936921] usb 2-2.1: new full-speed USB device number 3  
> > >> using ohci-pci
> > >> <6>[  168.067890] usb 2-2.1: New USB device found, idVendor=076b,
> > >> idProduct=a021
> > >> <6>[  168.074871] usb 2-2.1: New USB device strings: Mfr=1, Product=2,
> > >> SerialNumber=0
> > >> <6>[  168.082226] usb 2-2.1: Product: Smart Card Reader
> > >> <6>[  168.086963] usb 2-2.1: Manufacturer: USB
> > >> <6>[  168.172893] usb 2-2.2: new low-speed USB device number 4  
> > >> using ohci-pci
> > >> <6>[  168.300839] usb 2-2.2: New USB device found, idVendor=0aad,
> > >> idProduct=0024
> > >> <6>[  168.307823] usb 2-2.2: New USB device strings: Mfr=1, Product=2,
> > >> SerialNumber=0
> > >> <6>[  168.315180] usb 2-2.2: Product: FrontPanel USB Keyboard
> > >> <6>[  168.320436] usb 2-2.2: Manufacturer: Rohde
> > >> <6>[  168.337895] input: Rohde FrontPanel USB Keyboard as
> > >> /devices/pci:00/:00:17.0/usb2/2-2/2-2.2/2-2.2:1.0/input/input0
> > >> <6>[  168.360988] input: Rohde FrontPanel USB Keyboard as
> > >> /devices/pci:00/:00:17.0/usb2/2-2/2-2.2/2-2.2:1.1/input/input1
> > >
> > > Since some devices work and some don't, maybe part of the problem lies
> > > in the particular devices.
> > >
> > 
> > The problem lies on the "Bus 001 Device 002: ID 0424:2514 Standard  
> > Microsystems Corp. USB 2.0 Hub", which hangs for arround 162 seconds  
> > after a kexec.
> > 
> > The "Bus 002 Device 003: ID 076b:a021 OmniKey AG CCID Smart Card  
> > Reader" and "Bus 002 Device 004: ID 0aad:0024 Rohde & Schwarz GmbH &  
> > Co. KG" are attached to this Hub.
> 
> Actually, it looks like they are plugged into the Texas Instruments
> hub, not the Standard Microsystems hub (because they are on bus 2, not
> bus 1).  Did you rearrange the USB cables?
> 

You are right, sorry for the confusion. I can't rearrange the cables
because the HUB is on board.

> > An other PowerPC device which is nearly eactly the same HW but without  
> > this USB HUB works perfectly.
> 
> Maybe you should replace that hub with a different brand.
> 

That's not possible, because the hub is soldered onto the board. And it is
also not a HW issue, since the hub works perfectly with all previous
kernels including 3.4.

> > >> This is the output of lsusb:
> > >>
> > >> Bus 001 Device 002: ID 0424:2514 Standard Microsystems Corp. USB 2.0 Hub
> > >> Bus 001 Device 004: ID 0928:0007 Oxford Semiconductor, Ltd
> > >> Bus 002 Device 002: ID 0451:2036 Texas Instruments, Inc. TUSB2036 Hub
> > >> Bus 001 Device 001: ID 1d6b:0002 Linux Foundation 2.0 root hub
> > >> Bus 002 Device 001: ID 1d6b:0001 Linux Foundation 1.1 root hub
> > >> Bus 003 Device 001: ID 1d6b:0001 Linux Foundation 1.1 root hub
> > >> Bus 002 Device 003: ID 076b:a021 OmniKey AG CCID Smart Card Reader
> > >> Bus 002 Device 004: ID 0aad:0024 Rohde & Schwarz GmbH & Co. KG
> 
> Here, the only device that might be plugged into the Standard
> Microsystems hub is the Oxford Semiconductor thing (whatever it is).
> 
> > > What about if you just do:
> > >
> > >   rmmod ehci-pci
> > >   modprobe ehci-pci
> > >
> > 
> > The kernel is monolitic because the USB HW is needed in a early boot  
> > stage. The problem also occurs with ehci-fsl used in by an other  
> > PowerPC device, which is a part of the SoC and is not attached to the  
> > PCI bus.
> > 
> > One thing is that the "echo 1  
> >  >/sys/bus/pci/drivers/ehci-pci/\:00\:17.2/reset" workaround will  
> > no longer work for kernel 3.14.
> 
> Instead, you could try
> 
>   echo :00:17.2 >/sys/bus/pci/drivers/ehci-pci/unbind
>   echo :00:17.2 >/sys/bus/pci/drivers/ehci-pci/bind
> 

I am now at home. I will do this tomorrow. Thanks so much for your
support.

- Stefani



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: EHCI hotplug kernel crash in kernel 3.14 and 3.13

2014-04-14 Thread Stefani Seibold
Hi Alan,

the patch fixes the issue. I tested it on a notebook with a CardBus
controller and on a desktop machine with a Thunderbolt interface. Both are
working.

Thunderbolt will sometimes crash, but I think this is an issue with
dynamically added PCI bridges, which cause me a lot of trouble and
headaches.

Thanks,

Greetings,
Stefani

Am Montag, den 14.04.2014, 11:11 -0400 schrieb Alan Stern:
> On Sun, 13 Apr 2014, Stefani Seibold wrote:
> 
> > A hot plug of an USB 2.0 EHCI controller cardbus card will result in a
> > kernel crash. This is the kernel log of a vanilla 3.14 x86_64 kernel. I
> > will attach my kernel config.
> 
> > [   70.419755] ehci-pci :04:00.2: EHCI Host Controller
> > [   70.419874] ehci-pci :04:00.2: new USB bus registered, assigned bus 
> > number 9
> > [   70.419980] ehci-pci :04:00.2: irq 19, io mem 0xf1402000
> > [   70.422796] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
> > [   70.424894] ohci-pci: OHCI PCI platform driver
> > [   70.425072] ehci-pci :04:00.2: USB 2.0 started, EHCI 0.95
> > [   70.425132] usb usb9: New USB device found, idVendor=1d6b, idProduct=0002
> > [   70.425135] usb usb9: New USB device strings: Mfr=3, Product=2, 
> > SerialNumber=1
> > [   70.425138] usb usb9: Product: EHCI Host Controller
> > [   70.425141] usb usb9: Manufacturer: Linux 3.14.0 ehci_hcd
> > [   70.425144] usb usb9: SerialNumber: :04:00.2
> > [   70.425332] hub 9-0:1.0: USB hub found
> > [   70.425344] hub 9-0:1.0: 5 ports detected
> > [   70.425556] BUG: unable to handle kernel NULL pointer dereference at 
> > 0040
> > [   70.425560] IP: [] usb_set_configuration+0x1c/0x7d0
> > [   70.425568] PGD 30d571067 PUD 30d570067 PMD 0 
> > [   70.425572] Oops:  [#1] PREEMPT SMP 
> > [   70.425576] Modules linked in: ohci_pci(+) ohci_hcd rfcomm btusb ppdev 
> > intel_agp intel_gtt 8250 video parport_pc parport serial_core nvidia(PO) 
> > drm agpgart
> > [   70.425604] CPU: 3 PID: 83 Comm: pccardd Tainted: P   O 3.14.0 #1
> > [   70.425607] Hardware name: Dell Inc. Precision M6400 
> > /0G841G, BIOS A13 06/05/2013
> > [   70.425609] task: 88030eefe150 ti: 88030ef86000 task.ti: 
> > 88030ef86000
> > [   70.425611] RIP: 0010:[]  [] 
> > usb_set_configuration+0x1c/0x7d0
> > [   70.425615] RSP: 0018:88030ef87ba0  EFLAGS: 00010286
> > [   70.425617] RAX: 88030c7ce800 RBX:  RCX: 
> > 88030ea57000
> > [   70.425619] RDX: 88030dcb6800 RSI: 0001 RDI: 
> > 
> > [   70.425620] RBP: 88030ef87c38 R08: 88030dfa6910 R09: 
> > 
> > [   70.425622] R10: 4e2e R11:  R12: 
> > 
> > [   70.425624] R13:  R14: 814a0b80 R15: 
> > 88030c7ce800
> > [   70.425626] FS:  () GS:88031fd8() 
> > knlGS:
> > [   70.425628] CS:  0010 DS:  ES:  CR0: 8005003b
> > [   70.425630] CR2: 0040 CR3: 00030c714000 CR4: 
> > 000407e0
> > [   70.425631] Stack:
> > [   70.425632]  81340110 814a0b80 88030ef87bc8 
> > 817b3858
> > [   70.425636]  88030dcb6898 88030ef87c00 813d6daf 
> > 88030f429000
> > [   70.425640]  00010ef87bf0 813d4737 88030ef87c00 
> > 8133f19a
> > [   70.425644] Call Trace:
> > [   70.425650]  [] ? pci_do_find_bus+0x70/0x70
> > [   70.425653]  [] ? hcd_pci_suspend_noirq+0xa0/0xa0
> > [   70.425659]  [] ? klist_iter_exit+0x18/0x30
> > [   70.425663]  [] ? bus_find_device+0x7f/0xb0
> > [   70.425666]  [] ? put_device+0x17/0x20
> > [   70.425669]  [] ? pci_dev_put+0x1a/0x20
> > [   70.425672]  [] ? pci_get_dev_by_id+0x61/0x90
> > [   70.425675]  [] ? hcd_pci_suspend_noirq+0xa0/0xa0
> > [   70.425679]  [] ehci_post_add+0x4b/0x60
> > [   70.425682]  [] for_each_companion+0x80/0xa0
> > [   70.425685]  [] usb_hcd_pci_probe+0x474/0x4e0
> 
> I think I see the problem; the driver data for the companion controller 
> gets set before we expect it.  The patch below should help, by adding a 
> check to see that the companion's root hub has been allocated.
> 
> Alan Stern
> 
> 
> 
> Index: usb-3.14/drivers/usb/core/hcd-pci.c
> ===
> --- usb-3.14.orig/drivers/usb/core/hcd-pci.c
> +++ usb-3.14/drivers/usb/core/hcd-pci.c
> @@ -75,7 +75,7 @@ static void for_each_companion(struct pc
>   PCI_SLOT(companion->devf

Re: X86: kexec issues with i915 in 3.14

2014-04-14 Thread Stefani Seibold
Am Montag, den 14.04.2014, 00:28 + schrieb Woodhouse, David:
> On Sun, 2014-04-13 at 22:01 +0200, Stefani Seibold wrote:
> > Rebooting my kernel vanilla kernel 3.14 will fail with tons of kernel
> > log messages:
> > 
> > [0.262754] IOMMU: Setting identity map for device :00:1a.0 
> > [0x7c45f000 - 0x7c46bfff]
> > [0.262780] IOMMU: Setting identity map for device :00:14.0 
> > [0x7c45f000 - 0x7c46bfff]
> > [0.262798] IOMMU: Prepare 0-16MiB unity mapping for LPC
> > [0.262807] IOMMU: Setting identity map for device :00:1f.0 [0x0 - 
> > 0xff]
> > [0.262948] PCI-DMA: Intel(R) Virtualization Technology for Directed I/O
> > [0.262948] dmar: DRHD: handling fault status reg 3
> > [0.262951] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
> > e000 
> > DMAR:[fault reason 05] PTE Write access is not set
> 
> I'm inferring from the subject line that you mean kexec, not
> "rebooting"?
> 

Rebooting via the BIOS works, but booting via kexec results in the message
storm or a hung kernel with a corrupted display.

> It looks like a peripheral device is being left active and doing DMA by
> the previous kernel, rather than being shut down. So as soon as the new
> kernel resets the IOMMU mappings, that peripheral device is causing
> faults.
> 
> We really ought to rate-limit the faults and isolate the offending
> device before there are 21,000 of them. As discussed elsewhere recently,
> we could do with a way to tell the PCI layer that it offended us but I
> suppose we could at *least* stop the IOMMU from reporting faults for it.
> 
> Is this new behaviour? I'm not sure why this should have changed...
> 

I can reproduce the behaviour also with a 3.13.7 kernel.

One thing I found after the end of the 21,000 messages was a GPU crash:

[5.002484] r8169 :03:00.0 eth0: link up
[5.002489] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
[6.745051] [drm:i915_hangcheck_elapsed] *ERROR* Hangcheck timer elapsed... 
blitter ring idle
[   11.743768] [drm] stuck on render ring
[   11.743773] [drm] GPU crash dump saved to /sys/class/drm/card0/error
[   11.743774] [drm] GPU hangs can indicate a bug anywhere in the entire gfx 
stack, including userspace.
[   11.743775] [drm] Please file a _new_ bug report on bugs.freedesktop.org 
against DRI -> DRM/Intel
[   11.743777] [drm] drm/i915 developers can then reassign to the right 
component if it's not a kernel issue.
[   11.743778] [drm] The gpu crash dump is required to analyze gpu hangs, so 
please always attach it.
[   14.240743] systemd-journald[158]: File 
/var/log/journal/bb613621feef82d686edde0046e9bcea/user-1000.journal corrupted 
or uncleanly shut down, renaming and replacing.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold

Am Montag, den 14.04.2014, 12:27 -0400 schrieb Alan Stern:
> On Mon, 14 Apr 2014 stef...@seibold.net wrote:
> 
> > Zitat von Alan Stern :
> > 
> > 
> > >> <6>[  167.936921] usb 2-2.1: new full-speed USB device number 3  
> > >> using ohci-pci
> > >> <6>[  168.067890] usb 2-2.1: New USB device found, idVendor=076b,
> > >> idProduct=a021
> > >> <6>[  168.074871] usb 2-2.1: New USB device strings: Mfr=1, Product=2,
> > >> SerialNumber=0
> > >> <6>[  168.082226] usb 2-2.1: Product: Smart Card Reader
> > >> <6>[  168.086963] usb 2-2.1: Manufacturer: USB
> > >> <6>[  168.172893] usb 2-2.2: new low-speed USB device number 4  
> > >> using ohci-pci
> > >> <6>[  168.300839] usb 2-2.2: New USB device found, idVendor=0aad,
> > >> idProduct=0024
> > >> <6>[  168.307823] usb 2-2.2: New USB device strings: Mfr=1, Product=2,
> > >> SerialNumber=0
> > >> <6>[  168.315180] usb 2-2.2: Product: FrontPanel USB Keyboard
> > >> <6>[  168.320436] usb 2-2.2: Manufacturer: Rohde
> > >> <6>[  168.337895] input: Rohde FrontPanel USB Keyboard as
> > >> /devices/pci:00/:00:17.0/usb2/2-2/2-2.2/2-2.2:1.0/input/input0
> > >> <6>[  168.360988] input: Rohde FrontPanel USB Keyboard as
> > >> /devices/pci:00/:00:17.0/usb2/2-2/2-2.2/2-2.2:1.1/input/input1
> > >
> > > Since some devices work and some don't, maybe part of the problem lies
> > > in the particular devices.
> > >
> > 
> > The problem lies on the "Bus 001 Device 002: ID 0424:2514 Standard  
> > Microsystems Corp. USB 2.0 Hub", which hangs for arround 162 seconds  
> > after a kexec.
> > 
> > The "Bus 002 Device 003: ID 076b:a021 OmniKey AG CCID Smart Card  
> > Reader" and "Bus 002 Device 004: ID 0aad:0024 Rohde & Schwarz GmbH &  
> > Co. KG" are attached to this Hub.

> > > What about if you just do:
> > >
> > >   rmmod ehci-pci
> > >   modprobe ehci-pci
> > >
> > 
> > The kernel is monolitic because the USB HW is needed in a early boot  
> > stage. The problem also occurs with ehci-fsl used in by an other  
> > PowerPC device, which is a part of the SoC and is not attached to the  
> > PCI bus.
> > 
> > One thing is that the "echo 1  
> >  >/sys/bus/pci/drivers/ehci-pci/\:00\:17.2/reset" workaround will  
> > no longer work for kernel 3.14.
> 
> Instead, you could try
> 
>   echo :00:17.2 >/sys/bus/pci/drivers/ehci-pci/unbind
>   echo :00:17.2 >/sys/bus/pci/drivers/ehci-pci/bind
> 

I did an unbind and bind of ehci-pci and ohci-pci; after this I got
the following dmesg log:

ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
ehci-pci: EHCI PCI platform driver
ehci-pci :00:17.2: EHCI Host Controller
ehci-pci :00:17.2: new USB bus registered, assigned bus number 1
ehci-pci :00:17.2: irq 22, io mem 0xc0006800
ehci-pci :00:17.2: USB 2.0 started, EHCI 1.00
hub 1-0:1.0: USB hub found
hub 1-0:1.0: 5 ports detected
ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
ohci_hcd :00:17.0: OHCI Host Controller
ohci_hcd :00:17.0: new USB bus registered, assigned bus number 2
ohci_hcd :00:17.0: irq 20, io mem 0xc0004000
hub 2-0:1.0: USB hub found
hub 2-0:1.0: 3 ports detected
ohci_hcd :00:17.1: OHCI Host Controller
ohci_hcd :00:17.1: new USB bus registered, assigned bus number 3
ohci_hcd :00:17.1: irq 21, io mem 0xc0005000
hub 3-0:1.0: USB hub found
hub 3-0:1.0: 2 ports detected
Freescale High-Speed USB SOC Device Controller driver (Apr 20, 2007)
mousedev: PS/2 mouse device common for all mice
i2c /dev entries driver
mpc-i2c fef03000.i2c: timeout 100 us
rtc-rs5c372 0-0032: rs5c372a found, 24hr, driver version 0.6
rtc-rs5c372 0-0032: rtc core: registered rtc-rs5c372 as rtc0
mpc-i2c fef03100.i2c: timeout 100 us
usbcore: registered new interface driver usbhid
usbhid: USB HID core driver
rsfrontp: using key table SMBV (117)
usbcore: registered new interface driver rsfrontp
rsfrontp: R USB HID Frontpanel driver (v1.2)
usbcore: registered new interface driver rsknop
rsknop: R USB HID Knop support (v1.4)
usb 1-2: new high-speed USB device number 2 using ehci-pci
zram: Created 1 device(s) ...
TCP: cubic registered
NET: Registered protocol family 17
rtc-rs5c372 0-0032: setting system clock to 2014-04-14 14:51:50 UTC (1397487110)
Freeing unused kernel memory: 996K (c032e000 - c0427000)
hub 1-2:1.0: USB hub found
hub 1-2:1.0: 4 ports detected
yaffs: dev is 32505859 name is "mtdblock3" rw
yaffs: passed flags ""
yaffs: yaffs: Attempting MTD mount of 31.3,"mtdblock3"
yaffs: auto selecting yaffs2
yaffs: yaffs_read_super: is_checkpointed 1
usb 2-2: new full-speed USB device number 2 using ohci_hcd
hub 2-2:1.0: USB hub found
hub 2-2:1.0: 2 ports detected
usbcore: registered new interface driver usb-storage
usb 2-2.1: new full-speed USB device number 3 using ohci_hcd
usb 2-2.2: new low-speed USB device number 4 using ohci_hcd
input: Rohde FrontPanel USB Keyboard as 
/devices/pci:00/:00:17.0/usb2/2-2/2-2.2/2-2.2:1.0/input/input0
fsl-gianfar fef24000.ethernet eth0: mac: 00:90:b8:1b:36:37
input: Rohde 

Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold
Am Montag, den 14.04.2014, 13:58 -0400 schrieb Alan Stern:
> On Mon, 14 Apr 2014, Stefani Seibold wrote:
> 
> > > > An other PowerPC device which is nearly eactly the same HW but without  
> > > > this USB HUB works perfectly.
> > > 
> > > Maybe you should replace that hub with a different brand.
> > > 
> > 
> > Thats not possible, because the Hub is soldered on the board. And it is
> > also not a HW issue, since the Hub works perfectly which all previous
> > kernels including 3.4.
> 
> One other thing you can try is to increase the reset timeout in 
> drivers/usb/host/ehci-hub.c.  This is under the USB_PORT_FEAT_RESET 
> case in ehci_hub_control(), around line 1225:
> 
>   /*
>* caller must wait, then call GetPortStatus
>* usb 2.0 spec says 50 ms resets on root
>*/
>   ehci->reset_done [wIndex] = jiffies
>   + msecs_to_jiffies (50);
> 
> Increasing the 50 to 100 or more might help.
> 
> Alan Stern
> 

I tried this. When I increase the value to 1000, the reset and
enumeration process is faster after a kexec: 28 seconds vs. 162
seconds.

- Stefani

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold
Am Dienstag, den 15.04.2014, 15:33 -0300 schrieb Thadeu Lima de Souza
Cascardo:
> On Tue, Apr 15, 2014 at 05:00:28PM +0200, stef...@seibold.net wrote:
> > 
> > Zitat von Thadeu Lima de Souza Cascardo :
> > 
> > >On Tue, Apr 15, 2014 at 12:04:17PM +0200, stef...@seibold.net wrote:
> > >>
> > >>Zitat von Thadeu Lima de Souza Cascardo :
> > >>
> > >>>On Mon, Apr 14, 2014 at 05:44:58PM +0200, stef...@seibold.net wrote:
> > 
> > Zitat von Benjamin Herrenschmidt :
> > 
> > >I don't know about EHCI specifically but this is a known issue with
> > >XHCI, I observe similar issues on other powerpc platforms (servers)
> > >and this isn't architecture specific (looks more like actualy xhc
> > >implementation specific).
> > >
> > >Thadeu Cascardo (on CC) has been the one investigating that on our 
> > >side,
> > >he might have more to add including patches.
> > >
> > 
> > I have now a kernel 3.14 dmesg log of the problem. After a kexec the
> > kexeced 3.14 kernel shows:
> > 
> > [1.170029] xhci_hcd 0001:03:00.0: xHCI Host Controller
> > [1.175306] xhci_hcd 0001:03:00.0: new USB bus registered,
> > assigned bus number 1
> > [1.212561] xhci_hcd 0001:03:00.0: Host not halted after 16000
> > microseconds.
> > [1.219621] xhci_hcd 0001:03:00.0: can't setup: -110
> > [1.224597] xhci_hcd 0001:03:00.0: USB bus 1 deregistered
> > [1.230021] xhci_hcd 0001:03:00.0: init 0001:03:00.0 fail, -110
> > [1.235955] xhci_hcd: probe of 0001:03:00.0 failed with error -110
> > 
> > >>>
> > >>>What is your controller vendor and device IDs? Is that a TI chip?
> > >>>
> > >>
> > >>Yes it is a TI chip, vendor ID 104c and product ID 8241.
> > >>
> > >>>Can you check if the patch I sent a month ago fixes it? [1] There's the
> > >>>whole story there. In fact, you will also need something like the patch
> > >>>below. Can you apply only the first one, verify, and, then, the other
> > >>>one as well, and report what worked for you?
> > >>>
> > > >>>[1] http://marc.info/?l=linux-usb&m=139483181809062&w=2
> > >>>
> > >>
> > >>I tried the attach patch and it did not help. This is what i
> > >>expected because this is a fix in the shutdown path, which will
> > >>never called when doing a forced kexec.
> > >
> > >Hi, Stefani.
> > >
> > >Did you try with both patches applied? How do you evoke the forced
> > >kexec? Is that a kexec on panic? Does it really need to be forced? With
> > >no clean shutdown, platform and drivers would need to issue resets, like
> > >you mentioned below, to get the system into a clean state.
> > >
> > 
> > Yes, i applied both patches. But without success.
> > 
> > IMHO i think it is necessary to bring the device i a clean state
> > when the driver use the HW.
> > 
> > >>
> > >>I have a running a 3.10.23 kernel. This kernel do a kexec for a
> > >>kernel 3.14. Since the kernel 3.10.23 did not performe a clean
> > >>shutdown, the state of the XHCI Controller is undefined. So when
> > >
> > >And the clean shutdown requires both of my patches, for TI chips, as far
> > >as I know. It looks like the problem is issuing a halt when there are
> > >pending URBs.
> > >
> > >>kernel 3.14 will probe XHCI it will find a XHCI controller which was
> > >>not performed a reset.
> > >>
> > >
> > >The problem is not that a reset hasn't been issued. A PCI function reset
> > >should fix most of the problems with a bad device state, when the reset
> > >works. However, the problem is that it was not cleanly shut down. URBs
> > >should have been canceled and removed from the controller queue, and it
> > >should have halted after that.
> > 
> > Again, i think it is the job of the driver to bring the chip in a clean 
> > state
> > before using them. A driver should never expect a reset state of a chip.
> > 
> > >
> > >>So i think it is necessary to reset the XHCI controller and all
> > >>devices on this bus. This is what i do with a "echo 1
> > >>>/sys/bus/pci/drivers/xhci_hcd/0001:03:00.0/reset" before the kexec.
> > >>
> > >
> > >One way to look at that is making the PCI code issue resets to all buses
> > >before doing any other access. That will make booting more slow, and
> > >there are a lot of other corner cases where this might not be enough.
> > >It's probably more sane to try to get the 3.10.23 kernel to do a clean
> > >shutdown, if possible.
> > >
> > 
> > With this driver design the kexec functionality is usesless on PowerPC.
> > X86 looks a little bit better.
> > 
> > - Stefani
> > 
> > 
> 
> What is the vendor and device ID you are using on your X86 system? This
> is not a matter of what architecture you are using, it's the XHCI
> controller which does not behave as well as the one you are using on
> X86, which is likely an Intel one.
> 

It is an Intel 8086:8c31. But this was only a side note. We need a
generic solution, not a vendor-specific one. Otherwise kexec is useless
on other architectures.

- Stefani


--
To 

Re: X86: kexec issues with i915 in 3.14

2014-04-15 Thread Stefani Seibold
On Tuesday, 15.04.2014, 16:54 +0800 wrote Jiang Liu:
> Hi Stefanin,
>   As David has mentioned, the warning messages indicates the VGA
> controller hasn't been shut down correctly during reboot and keeps doing
> DMA write operations after loading the new kernel. Do you have found
> any older kernel without this issue?
>   There is a patch set to solve similar issue for crashdump,
> please refer to https://lkml.org/lkml/2014/1/10/518.
> 
> Thanks!
> Gerry
> 

I still understand. 

Maybe the above patch will cure the symptoms, but it will not heal the
cause.

But the driver for the Intel VGA must not assume the current state of
the device. It is necessary to set up the whole VGA device during the
probe phase.

Otherwise, when kexec'ing a kernel, there are tons of log entries or, in
many cases, garbled screen output, and the whole kernel will hang.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold
Am Dienstag, den 15.04.2014, 15:49 -0300 schrieb Thadeu Lima de Souza
Cascardo:
> On Tue, Apr 15, 2014 at 08:42:58PM +0200, Stefani Seibold wrote:
> > Am Dienstag, den 15.04.2014, 15:33 -0300 schrieb Thadeu Lima de Souza
> > Cascardo:
> > > On Tue, Apr 15, 2014 at 05:00:28PM +0200, stef...@seibold.net wrote:
> > > > 
> > > > Zitat von Thadeu Lima de Souza Cascardo :
> > > > 
> > > > >On Tue, Apr 15, 2014 at 12:04:17PM +0200, stef...@seibold.net wrote:
> > > > >>
> > > > >>Zitat von Thadeu Lima de Souza Cascardo :
> > > > >>
> > > > >>>On Mon, Apr 14, 2014 at 05:44:58PM +0200, stef...@seibold.net wrote:
> > > > >>>>
> > > > >>>>Zitat von Benjamin Herrenschmidt :
> > > > >>>>
> > > > >>>>>I don't know about EHCI specifically but this is a known issue with
> > > > >>>>>XHCI, I observe similar issues on other powerpc platforms (servers)
> > > > >>>>>and this isn't architecture specific (looks more like actualy xhc
> > > > >>>>>implementation specific).
> > > > >>>>>
> > > > >>>>>Thadeu Cascardo (on CC) has been the one investigating that on our 
> > > > >>>>>side,
> > > > >>>>>he might have more to add including patches.
> > > > >>>>>
> > > > >>>>
> > > > >>>>I have now a kernel 3.14 dmesg log of the problem. After a kexec the
> > > > >>>>kexeced 3.14 kernel shows:
> > > > >>>>
> > > > >>>>[1.170029] xhci_hcd 0001:03:00.0: xHCI Host Controller
> > > > >>>>[1.175306] xhci_hcd 0001:03:00.0: new USB bus registered,
> > > > >>>>assigned bus number 1
> > > > >>>>[1.212561] xhci_hcd 0001:03:00.0: Host not halted after 16000
> > > > >>>>microseconds.
> > > > >>>>[1.219621] xhci_hcd 0001:03:00.0: can't setup: -110
> > > > >>>>[1.224597] xhci_hcd 0001:03:00.0: USB bus 1 deregistered
> > > > >>>>[1.230021] xhci_hcd 0001:03:00.0: init 0001:03:00.0 fail, -110
> > > > >>>>[1.235955] xhci_hcd: probe of 0001:03:00.0 failed with error 
> > > > >>>>-110
> > > > >>>>
> > > > >>>
> > > > >>>What is your controller vendor and device IDs? Is that a TI chip?
> > > > >>>
> > > > >>
> > > > >>Yes it is a TI chip, vendor ID 104c and product ID 8241.
> > > > >>
> > > > >>>Can you check if the patch I sent a month ago fixes it? [1] There's 
> > > > >>>the
> > > > >>>whole story there. In fact, you will also need something like the 
> > > > >>>patch
> > > > >>>below. Can you apply only the first one, verify, and, then, the other
> > > > >>>one as well, and report what worked for you?
> > > > >>>
> > > > > >>>[1] http://marc.info/?l=linux-usb&m=139483181809062&w=2
> > > > >>>
> > > > >>
> > > > >>I tried the attach patch and it did not help. This is what i
> > > > >>expected because this is a fix in the shutdown path, which will
> > > > >>never called when doing a forced kexec.
> > > > >
> > > > >Hi, Stefani.
> > > > >
> > > > >Did you try with both patches applied? How do you evoke the forced
> > > > >kexec? Is that a kexec on panic? Does it really need to be forced? With
> > > > >no clean shutdown, platform and drivers would need to issue resets, 
> > > > >like
> > > > >you mentioned below, to get the system into a clean state.
> > > > >
> > > > 
> > > > Yes, i applied both patches. But without success.
> > > > 
> > > > IMHO i think it is necessary to bring the device i a clean state
> > > > when the driver use the HW.
> > > > 
> > > > >>
> > > > >>I have a running a 3.10.23 kernel. This kernel do a kexec for a
> > > > >>kernel 3.14. Since the kernel 3.10.23 did not performe a clean
> > > > >>shutdown, the state of the XHCI Controller is undefined. So when
&

Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold
Am Dienstag, den 15.04.2014, 15:02 -0400 schrieb Alan Stern:
> On Tue, 15 Apr 2014, Stefani Seibold wrote:
> 
> > I did a unbind and bind of the ehci-pci and ohci-pci, after this i got
> > the following dmesg log:
> > 
> > ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
> > ehci-pci: EHCI PCI platform driver
> > ehci-pci :00:17.2: EHCI Host Controller
> > ehci-pci :00:17.2: new USB bus registered, assigned bus number 1
> > ehci-pci :00:17.2: irq 22, io mem 0xc0006800
> > ehci-pci :00:17.2: USB 2.0 started, EHCI 1.00
> > hub 1-0:1.0: USB hub found
> > hub 1-0:1.0: 5 ports detected
> > ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
> > ohci_hcd :00:17.0: OHCI Host Controller
> > ohci_hcd :00:17.0: new USB bus registered, assigned bus number 2
> > ohci_hcd :00:17.0: irq 20, io mem 0xc0004000
> > hub 2-0:1.0: USB hub found
> > hub 2-0:1.0: 3 ports detected
> > ohci_hcd :00:17.1: OHCI Host Controller
> > ohci_hcd :00:17.1: new USB bus registered, assigned bus number 3
> > ohci_hcd :00:17.1: irq 21, io mem 0xc0005000
> > hub 3-0:1.0: USB hub found
> > hub 3-0:1.0: 2 ports detected
> ...
> > usbcore: registered new interface driver USB-SATA-storage
> > USB SATA Mass Storage support registered.
> > usb 1-4: new high-speed USB device number 4 using ehci-pci
> > : ports detected
> 
> What driver is this?  I've never heard of USB-SATA-storage.
> 

This is a special embedded USB SATA driver written by me. It is mostly a
fork of the usb-storage driver, but it handles only one vendor and product
ID and switches on a port bit. On the other hand, this vendor and product
ID is blacklisted in the regular usb-storage driver.

> > ohci_hcd :00:17.0: remove, state 1
> > usb usb2: USB disconnect, device number 1
> > usb 2-2: USB disconnect, device number 2
> > usb 2-2.1: USB disconnect, device number 3
> > usb 2-2.2: USB disconnect, device number 4
> > ohci_hcd :00:17.0: USB bus 2 deregistered
> > ohci_hcd :00:17.1: remove, state 1
> > usb usb3: USB disconnect, device number 1
> > ohci_hcd :00:17.1: USB bus 3 deregistered
> > ehci-pci :00:17.2: remove, state 1
> > usb usb1: USB disconnect, device number 1
> > usb 1-2: USB disconnect, device number 2
> > usb 1-4: USB disconnect, device number 4
> > ehci-pci :00:17.2: USB bus 1 deregistered
> > ohci_hcd :00:17.0: OHCI Host Controller
> > ohci_hcd :00:17.0: new USB bus registered, assigned bus number 1
> > ohci_hcd :00:17.0: irq 20, io mem 0xc0004000
> > hub 1-0:1.0: USB hub found
> > hub 1-0:1.0: 3 ports detected
> > ohci_hcd :00:17.1: OHCI Host Controller
> > ohci_hcd :00:17.1: new USB bus registered, assigned bus number 2
> > ohci_hcd :00:17.1: irq 21, io mem 0xc0005000
> > hub 2-0:1.0: USB hub found
> > hub 2-0:1.0: 2 ports detected
> > ehci-pci :00:17.2: EHCI Host Controller
> > ehci-pci :00:17.2: new USB bus registered, assigned bus number 3
> > ehci-pci :00:17.2: irq 22, io mem 0xc0006800
> > ehci-pci :00:17.2: USB 2.0 started, EHCI 1.00
> > hub 3-0:1.0: USB hub found
> > hub 3-0:1.0: 5 ports detected
> > hub 1-0:1.0: USB hub found
> > hub 1-0:1.0: 3 ports detected
> > hub 2-0:1.0: USB hub found
> > hub 2-0:1.0: 2 ports detected
> > usb 3-2: new high-speed USB device number 2 using ehci-pci
> > hub 3-2:1.0: USB hub found
> > hub 3-2:1.0: 4 ports detected
> > usb 3-4: new high-speed USB device number 4 using ehci-pci
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: new high-speed USB device number 5 using ehci-pci
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: new high-speed USB device number 6 using ehci-pci
> > usb 3-4: device descriptor read/8, error -110
> > usb 3-4: device descriptor read/8, error -110
> > usb 3-4: new high-speed USB device number 7 using ehci-pci
> > usb 3-4: device descriptor read/8, error -110
> > usb 3-4: device descriptor read/8, error -110
> > hub 3-0:1.0: unable to enumerate USB device on port 4
> > usb 1-2: new full-speed USB device number 2 using ohci_hcd
> > hub 1-2:1.0: USB hub found
> > hub 1-2:1.0: 2 ports detected
> > usb 2-2: new full-speed USB device number 2 using ohci_hcd
> > usb 2-2: device descriptor read/64, error -110
> > usb 2-2: device descriptor read/64, error -110
> > usb 2-2: new full-speed USB device number 3 using ohci_hcd
> > usb 2-2: device descript

Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold
Am Dienstag, den 15.04.2014, 15:05 -0400 schrieb Alan Stern:
> On Tue, 15 Apr 2014, Stefani Seibold wrote:
> 
> > > One other thing you can try is to increase the reset timeout in 
> > > drivers/usb/host/ehci-hub.c.  This is under the USB_PORT_FEAT_RESET 
> > > case in ehci_hub_control(), around line 1225:
> > > 
> > >   /*
> > >* caller must wait, then call GetPortStatus
> > >* usb 2.0 spec says 50 ms resets on root
> > >*/
> > >   ehci->reset_done [wIndex] = jiffies
> > >   + msecs_to_jiffies (50);
> > > 
> > > Increasing the 50 to 100 or more might help.
> > > 
> > > Alan Stern
> > > 
> > 
> > I tried this, when i increase the value to 1000, the reset and
> > enumeration process will be faster after a kexec: 28 Seconds vs. 162
> > Seconds.
> 
> Even 28 seconds is much longer than it should be.  And a 1000-ms long 
> reset signal is a lot longer than any device should need.
> 
> Anyway, since you saw the same problem after unbind and rebind, you 
> don't have to perform a kexec for testing.
> 

Right, but I would prefer a solution for this. Since it works perfectly
in kernel 3.4, I don't think it is a hardware issue.

- Stefani


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold
Am Dienstag, den 15.04.2014, 15:02 -0400 schrieb Alan Stern:
> On Tue, 15 Apr 2014, Stefani Seibold wrote:
> 
> > I did a unbind and bind of the ehci-pci and ohci-pci, after this i got
> > the following dmesg log:
> > 
> > ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
> > ehci-pci: EHCI PCI platform driver
> > ehci-pci :00:17.2: EHCI Host Controller
> > ehci-pci :00:17.2: new USB bus registered, assigned bus number 1
> > ehci-pci :00:17.2: irq 22, io mem 0xc0006800
> > ehci-pci :00:17.2: USB 2.0 started, EHCI 1.00
> > hub 1-0:1.0: USB hub found
> > hub 1-0:1.0: 5 ports detected
> > ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
> > ohci_hcd :00:17.0: OHCI Host Controller
> > ohci_hcd :00:17.0: new USB bus registered, assigned bus number 2
> > ohci_hcd :00:17.0: irq 20, io mem 0xc0004000
> > hub 2-0:1.0: USB hub found
> > hub 2-0:1.0: 3 ports detected
> > ohci_hcd :00:17.1: OHCI Host Controller
> > ohci_hcd :00:17.1: new USB bus registered, assigned bus number 3
> > ohci_hcd :00:17.1: irq 21, io mem 0xc0005000
> > hub 3-0:1.0: USB hub found
> > hub 3-0:1.0: 2 ports detected
> ...
> > usbcore: registered new interface driver USB-SATA-storage
> > USB SATA Mass Storage support registered.
> > usb 1-4: new high-speed USB device number 4 using ehci-pci
> > : ports detected
> 
> What driver is this?  I've never heard of USB-SATA-storage.
> 
> > ohci_hcd :00:17.0: remove, state 1
> > usb usb2: USB disconnect, device number 1
> > usb 2-2: USB disconnect, device number 2
> > usb 2-2.1: USB disconnect, device number 3
> > usb 2-2.2: USB disconnect, device number 4
> > ohci_hcd :00:17.0: USB bus 2 deregistered
> > ohci_hcd :00:17.1: remove, state 1
> > usb usb3: USB disconnect, device number 1
> > ohci_hcd :00:17.1: USB bus 3 deregistered
> > ehci-pci :00:17.2: remove, state 1
> > usb usb1: USB disconnect, device number 1
> > usb 1-2: USB disconnect, device number 2
> > usb 1-4: USB disconnect, device number 4
> > ehci-pci :00:17.2: USB bus 1 deregistered
> > ohci_hcd :00:17.0: OHCI Host Controller
> > ohci_hcd :00:17.0: new USB bus registered, assigned bus number 1
> > ohci_hcd :00:17.0: irq 20, io mem 0xc0004000
> > hub 1-0:1.0: USB hub found
> > hub 1-0:1.0: 3 ports detected
> > ohci_hcd :00:17.1: OHCI Host Controller
> > ohci_hcd :00:17.1: new USB bus registered, assigned bus number 2
> > ohci_hcd :00:17.1: irq 21, io mem 0xc0005000
> > hub 2-0:1.0: USB hub found
> > hub 2-0:1.0: 2 ports detected
> > ehci-pci :00:17.2: EHCI Host Controller
> > ehci-pci :00:17.2: new USB bus registered, assigned bus number 3
> > ehci-pci :00:17.2: irq 22, io mem 0xc0006800
> > ehci-pci :00:17.2: USB 2.0 started, EHCI 1.00
> > hub 3-0:1.0: USB hub found
> > hub 3-0:1.0: 5 ports detected
> > hub 1-0:1.0: USB hub found
> > hub 1-0:1.0: 3 ports detected
> > hub 2-0:1.0: USB hub found
> > hub 2-0:1.0: 2 ports detected
> > usb 3-2: new high-speed USB device number 2 using ehci-pci
> > hub 3-2:1.0: USB hub found
> > hub 3-2:1.0: 4 ports detected
> > usb 3-4: new high-speed USB device number 4 using ehci-pci
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: new high-speed USB device number 5 using ehci-pci
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: device descriptor read/64, error -110
> > usb 3-4: new high-speed USB device number 6 using ehci-pci
> > usb 3-4: device descriptor read/8, error -110
> > usb 3-4: device descriptor read/8, error -110
> > usb 3-4: new high-speed USB device number 7 using ehci-pci
> > usb 3-4: device descriptor read/8, error -110
> > usb 3-4: device descriptor read/8, error -110
> > hub 3-0:1.0: unable to enumerate USB device on port 4
> > usb 1-2: new full-speed USB device number 2 using ohci_hcd
> > hub 1-2:1.0: USB hub found
> > hub 1-2:1.0: 2 ports detected
> > usb 2-2: new full-speed USB device number 2 using ohci_hcd
> > usb 2-2: device descriptor read/64, error -110
> > usb 2-2: device descriptor read/64, error -110
> > usb 2-2: new full-speed USB device number 3 using ohci_hcd
> > usb 2-2: device descriptor read/64, error -110
> > usb 2-2: device descriptor read/64, error -110
> > usb 2-2: new full-speed USB device number 4 using ohci_hcd
> > usb 2-2: device descriptor read/8, error -110
> > usb 2-2: device descriptor read/8, error -110
> > usb 2-2: n

Re: Missing USB XHCI and EHCI reset for kexec

2014-04-15 Thread Stefani Seibold
Am Dienstag, den 15.04.2014, 15:14 -0400 schrieb Alan Stern:
> On Tue, 15 Apr 2014, Stefani Seibold wrote:
> 
> > > > usbcore: registered new interface driver USB-SATA-storage
> > > > USB SATA Mass Storage support registered.
> > > > usb 1-4: new high-speed USB device number 4 using ehci-pci
> > > > : ports detected
> > > 
> > > What driver is this?  I've never heard of USB-SATA-storage.
> > > 
> > 
> > This is a special embedded USB SATA driver written by me. It is mostly a
> > fork of the usb-storage driver but handle only one vendor and product ID
> > and does switch on a port bit. On the other side this vendor and product
> > ID is black listed in the regular usb-storage driver. 
> 
> Is it possible that your driver leaves the device in a strange state 
> where it won't initialize properly?
> 

I don't think so, because the driver was not used in this test due to a
missing hard disk. But I will do a test tomorrow in the office.

- Stefani



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Make vsyscall_gtod_data handling x86 generic

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  644641cf162515479f34db62145d2efe084cb60e
Gitweb: http://git.kernel.org/tip/644641cf162515479f34db62145d2efe084cb60e
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:12 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:37 -0800

x86, vdso: Make vsyscall_gtod_data handling x86 generic

This patch moves the vsyscall_gtod_data handling out of vsyscall_64.c
into an additional file vsyscall_gtod.c to make the functionality
available for the x86 32 bit kernel.

It also adds a new vsyscall_32.c which sets up the VVAR page.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-2-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/Kconfig   |  4 +--
 arch/x86/include/asm/clocksource.h |  4 ---
 arch/x86/include/asm/fixmap.h  |  2 ++
 arch/x86/include/asm/vvar.h| 12 ++--
 arch/x86/kernel/Makefile   |  3 +-
 arch/x86/kernel/hpet.c |  4 ---
 arch/x86/kernel/setup.c|  2 --
 arch/x86/kernel/tsc.c  |  2 --
 arch/x86/kernel/vmlinux.lds.S  |  3 --
 arch/x86/kernel/vsyscall_32.c  | 20 +
 arch/x86/kernel/vsyscall_64.c  | 45 -
 arch/x86/kernel/vsyscall_gtod.c| 59 ++
 arch/x86/tools/relocs.c|  2 +-
 13 files changed, 95 insertions(+), 67 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0af5250..0da3b39 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,9 +107,9 @@ config X86
select HAVE_ARCH_SOFT_DIRTY
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
-   select ARCH_CLOCKSOURCE_DATA if X86_64
+   select ARCH_CLOCKSOURCE_DATA
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32  
X86_LOCAL_APIC)
-   select GENERIC_TIME_VSYSCALL if X86_64
+   select GENERIC_TIME_VSYSCALL
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/x86/include/asm/clocksource.h 
b/arch/x86/include/asm/clocksource.h
index 16a57f4..eda81dc 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,8 +3,6 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#ifdef CONFIG_X86_64
-
 #define VCLOCK_NONE 0  /* No vDSO clock available. */
 #define VCLOCK_TSC  1  /* vDSO should use vread_tsc.   */
 #define VCLOCK_HPET 2  /* vDSO should use vread_hpet.  */
@@ -14,6 +12,4 @@ struct arch_clocksource_data {
int vclock_mode;
 };
 
-#endif /* CONFIG_X86_64 */
-
 #endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7252cd3..094d0cc 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -75,6 +75,8 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
FIX_HOLE,
FIX_VDSO,
+   VVAR_PAGE,
+   VSYSCALL_HPET,
 #else
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index d76ac40..0a534ea 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,9 +16,6 @@
  * you mess up, the linker will catch it.)
  */
 
-/* Base address of vvars.  This is not ABI. */
-#define VVAR_ADDRESS (-10*1024*1024 - 4096)
-
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -29,6 +26,15 @@
 
 #else
 
+extern char __vvar_page;
+
+/* Base address of vvars.  This is not ABI. */
+#ifdef CONFIG_X86_64
+#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#else
+#define VVAR_ADDRESS (__vvar_page)
+#endif
+
 #define DECLARE_VVAR(offset, type, name)   \
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb648c8..3282eda 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,8 @@ obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y  += probe_roms.o
 obj-$(CONFIG_X86_32)   += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
-obj-y  += syscall_$(BITS).o
+obj-y  += syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_X86_32)   += vsyscall_32.o
 obj-$(CONFIG_X86_64)   += vsyscall_64.o
 obj-$(CONFIG_X86_64)   += vsyscall_emu_64.o
 obj-$(CONFIG_SYSFS)+= ksysfs.o
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index da85a8e..54263f0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -74,9 +74,7 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
 static inline void hpet_set_mapping(void)
 {
hpet_virt_address

[tip:x86/vdso] x86, vdso: __vdso_clock_gettime() cleanup

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  44e016d37d0e84f1d7063f699ee8df68d7bd7607
Gitweb: http://git.kernel.org/tip/44e016d37d0e84f1d7063f699ee8df68d7bd7607
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:15 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:38 -0800

x86, vdso: __vdso_clock_gettime() cleanup

This patch is a small code cleanup for the __vdso_clock_gettime() function.

It removes the unneeded return values from do_monotonic_coarse() and
do_realtime_coarse() and adds a fallback label for doing the kernel
gettimeofday() system call.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-5-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index bbc8065..fd074dd 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -209,7 +209,7 @@ notrace static int do_monotonic(struct timespec *ts)
return mode;
 }
 
-notrace static int do_realtime_coarse(struct timespec *ts)
+notrace static void do_realtime_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -217,10 +217,9 @@ notrace static int do_realtime_coarse(struct timespec *ts)
ts-tv_sec = gtod-wall_time_coarse.tv_sec;
ts-tv_nsec = gtod-wall_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(gtod-seq, seq)));
-   return 0;
 }
 
-notrace static int do_monotonic_coarse(struct timespec *ts)
+notrace static void do_monotonic_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -228,30 +227,32 @@ notrace static int do_monotonic_coarse(struct timespec 
*ts)
ts-tv_sec = gtod-monotonic_time_coarse.tv_sec;
ts-tv_nsec = gtod-monotonic_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(gtod-seq, seq)));
-
-   return 0;
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-   int ret = VCLOCK_NONE;
-
switch (clock) {
case CLOCK_REALTIME:
-   ret = do_realtime(ts);
+   if (do_realtime(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_MONOTONIC:
-   ret = do_monotonic(ts);
+   if (do_monotonic(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_REALTIME_COARSE:
-   return do_realtime_coarse(ts);
+   do_realtime_coarse(ts);
+   break;
case CLOCK_MONOTONIC_COARSE:
-   return do_monotonic_coarse(ts);
+   do_monotonic_coarse(ts);
+   break;
+   default:
+   goto fallback;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gettime(clock, ts);
return 0;
+fallback:
+   return vdso_fallback_gettime(clock, ts);
 }
 int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias(__vdso_clock_gettime)));
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] mm: Add new func _install_special_mapping() to mmap.c

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  1c6ff9d5108b5056dea85916990af036bb9dd10c
Gitweb: http://git.kernel.org/tip/1c6ff9d5108b5056dea85916990af036bb9dd10c
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:13 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:37 -0800

mm: Add new func _install_special_mapping() to mmap.c

The _install_special_mapping() is the new base function for
install_special_mapping(). This function will return a pointer to the
created VMA or an error code in an ERR_PTR().

This new function will be needed by the vdso 32 bit support to map the
additional vvar and hpet pages into the 32 bit address space. This will be done
with io_remap_pfn_range() and remap_pfn_range(), which require a vm_area_struct.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-3-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 include/linux/mm.h |  3 +++
 mm/mmap.c  | 20 
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f28f46e..55342aa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1740,6 +1740,9 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct 
file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
+  unsigned long addr, unsigned long len,
+  unsigned long flags, struct page **pages);
 extern int install_special_mapping(struct mm_struct *mm,
   unsigned long addr, unsigned long len,
   unsigned long flags, struct page **pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 20ff0c3..81ba54f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2918,7 +2918,7 @@ static const struct vm_operations_struct 
special_mapping_vmops = {
  * The array pointer and the pages it points to are assumed to stay alive
  * for as long as this mapping might exist.
  */
-int install_special_mapping(struct mm_struct *mm,
+struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long vm_flags, struct page **pages)
 {
@@ -2927,7 +2927,7 @@ int install_special_mapping(struct mm_struct *mm,
 
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (unlikely(vma == NULL))
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
INIT_LIST_HEAD(vma-anon_vma_chain);
vma-vm_mm = mm;
@@ -2948,11 +2948,23 @@ int install_special_mapping(struct mm_struct *mm,
 
perf_event_mmap(vma);
 
-   return 0;
+   return vma;
 
 out:
kmem_cache_free(vm_area_cachep, vma);
-   return ret;
+   return ERR_PTR(ret);
+}
+
+int install_special_mapping(struct mm_struct *mm,
+   unsigned long addr, unsigned long len,
+   unsigned long vm_flags, struct page **pages)
+{
+   struct vm_area_struct *vma = _install_special_mapping(mm,
+   addr, len, vm_flags, pages);
+
+   if (IS_ERR(vma))
+   return PTR_ERR(vma);
+   return 0;
 }
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Cleanup __vdso_gettimeofday()

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  6543ca6fee7d3b314bda69b83fd429ed3e336645
Gitweb: http://git.kernel.org/tip/6543ca6fee7d3b314bda69b83fd429ed3e336645
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:17 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:38 -0800

x86, vdso: Cleanup __vdso_gettimeofday()

This patch does a little cleanup for the __vdso_gettimeofday() function.

It kicks out an unneeded ret local variable and makes the code faster
if only the timezone is needed (an admittedly rare case.)

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-7-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 743f277..09dae4a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -259,13 +259,12 @@ int clock_gettime(clockid_t, struct timespec *)
 
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-   long ret = VCLOCK_NONE;
-
if (likely(tv != NULL)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
 offsetof(struct timespec, tv_nsec) ||
 sizeof(*tv) != sizeof(struct timespec));
-   ret = do_realtime((struct timespec *)tv);
+   if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+   return vdso_fallback_gtod(tv, tz);
tv-tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
@@ -274,8 +273,6 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct 
timezone *tz)
tz-tz_dsttime = gtod-sys_tz.tz_dsttime;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gtod(tv, tz);
return 0;
 }
 int gettimeofday(struct timeval *, struct timezone *)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Introduce VVAR marco for vdso32

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  47ffeb5bae376766da7f4a326f13e980a280338b
Gitweb: http://git.kernel.org/tip/47ffeb5bae376766da7f4a326f13e980a280338b
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:18 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:38 -0800

x86, vdso: Introduce VVAR marco for vdso32

This patch revamps vvar.h to introduce the VVAR macro for vdso32.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-8-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vvar.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 0a534ea..52c79ff 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -26,6 +26,15 @@
 
 #else
 
+#ifdef BUILD_VDSO32
+
+#define DECLARE_VVAR(offset, type, name)   \
+   extern type vvar_ ## name __attribute__((visibility(hidden)));
+
+#define VVAR(name) (vvar_ ## name)
+
+#else
+
 extern char __vvar_page;
 
 /* Base address of vvars.  This is not ABI. */
@@ -39,12 +48,13 @@ extern char __vvar_page;
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
 
+#define VVAR(name) (*vvaraddr_ ## name)
+#endif
+
 #define DEFINE_VVAR(type, name)
\
type name   \
__attribute__((section(.vvar_ #name), aligned(16))) __visible
 
-#define VVAR(name) (*vvaraddr_ ## name)
-
 #endif
 
 /* DECLARE_VVAR(offset, type, name) */
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Add 32 bit VDSO time support for 32 bit kernel

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  4dea8e4824b363c53f320d328040d7c6c5921419
Gitweb: http://git.kernel.org/tip/4dea8e4824b363c53f320d328040d7c6c5921419
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:20 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:38 -0800

x86, vdso: Add 32 bit VDSO time support for 32 bit kernel

This patch adds the time support for a 32 bit VDSO to a 32 bit kernel.

For 32 bit programs running on a 32 bit kernel, the same mechanism is
used as for 64 bit programs running on a 64 bit kernel.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-10-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vdso.h   |  3 ++
 arch/x86/include/asm/vdso32.h | 11 +
 arch/x86/vdso/Makefile|  8 
 arch/x86/vdso/vclock_gettime.c| 76 +++
 arch/x86/vdso/vdso-layout.lds.S   | 22 ++
 arch/x86/vdso/vdso32-setup.c  | 53 +---
 arch/x86/vdso/vdso32/vclock_gettime.c |  3 ++
 arch/x86/vdso/vdso32/vdso32.lds.S |  9 +
 8 files changed, 172 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 6e39eb8..0f363c1 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -2,6 +2,9 @@
 #define _ASM_X86_VDSO_H
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+
+#include asm/vdso32.h
+
 extern const char VDSO32_PRELINK[];
 
 /*
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000..7efb701
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_X86_VDSO32_H
+#define _ASM_X86_VDSO32_H
+
+#define VDSO_BASE_PAGE 0
+#define VDSO_VVAR_PAGE 1
+#define VDSO_HPET_PAGE 2
+#define VDSO_PAGES 3
+#define VDSO_PREV_PAGES2
+#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
+
+#endif
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 7a3d13e..6cef7a1 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -146,8 +146,16 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
 $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
 
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
 $(obj)/vdso32/vdso32.lds \
+$(obj)/vdso32/vclock_gettime.o \
 $(obj)/vdso32/note.o \
 $(obj)/vdso32/%.o
$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 09dae4a..90bb5e8 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
  *
  * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
+ * 32 Bit compat layer by Stefani Seibold stef...@seibold.net
+ *  sponsored by Rohde  Schwarz GmbH  Co. KG Munich/Germany
+ *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
  */
@@ -12,13 +15,11 @@
 #define DISABLE_BRANCH_PROFILING
 
 #include linux/kernel.h
-#include linux/posix-timers.h
-#include linux/time.h
+#include uapi/linux/time.h
 #include linux/string.h
 #include asm/vsyscall.h
 #include asm/fixmap.h
 #include asm/vgtod.h
-#include asm/timex.h
 #include asm/hpet.h
 #include asm/unistd.h
 #include asm/io.h
@@ -26,6 +27,12 @@
 
 #define gtod (VVAR(vsyscall_gtod_data))
 
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifndef BUILD_VDSO32
+
 static notrace cycle_t vread_hpet(void)
 {
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
@@ -118,6 +125,59 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
+#else
+
+extern u8 hpet_page
+   __attribute__((visibility(hidden)));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+   return readl((const void __iomem *)(hpet_page + HPET_COUNTER));
+}
+#endif
+
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+
+   asm(
+   mov %%ebx, %%edx \n
+   mov %2, %%ebx \n
+   call VDSO32_vsyscall \n
+   mov %%edx, %%ebx \n

[tip:x86/vdso] x86, vdso: Replace VVAR(vsyscall_gtod_data) by gtod macro

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  2e0035eefd9cc8a386c706cab7547b5280772726
Gitweb: http://git.kernel.org/tip/2e0035eefd9cc8a386c706cab7547b5280772726
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:16 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:38 -0800

x86, vdso: Replace VVAR(vsyscall_gtod_data) by gtod macro

There are currently more than 30 users of the gtod macro, so replace the
last uses of VVAR(vsyscall_gtod_data) with the gtod macro.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-6-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index fd074dd..743f277 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -109,7 +109,7 @@ static notrace cycle_t vread_pvclock(int *mode)
*mode = VCLOCK_NONE;
 
/* refer to tsc.c read_tsc() comment for rationale */
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod-clock.cycle_last;
 
if (likely(ret = last))
return ret;
@@ -133,7 +133,7 @@ notrace static cycle_t vread_tsc(void)
rdtsc_barrier();
ret = (cycle_t)vget_cycles();
 
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod-clock.cycle_last;
 
if (likely(ret = last))
return ret;
@@ -288,7 +288,7 @@ int gettimeofday(struct timeval *, struct timezone *)
 notrace time_t __vdso_time(time_t *t)
 {
/* This is atomic on x86_64 so we don't need any locks. */
-   time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+   time_t result = ACCESS_ONCE(gtod-wall_time_sec);
 
if (t)
*t = result;
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Revamp vclock_gettime.c

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  20dd4e23a10764f886c2a615bdc1f0c7db991cdb
Gitweb: http://git.kernel.org/tip/20dd4e23a10764f886c2a615bdc1f0c7db991cdb
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:14 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:37 -0800

x86, vdso: Revamp vclock_gettime.c

This intermediate patch revamps vclock_gettime.c by moving some functions
around. It is only for splitting purposes, to make the whole 32 bit vdso timer
patch easier to review.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-4-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 85 +-
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index eb5d7a5..bbc8065 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -26,41 +26,26 @@
 
 #define gtod (VVAR(vsyscall_gtod_data))
 
-notrace static cycle_t vread_tsc(void)
+static notrace cycle_t vread_hpet(void)
 {
-   cycle_t ret;
-   u64 last;
-
-   /*
-* Empirically, a fence (of type that depends on the CPU)
-* before rdtsc is enough to ensure that rdtsc is ordered
-* with respect to loads.  The various CPU manuals are unclear
-* as to whether rdtsc can be reordered with later loads,
-* but no one has ever seen it happen.
-*/
-   rdtsc_barrier();
-   ret = (cycle_t)vget_cycles();
-
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
-
-   if (likely(ret = last))
-   return ret;
+   return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
+}
 
-   /*
-* GCC likes to generate cmov here, but this branch is extremely
-* predictable (it's just a funciton of time and the likely is
-* very likely) and there's a data dependence, so force GCC
-* to generate a branch instead.  I don't barrier() because
-* we don't actually need a barrier, and if this function
-* ever gets inlined it will generate worse code.
-*/
-   asm volatile ();
-   return last;
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+   asm(syscall : =a (ret) :
+   0 (__NR_clock_gettime), D (clock), S (ts) : memory);
+   return ret;
 }
 
-static notrace cycle_t vread_hpet(void)
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
-   return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
+   long ret;
+
+   asm(syscall : =a (ret) :
+   0 (__NR_gettimeofday), D (tv), S (tz) : memory);
+   return ret;
 }
 
 #ifdef CONFIG_PARAVIRT_CLOCK
@@ -133,23 +118,37 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static cycle_t vread_tsc(void)
 {
-   long ret;
-   asm(syscall : =a (ret) :
-   0 (__NR_clock_gettime),D (clock), S (ts) : memory);
-   return ret;
-}
+   cycle_t ret;
+   u64 last;
 
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
-   long ret;
+   /*
+* Empirically, a fence (of type that depends on the CPU)
+* before rdtsc is enough to ensure that rdtsc is ordered
+* with respect to loads.  The various CPU manuals are unclear
+* as to whether rdtsc can be reordered with later loads,
+* but no one has ever seen it happen.
+*/
+   rdtsc_barrier();
+   ret = (cycle_t)vget_cycles();
 
-   asm(syscall : =a (ret) :
-   0 (__NR_gettimeofday), D (tv), S (tz) : memory);
-   return ret;
-}
+   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
 
+   if (likely(ret = last))
+   return ret;
+
+   /*
+* GCC likes to generate cmov here, but this branch is extremely
+* predictable (it's just a funciton of time and the likely is
+* very likely) and there's a data dependence, so force GCC
+* to generate a branch instead.  I don't barrier() because
+* we don't actually need a barrier, and if this function
+* ever gets inlined it will generate worse code.
+*/
+   asm volatile ();
+   return last;
+}
 
 notrace static inline u64 vgetsns(int *mode)
 {
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Add 32 bit VDSO time support for 64 bit kernel

2014-03-05 Thread tip-bot for Stefani Seibold
Commit-ID:  821130f9335f5808968fedab50268ba9159cc330
Gitweb: http://git.kernel.org/tip/821130f9335f5808968fedab50268ba9159cc330
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 3 Mar 2014 22:12:21 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 5 Mar 2014 14:02:38 -0800

x86, vdso: Add 32 bit VDSO time support for 64 bit kernel

This patch adds the VDSO time support for the IA32 Emulation Layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
size of a long and a pointer differs from that of a 32 bit compiler, there
is some type hacking necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned; this makes the
  vsyscall_gtod_data independent of kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed size elements
  which work for 32- and 64-bit access.
- The inner struct clock was removed to pack the whole struct.

The unsigned seq is handled by functions derived from seqcount_t.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1393881143-3569-11-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vgtod.h  | 71 +--
 arch/x86/include/asm/vvar.h   |  5 ++
 arch/x86/kernel/vsyscall_gtod.c   | 34 -
 arch/x86/vdso/vclock_gettime.c| 91 +++
 arch/x86/vdso/vdso32/vclock_gettime.c | 21 
 5 files changed, 155 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..3c3366c 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
 #ifndef _ASM_X86_VGTOD_H
 #define _ASM_X86_VGTOD_H
 
-#include asm/vsyscall.h
+#include linux/compiler.h
 #include linux/clocksource.h
 
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-   seqcount_t  seq;
+   unsigned seq;
 
-   struct { /* extract of a clocksource struct */
-   int vclock_mode;
-   cycle_t cycle_last;
-   cycle_t mask;
-   u32 mult;
-   u32 shift;
-   } clock;
+   int vclock_mode;
+   cycle_t cycle_last;
+   cycle_t mask;
+   u32 mult;
+   u32 shift;
 
/* open coded 'struct timespec' */
-   time_t  wall_time_sec;
u64 wall_time_snsec;
+   gtod_long_t wall_time_sec;
+   gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
-   time_t  monotonic_time_sec;
+   gtod_long_t wall_time_coarse_sec;
+   gtod_long_t wall_time_coarse_nsec;
+   gtod_long_t monotonic_time_coarse_sec;
+   gtod_long_t monotonic_time_coarse_nsec;
 
-   struct timezone sys_tz;
-   struct timespec wall_time_coarse;
-   struct timespec monotonic_time_coarse;
+   int tz_minuteswest;
+   int tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+   unsigned ret;
+
+repeat:
+   ret = ACCESS_ONCE(s-seq);
+   if (unlikely(ret  1)) {
+   cpu_relax();
+   goto repeat;
+   }
+   smp_rmb();
+   return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+   unsigned start)
+{
+   smp_rmb();
+   return unlikely(s-seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+   ++s-seq;
+   smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+   smp_wmb();
+   ++s-seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..f9c6e56 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7 @@
  *
  *  Modified for x86 32 bit architecture by
  *  Stefani Seibold

[tip:x86/vdso] x86, vdso: vclock_gettime.c __vdso_clock_gettime cleanup

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  3b19f50facf0488e193ebae00b864fdaeeb25dbb
Gitweb: http://git.kernel.org/tip/3b19f50facf0488e193ebae00b864fdaeeb25dbb
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:42 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:06:47 -0800

x86, vdso: vclock_gettime.c __vdso_clock_gettime cleanup

This patch is a small code cleanup for the __vdso_clock_gettime()
function.

It removes the unneeded return values from do_monotonic_coarse() and
do_realtime_coarse() and adds a fallback label for doing the kernel
gettimeofday() system call.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-5-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index bbc8065..fd074dd 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -209,7 +209,7 @@ notrace static int do_monotonic(struct timespec *ts)
return mode;
 }
 
-notrace static int do_realtime_coarse(struct timespec *ts)
+notrace static void do_realtime_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -217,10 +217,9 @@ notrace static int do_realtime_coarse(struct timespec *ts)
ts-tv_sec = gtod-wall_time_coarse.tv_sec;
ts-tv_nsec = gtod-wall_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(gtod-seq, seq)));
-   return 0;
 }
 
-notrace static int do_monotonic_coarse(struct timespec *ts)
+notrace static void do_monotonic_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -228,30 +227,32 @@ notrace static int do_monotonic_coarse(struct timespec 
*ts)
ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
-   return 0;
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-   int ret = VCLOCK_NONE;
-
switch (clock) {
case CLOCK_REALTIME:
-   ret = do_realtime(ts);
+   if (do_realtime(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_MONOTONIC:
-   ret = do_monotonic(ts);
+   if (do_monotonic(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_REALTIME_COARSE:
-   return do_realtime_coarse(ts);
+   do_realtime_coarse(ts);
+   break;
case CLOCK_MONOTONIC_COARSE:
-   return do_monotonic_coarse(ts);
+   do_monotonic_coarse(ts);
+   break;
+   default:
+   goto fallback;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gettime(clock, ts);
return 0;
+fallback:
+   return vdso_fallback_gettime(clock, ts);
 }
 int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] mm: Add new func _install_special_mapping() to mmap.c

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  48be0cb586e850e3ff5c37fe9339f233f9c893e4
Gitweb: http://git.kernel.org/tip/48be0cb586e850e3ff5c37fe9339f233f9c893e4
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:40 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:04:23 -0800

mm: Add new func _install_special_mapping() to mmap.c

The _install_special_mapping() is the new base function for
install_special_mapping(). This function will return a pointer to the
created VMA or an error code in an ERR_PTR().

This new function will be needed for the x86 vdso 32-bit
support to map the additional vvar and hpet pages into the 32-bit
address space. This will be done with io_remap_pfn_range() and
remap_pfn_range(), which require a vm_area_struct.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-3-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 include/linux/mm.h |  3 +++
 mm/mmap.c  | 20 
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f28f46e..55342aa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1740,6 +1740,9 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct 
file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
+  unsigned long addr, unsigned long len,
+  unsigned long flags, struct page **pages);
 extern int install_special_mapping(struct mm_struct *mm,
   unsigned long addr, unsigned long len,
   unsigned long flags, struct page **pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 20ff0c3..81ba54f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2918,7 +2918,7 @@ static const struct vm_operations_struct 
special_mapping_vmops = {
  * The array pointer and the pages it points to are assumed to stay alive
  * for as long as this mapping might exist.
  */
-int install_special_mapping(struct mm_struct *mm,
+struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long vm_flags, struct page **pages)
 {
@@ -2927,7 +2927,7 @@ int install_special_mapping(struct mm_struct *mm,
 
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (unlikely(vma == NULL))
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_mm = mm;
@@ -2948,11 +2948,23 @@ int install_special_mapping(struct mm_struct *mm,
 
perf_event_mmap(vma);
 
-   return 0;
+   return vma;
 
 out:
kmem_cache_free(vm_area_cachep, vma);
-   return ret;
+   return ERR_PTR(ret);
+}
+
+int install_special_mapping(struct mm_struct *mm,
+   unsigned long addr, unsigned long len,
+   unsigned long vm_flags, struct page **pages)
+{
+   struct vm_area_struct *vma = _install_special_mapping(mm,
+   addr, len, vm_flags, pages);
+
+   if (IS_ERR(vma))
+   return PTR_ERR(vma);
+   return 0;
 }
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Cleanup __vdso_gettimeofday()

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  bada923abe5d8b015efe0e49ca47f76af853972d
Gitweb: http://git.kernel.org/tip/bada923abe5d8b015efe0e49ca47f76af853972d
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:44 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:07:31 -0800

x86, vdso: Cleanup __vdso_gettimeofday()

This patch does a little cleanup of the __vdso_gettimeofday() function.

It kicks out an unneeded ret local variable and makes the code faster
if only the timezone is needed.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-7-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 743f277..09dae4a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -259,13 +259,12 @@ int clock_gettime(clockid_t, struct timespec *)
 
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-   long ret = VCLOCK_NONE;
-
if (likely(tv != NULL)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
 offsetof(struct timespec, tv_nsec) ||
 sizeof(*tv) != sizeof(struct timespec));
-   ret = do_realtime((struct timespec *)tv);
+   if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+   return vdso_fallback_gtod(tv, tz);
tv->tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
@@ -274,8 +273,6 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct 
timezone *tz)
tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gtod(tv, tz);
return 0;
 }
 int gettimeofday(struct timeval *, struct timezone *)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Replace VVAR(vsyscall_gtod_data) by the gtod macro

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  d3e68e3e3fed760169cef2fa95e73551f5d24022
Gitweb: http://git.kernel.org/tip/d3e68e3e3fed760169cef2fa95e73551f5d24022
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:43 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:07:01 -0800

x86, vdso: Replace VVAR(vsyscall_gtod_data) by the gtod macro

There are currently more than 30 users of the gtod macro, so replace the
last VVAR(vsyscall_gtod_data) by the gtod macro.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-6-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index fd074dd..743f277 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -109,7 +109,7 @@ static notrace cycle_t vread_pvclock(int *mode)
*mode = VCLOCK_NONE;
 
/* refer to tsc.c read_tsc() comment for rationale */
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod->clock.cycle_last;
 
if (likely(ret >= last))
return ret;
@@ -133,7 +133,7 @@ notrace static cycle_t vread_tsc(void)
rdtsc_barrier();
ret = (cycle_t)vget_cycles();
 
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod->clock.cycle_last;
 
if (likely(ret >= last))
return ret;
@@ -288,7 +288,7 @@ int gettimeofday(struct timeval *, struct timezone *)
 notrace time_t __vdso_time(time_t *t)
 {
/* This is atomic on x86_64 so we don't need any locks. */
-   time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+   time_t result = ACCESS_ONCE(gtod->wall_time_sec);
 
if (t)
*t = result;
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Introduce VVAR macro for vdso32

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  995106bc0373be03295aa6e0e380dd33a3a37ea4
Gitweb: http://git.kernel.org/tip/995106bc0373be03295aa6e0e380dd33a3a37ea4
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:45 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:07:45 -0800

x86, vdso: Introduce VVAR macro for vdso32

This patch revamps vvar.h to introduce the VVAR macro for vdso32.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-8-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vvar.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 0a534ea..52c79ff 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -26,6 +26,15 @@
 
 #else
 
+#ifdef BUILD_VDSO32
+
+#define DECLARE_VVAR(offset, type, name)   \
+   extern type vvar_ ## name __attribute__((visibility("hidden")));
+
+#define VVAR(name) (vvar_ ## name)
+
+#else
+
 extern char __vvar_page;
 
 /* Base address of vvars.  This is not ABI. */
@@ -39,12 +48,13 @@ extern char __vvar_page;
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
 
+#define VVAR(name) (*vvaraddr_ ## name)
+#endif
+
 #define DEFINE_VVAR(type, name)
\
type name   \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
 
-#define VVAR(name) (*vvaraddr_ ## name)
-
 #endif
 
 /* DECLARE_VVAR(offset, type, name) */
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 32-bit kernel

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  feea5bae36ba8fcd7095e1b23cc2c537f4d24562
Gitweb: http://git.kernel.org/tip/feea5bae36ba8fcd7095e1b23cc2c537f4d24562
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:46 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:08:18 -0800

x86, vdso: Add 32-bit VDSO time support for the 32-bit kernel

This patch adds the time support for the 32-bit VDSO to the 32-bit
kernel.

For 32-bit programs running on a 32-bit kernel, the same mechanism is
used as for 64-bit programs running on a 64-bit kernel.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-9-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vdso.h   |  3 ++
 arch/x86/include/asm/vdso32.h | 11 ++
 arch/x86/vdso/Makefile|  8 
 arch/x86/vdso/vclock_gettime.c| 74 ---
 arch/x86/vdso/vdso-layout.lds.S   | 22 +++
 arch/x86/vdso/vdso32-setup.c  | 53 ++---
 arch/x86/vdso/vdso32/vclock_gettime.c | 35 +
 arch/x86/vdso/vdso32/vdso32.lds.S |  9 +
 8 files changed, 203 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index fddb53d..fe3cef9 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -2,6 +2,9 @@
 #define _ASM_X86_VDSO_H
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+
+#include <asm/vdso32.h>
+
 extern const char VDSO32_PRELINK[];
 
 /*
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000..7efb701
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_X86_VDSO32_H
+#define _ASM_X86_VDSO32_H
+
+#define VDSO_BASE_PAGE 0
+#define VDSO_VVAR_PAGE 1
+#define VDSO_HPET_PAGE 2
+#define VDSO_PAGES 3
+#define VDSO_PREV_PAGES 2
+#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
+
+#endif
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index fd14be1..92daaa6 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -145,8 +145,16 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
 $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
 
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=3 -freg-struct-return -fpic
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
 $(obj)/vdso32/vdso32.lds \
+$(obj)/vdso32/vclock_gettime.o \
 $(obj)/vdso32/note.o \
 $(obj)/vdso32/%.o
$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 09dae4a..fcbc974 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
  *
  * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
+ * 32 Bit compat layer by Stefani Seibold stef...@seibold.net
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
  */
@@ -12,13 +15,11 @@
 #define DISABLE_BRANCH_PROFILING
 
 #include <linux/kernel.h>
-#include <linux/posix-timers.h>
-#include <linux/time.h>
+#include <uapi/linux/time.h>
 #include <linux/string.h>
 #include <asm/vsyscall.h>
 #include <asm/fixmap.h>
 #include <asm/vgtod.h>
-#include <asm/timex.h>
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
@@ -26,6 +27,12 @@
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifndef BUILD_VDSO32
+
 static notrace cycle_t vread_hpet(void)
 {
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
@@ -118,6 +125,59 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
+#else
+
+extern u8 hpet_page
+   __attribute__((visibility("hidden")));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+   return readl((const void __iomem *)(&hpet_page + HPET_COUNTER));
+}
+#endif
+
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+
+   asm(
+   "mov %%ebx, %%edx \n"
+   "mov %2, %%ebx \n"
+   "call VDSO32_vsyscall \n"
+   "mov %%edx, %%ebx \n"

[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  249adfe2c86766eaa739d342525e55a96bf9efa7
Gitweb: http://git.kernel.org/tip/249adfe2c86766eaa739d342525e55a96bf9efa7
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:47 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:08:29 -0800

x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

This patch adds the VDSO time support for the IA32 Emulation Layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
size of a long and a pointer differs from that of a 32-bit compiler, there
is some type hacking necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned, this makes the
  vsyscall_gtod_data independent of kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed-size elements
  which work for 32- and 64-bit access.
- The inner struct clock was removed to pack the whole struct.

The unsigned seq is handled by functions derived from seqcount_t.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-10-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vgtod.h  | 69 ---
 arch/x86/include/asm/vvar.h   |  5 +++
 arch/x86/kernel/vsyscall_gtod.c   | 34 +++--
 arch/x86/vdso/vclock_gettime.c| 68 +-
 arch/x86/vdso/vdso32/vclock_gettime.c | 33 +
 5 files changed, 149 insertions(+), 60 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..abb9e45 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -4,27 +4,70 @@
 #include <asm/vsyscall.h>
 #include <linux/clocksource.h>
 
+#ifdef CONFIG_X86_64
+typedef u64 gtod_long_t;
+#else
+typedef u32 gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-   seqcount_t  seq;
+   unsigned seq;
 
-   struct { /* extract of a clocksource struct */
-   int vclock_mode;
-   cycle_t cycle_last;
-   cycle_t mask;
-   u32 mult;
-   u32 shift;
-   } clock;
+   int vclock_mode;
+   cycle_t cycle_last;
+   cycle_t mask;
+   u32 mult;
+   u32 shift;
 
/* open coded 'struct timespec' */
-   time_t  wall_time_sec;
u64 wall_time_snsec;
+   gtod_long_t wall_time_sec;
+   gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
-   time_t  monotonic_time_sec;
+   gtod_long_t wall_time_coarse_sec;
+   gtod_long_t wall_time_coarse_nsec;
+   gtod_long_t monotonic_time_coarse_sec;
+   gtod_long_t monotonic_time_coarse_nsec;
 
-   struct timezone sys_tz;
-   struct timespec wall_time_coarse;
-   struct timespec monotonic_time_coarse;
+   int tz_minuteswest;
+   int tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+   unsigned ret;
+
+repeat:
+   ret = ACCESS_ONCE(s->seq);
+   if (unlikely(ret & 1)) {
+   cpu_relax();
+   goto repeat;
+   }
+   smp_rmb();
+   return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+   unsigned start)
+{
+   smp_rmb();
+   return unlikely(s->seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+   ++s->seq;
+   smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+   smp_wmb();
+   ++s->seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..973dcc4 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7 @@
  *
  *  Modified for x86 32 bit architecture by
  *  Stefani Seibold stef...@seibold.net
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG

[tip:x86/vdso] x86, vdso: Make vsyscall_gtod_data handling x86 generic

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  0d3ad8c4e6246637b289c22dfe12e3dbae516aef
Gitweb: http://git.kernel.org/tip/0d3ad8c4e6246637b289c22dfe12e3dbae516aef
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:39 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:04:06 -0800

x86, vdso: Make vsyscall_gtod_data handling x86 generic

This patch moves the vsyscall_gtod_data handling out of vsyscall_64.c
into an additional file vsyscall_gtod.c to make the functionality
available for the x86 32-bit kernel.

It also adds a new vsyscall_32.c which sets up the VVAR page.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-2-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/Kconfig   |  4 +--
 arch/x86/include/asm/clocksource.h |  4 ---
 arch/x86/include/asm/fixmap.h  |  2 ++
 arch/x86/include/asm/vvar.h| 12 ++--
 arch/x86/kernel/Makefile   |  3 +-
 arch/x86/kernel/hpet.c |  4 ---
 arch/x86/kernel/setup.c|  2 --
 arch/x86/kernel/tsc.c  |  2 --
 arch/x86/kernel/vmlinux.lds.S  |  3 --
 arch/x86/kernel/vsyscall_32.c  | 20 +
 arch/x86/kernel/vsyscall_64.c  | 45 -
 arch/x86/kernel/vsyscall_gtod.c| 59 ++
 arch/x86/tools/relocs.c|  2 +-
 13 files changed, 95 insertions(+), 67 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0af5250..0da3b39 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,9 +107,9 @@ config X86
select HAVE_ARCH_SOFT_DIRTY
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
-   select ARCH_CLOCKSOURCE_DATA if X86_64
+   select ARCH_CLOCKSOURCE_DATA
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 &&
X86_LOCAL_APIC)
-   select GENERIC_TIME_VSYSCALL if X86_64
+   select GENERIC_TIME_VSYSCALL
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/x86/include/asm/clocksource.h 
b/arch/x86/include/asm/clocksource.h
index 16a57f4..eda81dc 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,8 +3,6 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#ifdef CONFIG_X86_64
-
 #define VCLOCK_NONE 0  /* No vDSO clock available. */
 #define VCLOCK_TSC  1  /* vDSO should use vread_tsc.   */
 #define VCLOCK_HPET 2  /* vDSO should use vread_hpet.  */
@@ -14,6 +12,4 @@ struct arch_clocksource_data {
int vclock_mode;
 };
 
-#endif /* CONFIG_X86_64 */
-
 #endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7252cd3..094d0cc 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -75,6 +75,8 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
FIX_HOLE,
FIX_VDSO,
+   VVAR_PAGE,
+   VSYSCALL_HPET,
 #else
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index d76ac40..0a534ea 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,9 +16,6 @@
  * you mess up, the linker will catch it.)
  */
 
-/* Base address of vvars.  This is not ABI. */
-#define VVAR_ADDRESS (-10*1024*1024 - 4096)
-
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -29,6 +26,15 @@
 
 #else
 
+extern char __vvar_page;
+
+/* Base address of vvars.  This is not ABI. */
+#ifdef CONFIG_X86_64
+#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#else
+#define VVAR_ADDRESS (__vvar_page)
+#endif
+
 #define DECLARE_VVAR(offset, type, name)   \
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb648c8..3282eda 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,8 @@ obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y  += probe_roms.o
 obj-$(CONFIG_X86_32)   += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
-obj-y  += syscall_$(BITS).o
+obj-y  += syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_X86_32)   += vsyscall_32.o
 obj-$(CONFIG_X86_64)   += vsyscall_64.o
 obj-$(CONFIG_X86_64)   += vsyscall_emu_64.o
 obj-$(CONFIG_SYSFS)+= ksysfs.o
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index da85a8e..54263f0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -74,9 +74,7 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
 static inline void hpet_set_mapping(void)
 {
hpet_virt_address = ioremap_nocache(hpet_address

[tip:x86/vdso] x86, vdso: Revamp vclock_gettime.c

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  0b20a1f58d3502a8dfec98a8926f26c43429bee7
Gitweb: http://git.kernel.org/tip/0b20a1f58d3502a8dfec98a8926f26c43429bee7
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 16 Feb 2014 22:52:41 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Sun, 16 Feb 2014 15:06:39 -0800

x86, vdso: Revamp vclock_gettime.c

This intermediate patch revamps the vclock_gettime.c by moving some
functions around.  This is only code movement, to make the whole
32-bit vdso timer patchset easier to review.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392587568-7325-4-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 85 +-
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index eb5d7a5..bbc8065 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -26,41 +26,26 @@
 
 #define gtod (VVAR(vsyscall_gtod_data))
 
-notrace static cycle_t vread_tsc(void)
+static notrace cycle_t vread_hpet(void)
 {
-   cycle_t ret;
-   u64 last;
-
-   /*
-* Empirically, a fence (of type that depends on the CPU)
-* before rdtsc is enough to ensure that rdtsc is ordered
-* with respect to loads.  The various CPU manuals are unclear
-* as to whether rdtsc can be reordered with later loads,
-* but no one has ever seen it happen.
-*/
-   rdtsc_barrier();
-   ret = (cycle_t)vget_cycles();
-
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
-
-   if (likely(ret >= last))
-   return ret;
+   return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
+}
 
-   /*
-* GCC likes to generate cmov here, but this branch is extremely
-* predictable (it's just a funciton of time and the likely is
-* very likely) and there's a data dependence, so force GCC
-* to generate a branch instead.  I don't barrier() because
-* we don't actually need a barrier, and if this function
-* ever gets inlined it will generate worse code.
-*/
-   asm volatile ("");
-   return last;
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+   asm("syscall" : "=a" (ret) :
+   "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+   return ret;
 }
 
-static notrace cycle_t vread_hpet(void)
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
-   return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
+   long ret;
+
+   asm("syscall" : "=a" (ret) :
+   "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+   return ret;
 }
 
 #ifdef CONFIG_PARAVIRT_CLOCK
@@ -133,23 +118,37 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static cycle_t vread_tsc(void)
 {
-   long ret;
-   asm("syscall" : "=a" (ret) :
-   "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
-   return ret;
-}
+   cycle_t ret;
+   u64 last;
 
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
-   long ret;
+   /*
+* Empirically, a fence (of type that depends on the CPU)
+* before rdtsc is enough to ensure that rdtsc is ordered
+* with respect to loads.  The various CPU manuals are unclear
+* as to whether rdtsc can be reordered with later loads,
+* but no one has ever seen it happen.
+*/
+   rdtsc_barrier();
+   ret = (cycle_t)vget_cycles();
 
-   asm("syscall" : "=a" (ret) :
-   "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
-   return ret;
-}
+   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
 
+   if (likely(ret >= last))
+   return ret;
+
+   /*
+* GCC likes to generate cmov here, but this branch is extremely
+* predictable (it's just a funciton of time and the likely is
+* very likely) and there's a data dependence, so force GCC
+* to generate a branch instead.  I don't barrier() because
+* we don't actually need a barrier, and if this function
+* ever gets inlined it will generate worse code.
+*/
+   asm volatile ("");
+   return last;
+}
 
 notrace static inline u64 vgetsns(int *mode)
 {
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

2014-02-17 Thread tip-bot for Stefani Seibold
Commit-ID:  43fdf391fa08779fa60658b38a6ecee0a509b4a0
Gitweb: http://git.kernel.org/tip/43fdf391fa08779fa60658b38a6ecee0a509b4a0
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Feb 2014 11:03:43 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Mon, 17 Feb 2014 07:54:43 -0800

x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

This patch adds the VDSO time support for the IA32 Emulation Layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
size of a long and a pointer differs from that of a 32-bit compiler, there
is some type hacking necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned, this makes the
  vsyscall_gtod_data independent of kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed-size elements
  which work for 32- and 64-bit access.
- The inner struct clock was removed to pack the whole struct.

The unsigned seq is handled by functions derived from seqcount_t.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392631424-32205-10-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vgtod.h  | 71 ++--
 arch/x86/include/asm/vvar.h   |  5 ++
 arch/x86/kernel/vsyscall_gtod.c   | 35 +-
 arch/x86/vdso/vclock_gettime.c| 89 +++
 arch/x86/vdso/vdso32/vclock_gettime.c | 19 
 5 files changed, 152 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..3c3366c 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
 #ifndef _ASM_X86_VGTOD_H
 #define _ASM_X86_VGTOD_H
 
-#include asm/vsyscall.h
+#include linux/compiler.h
 #include linux/clocksource.h
 
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-   seqcount_t  seq;
+   unsigned seq;
 
-   struct { /* extract of a clocksource struct */
-   int vclock_mode;
-   cycle_t cycle_last;
-   cycle_t mask;
-   u32 mult;
-   u32 shift;
-   } clock;
+   int vclock_mode;
+   cycle_t cycle_last;
+   cycle_t mask;
+   u32 mult;
+   u32 shift;
 
/* open coded 'struct timespec' */
-   time_t  wall_time_sec;
u64 wall_time_snsec;
+   gtod_long_t wall_time_sec;
+   gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
-   time_t  monotonic_time_sec;
+   gtod_long_t wall_time_coarse_sec;
+   gtod_long_t wall_time_coarse_nsec;
+   gtod_long_t monotonic_time_coarse_sec;
+   gtod_long_t monotonic_time_coarse_nsec;
 
-   struct timezone sys_tz;
-   struct timespec wall_time_coarse;
-   struct timespec monotonic_time_coarse;
+   int tz_minuteswest;
+   int tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+   unsigned ret;
+
+repeat:
+   ret = ACCESS_ONCE(s-seq);
+   if (unlikely(ret  1)) {
+   cpu_relax();
+   goto repeat;
+   }
+   smp_rmb();
+   return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+   unsigned start)
+{
+   smp_rmb();
+   return unlikely(s-seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+   ++s-seq;
+   smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+   smp_wmb();
+   ++s-seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..eacd12f 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7 @@
  *
  *  Modified for x86 32 bit architecture by
  *  Stefani

[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 32-bit kernel

2014-02-17 Thread tip-bot for Stefani Seibold
Commit-ID:  bfd5846a1327fb82885eaac04fd4ef2385572325
Gitweb: http://git.kernel.org/tip/bfd5846a1327fb82885eaac04fd4ef2385572325
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Feb 2014 11:03:42 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Mon, 17 Feb 2014 07:54:25 -0800

x86, vdso: Add 32-bit VDSO time support for the 32-bit kernel

This patch adds the time support for the 32-bit VDSO to the 32-bit kernel.

For 32-bit programs running on a 32-bit kernel, the same mechanism is
used as for 64-bit programs running on a 64-bit kernel.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392631424-32205-9-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vdso.h   |  3 ++
 arch/x86/include/asm/vdso32.h | 11 ++
 arch/x86/vdso/Makefile|  8 
 arch/x86/vdso/vclock_gettime.c| 74 ---
 arch/x86/vdso/vdso-layout.lds.S   | 22 +++
 arch/x86/vdso/vdso32-setup.c  | 53 ++---
 arch/x86/vdso/vdso32/vclock_gettime.c |  3 ++
 arch/x86/vdso/vdso32/vdso32.lds.S |  9 +
 8 files changed, 171 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index fddb53d..fe3cef9 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -2,6 +2,9 @@
 #define _ASM_X86_VDSO_H
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+
+#include asm/vdso32.h
+
 extern const char VDSO32_PRELINK[];
 
 /*
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000..7efb701
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_X86_VDSO32_H
+#define _ASM_X86_VDSO32_H
+
+#define VDSO_BASE_PAGE 0
+#define VDSO_VVAR_PAGE 1
+#define VDSO_HPET_PAGE 2
+#define VDSO_PAGES 3
+#define VDSO_PREV_PAGES2
+#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
+
+#endif
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index fd14be1..92daaa6 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -145,8 +145,16 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
 $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
 
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=3 -freg-struct-return -fpic
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
 $(obj)/vdso32/vdso32.lds \
+$(obj)/vdso32/vclock_gettime.o \
 $(obj)/vdso32/note.o \
 $(obj)/vdso32/%.o
$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 09dae4a..fcbc974 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
  *
  * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
+ * 32 Bit compat layer by Stefani Seibold stef...@seibold.net
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
  */
@@ -12,13 +15,11 @@
 #define DISABLE_BRANCH_PROFILING
 
 #include linux/kernel.h
-#include linux/posix-timers.h
-#include linux/time.h
+#include uapi/linux/time.h
 #include linux/string.h
 #include asm/vsyscall.h
 #include asm/fixmap.h
 #include asm/vgtod.h
-#include asm/timex.h
 #include asm/hpet.h
 #include asm/unistd.h
 #include asm/io.h
@@ -26,6 +27,12 @@
 
 #define gtod (VVAR(vsyscall_gtod_data))
 
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifndef BUILD_VDSO32
+
 static notrace cycle_t vread_hpet(void)
 {
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
@@ -118,6 +125,59 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
+#else
+
+extern u8 hpet_page
+   __attribute__((visibility(hidden)));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+   return readl((const void __iomem *)(hpet_page + HPET_COUNTER));
+}
+#endif
+
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+
+   asm(
+   mov %%ebx, %%edx \n
+   mov %2, %%ebx \n
+   call VDSO32_vsyscall \n
+   mov %%edx, %%ebx \n

[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

2014-02-17 Thread tip-bot for Stefani Seibold
Commit-ID:  8ec3d81889b5b10a832b6327e23293c1488c7810
Gitweb: http://git.kernel.org/tip/8ec3d81889b5b10a832b6327e23293c1488c7810
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Feb 2014 11:03:43 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Mon, 17 Feb 2014 10:59:10 -0800

x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

This patch adds VDSO time support for the IA32 Emulation Layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
sizes of a long and a pointer differ from those of a 32-bit compiler, some
type hacking is necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned; this makes the
  vsyscall_gtod_data independent of kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed-size elements
  which work for both 32- and 64-bit access.
- The inner struct clock was removed to pack the whole struct.

The unsigned seq is handled by functions derived from seqcount_t.

[ hpa: folded fix to #undef CONFIG_SPARSEMEM_VMEMMAP ]

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392631424-32205-10-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vgtod.h  | 71 ++--
 arch/x86/include/asm/vvar.h   |  5 ++
 arch/x86/kernel/vsyscall_gtod.c   | 35 +-
 arch/x86/vdso/vclock_gettime.c| 89 +++
 arch/x86/vdso/vdso32/vclock_gettime.c | 20 
 5 files changed, 153 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..3c3366c 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
 #ifndef _ASM_X86_VGTOD_H
 #define _ASM_X86_VGTOD_H
 
-#include asm/vsyscall.h
+#include linux/compiler.h
 #include linux/clocksource.h
 
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-   seqcount_t  seq;
+   unsigned seq;
 
-   struct { /* extract of a clocksource struct */
-   int vclock_mode;
-   cycle_t cycle_last;
-   cycle_t mask;
-   u32 mult;
-   u32 shift;
-   } clock;
+   int vclock_mode;
+   cycle_t cycle_last;
+   cycle_t mask;
+   u32 mult;
+   u32 shift;
 
/* open coded 'struct timespec' */
-   time_t  wall_time_sec;
u64 wall_time_snsec;
+   gtod_long_t wall_time_sec;
+   gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
-   time_t  monotonic_time_sec;
+   gtod_long_t wall_time_coarse_sec;
+   gtod_long_t wall_time_coarse_nsec;
+   gtod_long_t monotonic_time_coarse_sec;
+   gtod_long_t monotonic_time_coarse_nsec;
 
-   struct timezone sys_tz;
-   struct timespec wall_time_coarse;
-   struct timespec monotonic_time_coarse;
+   int tz_minuteswest;
+   int tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+   unsigned ret;
+
+repeat:
+   ret = ACCESS_ONCE(s-seq);
+   if (unlikely(ret  1)) {
+   cpu_relax();
+   goto repeat;
+   }
+   smp_rmb();
+   return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+   unsigned start)
+{
+   smp_rmb();
+   return unlikely(s-seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+   ++s-seq;
+   smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+   smp_wmb();
+   ++s-seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..eacd12f 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7

[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

2014-02-19 Thread tip-bot for Stefani Seibold
Commit-ID:  0fc8a237cbe98a06962f5ea37d24fc2369e23c74
Gitweb: http://git.kernel.org/tip/0fc8a237cbe98a06962f5ea37d24fc2369e23c74
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Wed, 19 Feb 2014 10:09:10 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 19 Feb 2014 14:08:34 -0800

x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

This patch adds VDSO time support for the IA32 Emulation Layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
sizes of a long and a pointer differ from those of a 32-bit compiler, some
type hacking is necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned; this makes the
  vsyscall_gtod_data independent of kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed-size elements
  which work for both 32- and 64-bit access.
- The inner struct clock was removed to pack the whole struct.

The unsigned seq is handled by functions derived from seqcount_t.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392800951-2683-10-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vgtod.h  | 71 +--
 arch/x86/include/asm/vvar.h   |  5 ++
 arch/x86/kernel/vsyscall_gtod.c   | 35 +-
 arch/x86/vdso/vclock_gettime.c| 91 +++
 arch/x86/vdso/vdso32/vclock_gettime.c | 22 +
 5 files changed, 157 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..3c3366c 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
 #ifndef _ASM_X86_VGTOD_H
 #define _ASM_X86_VGTOD_H
 
-#include asm/vsyscall.h
+#include linux/compiler.h
 #include linux/clocksource.h
 
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-   seqcount_t  seq;
+   unsigned seq;
 
-   struct { /* extract of a clocksource struct */
-   int vclock_mode;
-   cycle_t cycle_last;
-   cycle_t mask;
-   u32 mult;
-   u32 shift;
-   } clock;
+   int vclock_mode;
+   cycle_t cycle_last;
+   cycle_t mask;
+   u32 mult;
+   u32 shift;
 
/* open coded 'struct timespec' */
-   time_t  wall_time_sec;
u64 wall_time_snsec;
+   gtod_long_t wall_time_sec;
+   gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
-   time_t  monotonic_time_sec;
+   gtod_long_t wall_time_coarse_sec;
+   gtod_long_t wall_time_coarse_nsec;
+   gtod_long_t monotonic_time_coarse_sec;
+   gtod_long_t monotonic_time_coarse_nsec;
 
-   struct timezone sys_tz;
-   struct timespec wall_time_coarse;
-   struct timespec monotonic_time_coarse;
+   int tz_minuteswest;
+   int tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+   unsigned ret;
+
+repeat:
+   ret = ACCESS_ONCE(s-seq);
+   if (unlikely(ret  1)) {
+   cpu_relax();
+   goto repeat;
+   }
+   smp_rmb();
+   return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+   unsigned start)
+{
+   smp_rmb();
+   return unlikely(s-seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+   ++s-seq;
+   smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+   smp_wmb();
+   ++s-seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..eacd12f 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7 @@
  *
  *  Modified for x86 32 bit architecture by
  *  Stefani

[tip:x86/vdso] x86, vdso: Do conditional fixmap of VVAR and HPET page

2014-02-19 Thread tip-bot for Stefani Seibold
Commit-ID:  6fa967bb8354fff2d5e116d97f4d83fdab644b67
Gitweb: http://git.kernel.org/tip/6fa967bb8354fff2d5e116d97f4d83fdab644b67
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Feb 2014 11:03:44 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Wed, 19 Feb 2014 14:09:07 -0800

x86, vdso: Do conditional fixmap of VVAR and HPET page

This patch adds conditional fixmap of the VVAR and HPET pages for the
32-bit kernel.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1392631424-32205-11-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vdso.h   | 6 ++
 arch/x86/kernel/hpet.c| 5 +
 arch/x86/kernel/vsyscall_32.c | 4 
 arch/x86/vdso/vdso32-setup.c  | 6 --
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index fe3cef9..6db8b23 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,6 +1,12 @@
 #ifndef _ASM_X86_VDSO_H
 #define _ASM_X86_VDSO_H
 
+enum {
+   VDSO_DISABLED = 0,
+   VDSO_ENABLED = 1,
+   VDSO_COMPAT = 2,
+};
+
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
 
 #include asm/vdso32.h
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 54263f0..b99544b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -15,6 +15,7 @@
 #include asm/fixmap.h
 #include asm/hpet.h
 #include asm/time.h
+#include asm/elf.h
 
 #define HPET_MASK  CLOCKSOURCE_MASK(32)
 
@@ -74,6 +75,10 @@ static inline void hpet_writel(unsigned int d, unsigned int 
a)
 static inline void hpet_set_mapping(void)
 {
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+#ifdef CONFIG_X86_32
+   if (vdso_enabled != VDSO_COMPAT)
+   return;
+#endif
__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE);
 }
 
diff --git a/arch/x86/kernel/vsyscall_32.c b/arch/x86/kernel/vsyscall_32.c
index 4b94c47..0cbf94b 100644
--- a/arch/x86/kernel/vsyscall_32.c
+++ b/arch/x86/kernel/vsyscall_32.c
@@ -13,8 +13,12 @@
 #include asm/vsyscall.h
 #include asm/pgtable.h
 #include asm/fixmap.h
+#include asm/elf.h
 
 void __init map_vsyscall(void)
 {
+   if (vdso_enabled != VDSO_COMPAT)
+   return;
+
__set_fixmap(VVAR_PAGE, __pa_symbol(__vvar_page), PAGE_KERNEL_VVAR);
 }
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 9b57770..6b74a46 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -29,12 +29,6 @@
 #include asm/hpet.h
 #include asm/vvar.h
 
-enum {
-   VDSO_DISABLED = 0,
-   VDSO_ENABLED = 1,
-   VDSO_COMPAT = 2,
-};
-
 #ifdef CONFIG_COMPAT_VDSO
 #define VDSO_DEFAULT   VDSO_COMPAT
 #else
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] mm: Add new func _install_special_mapping() to mmap.c

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  3935ed6a3a533c1736e3ca65bff72afd1773be27
Gitweb: http://git.kernel.org/tip/3935ed6a3a533c1736e3ca65bff72afd1773be27
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:02 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:51:56 -0700

mm: Add new func _install_special_mapping() to mmap.c

_install_special_mapping() is the new base function for
install_special_mapping(). It returns a pointer to the created VMA,
or an error code wrapped in an ERR_PTR().

This new function is needed for the 32-bit vdso support to map the
additional vvar and hpet pages into the 32-bit address space. This is done
with io_remap_pfn_range() and remap_pfn_range(), which require a vm_area_struct.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-3-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 include/linux/mm.h |  3 +++
 mm/mmap.c  | 20 
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c1b7414..6c7fedf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1750,6 +1750,9 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct 
file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
+  unsigned long addr, unsigned long len,
+  unsigned long flags, struct page **pages);
 extern int install_special_mapping(struct mm_struct *mm,
   unsigned long addr, unsigned long len,
   unsigned long flags, struct page **pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 20ff0c3..81ba54f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2918,7 +2918,7 @@ static const struct vm_operations_struct 
special_mapping_vmops = {
  * The array pointer and the pages it points to are assumed to stay alive
  * for as long as this mapping might exist.
  */
-int install_special_mapping(struct mm_struct *mm,
+struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long vm_flags, struct page **pages)
 {
@@ -2927,7 +2927,7 @@ int install_special_mapping(struct mm_struct *mm,
 
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (unlikely(vma == NULL))
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
INIT_LIST_HEAD(vma-anon_vma_chain);
vma-vm_mm = mm;
@@ -2948,11 +2948,23 @@ int install_special_mapping(struct mm_struct *mm,
 
perf_event_mmap(vma);
 
-   return 0;
+   return vma;
 
 out:
kmem_cache_free(vm_area_cachep, vma);
-   return ret;
+   return ERR_PTR(ret);
+}
+
+int install_special_mapping(struct mm_struct *mm,
+   unsigned long addr, unsigned long len,
+   unsigned long vm_flags, struct page **pages)
+{
+   struct vm_area_struct *vma = _install_special_mapping(mm,
+   addr, len, vm_flags, pages);
+
+   if (IS_ERR(vma))
+   return PTR_ERR(vma);
+   return 0;
 }
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Add 32 bit VDSO time support for 64 bit kernel

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  7c03156f34d113f885f045d8fb8cc3efd9e64751
Gitweb: http://git.kernel.org/tip/7c03156f34d113f885f045d8fb8cc3efd9e64751
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:10 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:52:41 -0700

x86, vdso: Add 32 bit VDSO time support for 64 bit kernel

This patch adds VDSO time support for the IA32 Emulation Layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
sizes of a long and a pointer differ from those of a 32-bit compiler, some
type hacking is necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned; this makes the
  vsyscall_gtod_data independent of kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed-size elements
  which work for both 32- and 64-bit access.
- The inner struct clock was removed to pack the whole struct.

The unsigned seq is handled by functions derived from seqcount_t.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-11-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vgtod.h  | 71 +--
 arch/x86/include/asm/vvar.h   |  5 ++
 arch/x86/kernel/vsyscall_gtod.c   | 34 -
 arch/x86/vdso/vclock_gettime.c| 91 +++
 arch/x86/vdso/vdso32/vclock_gettime.c | 21 
 5 files changed, 155 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..3c3366c 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
 #ifndef _ASM_X86_VGTOD_H
 #define _ASM_X86_VGTOD_H
 
-#include asm/vsyscall.h
+#include linux/compiler.h
 #include linux/clocksource.h
 
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-   seqcount_t  seq;
+   unsigned seq;
 
-   struct { /* extract of a clocksource struct */
-   int vclock_mode;
-   cycle_t cycle_last;
-   cycle_t mask;
-   u32 mult;
-   u32 shift;
-   } clock;
+   int vclock_mode;
+   cycle_t cycle_last;
+   cycle_t mask;
+   u32 mult;
+   u32 shift;
 
/* open coded 'struct timespec' */
-   time_t  wall_time_sec;
u64 wall_time_snsec;
+   gtod_long_t wall_time_sec;
+   gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
-   time_t  monotonic_time_sec;
+   gtod_long_t wall_time_coarse_sec;
+   gtod_long_t wall_time_coarse_nsec;
+   gtod_long_t monotonic_time_coarse_sec;
+   gtod_long_t monotonic_time_coarse_nsec;
 
-   struct timezone sys_tz;
-   struct timespec wall_time_coarse;
-   struct timespec monotonic_time_coarse;
+   int tz_minuteswest;
+   int tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+   unsigned ret;
+
+repeat:
+   ret = ACCESS_ONCE(s-seq);
+   if (unlikely(ret  1)) {
+   cpu_relax();
+   goto repeat;
+   }
+   smp_rmb();
+   return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+   unsigned start)
+{
+   smp_rmb();
+   return unlikely(s-seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+   ++s-seq;
+   smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+   smp_wmb();
+   ++s-seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..f9c6e56 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7 @@
  *
  *  Modified for x86 32 bit architecture by
  *  Stefani

[tip:x86/vdso] x86, vdso: Make vsyscall_gtod_data handling x86 generic

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  d2312e3379d581d2c3603357a0181046448e1de3
Gitweb: http://git.kernel.org/tip/d2312e3379d581d2c3603357a0181046448e1de3
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:01 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:51:52 -0700

x86, vdso: Make vsyscall_gtod_data handling x86 generic

This patch moves the vsyscall_gtod_data handling out of vsyscall_64.c
into an additional file, vsyscall_gtod.c, to make the functionality
available to the x86 32-bit kernel.

It also adds a new vsyscall_32.c which sets up the VVAR page.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-2-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/Kconfig   |  4 +--
 arch/x86/include/asm/clocksource.h |  4 ---
 arch/x86/include/asm/vvar.h| 12 ++--
 arch/x86/kernel/Makefile   |  2 +-
 arch/x86/kernel/hpet.c |  2 --
 arch/x86/kernel/tsc.c  |  2 --
 arch/x86/kernel/vmlinux.lds.S  |  3 --
 arch/x86/kernel/vsyscall_64.c  | 45 -
 arch/x86/kernel/vsyscall_gtod.c| 59 ++
 arch/x86/tools/relocs.c|  2 +-
 10 files changed, 72 insertions(+), 63 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9122f6b..ab3ebc8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,9 +107,9 @@ config X86
select HAVE_ARCH_SOFT_DIRTY
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
-   select ARCH_CLOCKSOURCE_DATA if X86_64
+   select ARCH_CLOCKSOURCE_DATA
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32  
X86_LOCAL_APIC)
-   select GENERIC_TIME_VSYSCALL if X86_64
+   select GENERIC_TIME_VSYSCALL
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/x86/include/asm/clocksource.h 
b/arch/x86/include/asm/clocksource.h
index 16a57f4..eda81dc 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,8 +3,6 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#ifdef CONFIG_X86_64
-
 #define VCLOCK_NONE 0  /* No vDSO clock available. */
 #define VCLOCK_TSC  1  /* vDSO should use vread_tsc.   */
 #define VCLOCK_HPET 2  /* vDSO should use vread_hpet.  */
@@ -14,6 +12,4 @@ struct arch_clocksource_data {
int vclock_mode;
 };
 
-#endif /* CONFIG_X86_64 */
-
 #endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index d76ac40..0a534ea 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,9 +16,6 @@
  * you mess up, the linker will catch it.)
  */
 
-/* Base address of vvars.  This is not ABI. */
-#define VVAR_ADDRESS (-10*1024*1024 - 4096)
-
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -29,6 +26,15 @@
 
 #else
 
+extern char __vvar_page;
+
+/* Base address of vvars.  This is not ABI. */
+#ifdef CONFIG_X86_64
+#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#else
+#define VVAR_ADDRESS (__vvar_page)
+#endif
+
 #define DECLARE_VVAR(offset, type, name)   \
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb648c8..f4d9600 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y  += probe_roms.o
 obj-$(CONFIG_X86_32)   += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
-obj-y  += syscall_$(BITS).o
+obj-y  += syscall_$(BITS).o vsyscall_gtod.o
 obj-$(CONFIG_X86_64)   += vsyscall_64.o
 obj-$(CONFIG_X86_64)   += vsyscall_emu_64.o
 obj-$(CONFIG_SYSFS)+= ksysfs.o
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index da85a8e..e4b86ab 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -752,9 +752,7 @@ static struct clocksource clocksource_hpet = {
.mask   = HPET_MASK,
.flags  = CLOCK_SOURCE_IS_CONTINUOUS,
.resume = hpet_resume_counter,
-#ifdef CONFIG_X86_64
.archdata   = { .vclock_mode = VCLOCK_HPET },
-#endif
 };
 
 static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cfbe99f..227dcfc 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -985,9 +985,7 @@ static struct clocksource clocksource_tsc = {
.mask   = CLOCKSOURCE_MASK(64),
.flags  = CLOCK_SOURCE_IS_CONTINUOUS |
  CLOCK_SOURCE_MUST_VERIFY

[tip:x86/vdso] x86, vdso: Replace VVAR(vsyscall_gtod_data) by gtod macro

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  af8c93d8d9809c3cf71cae2c398069399e64efa3
Gitweb: http://git.kernel.org/tip/af8c93d8d9809c3cf71cae2c398069399e64efa3
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:05 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:52:03 -0700

x86, vdso: Replace VVAR(vsyscall_gtod_data) by gtod macro

There are currently more than 30 users of the gtod macro, so replace the
last VVAR(vsyscall_gtod_data) uses with the gtod macro.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-6-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index fd074dd..743f277 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -109,7 +109,7 @@ static notrace cycle_t vread_pvclock(int *mode)
*mode = VCLOCK_NONE;
 
/* refer to tsc.c read_tsc() comment for rationale */
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod->clock.cycle_last;
 
if (likely(ret >= last))
return ret;
@@ -133,7 +133,7 @@ notrace static cycle_t vread_tsc(void)
rdtsc_barrier();
ret = (cycle_t)vget_cycles();
 
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod->clock.cycle_last;
 
if (likely(ret >= last))
return ret;
@@ -288,7 +288,7 @@ int gettimeofday(struct timeval *, struct timezone *)
 notrace time_t __vdso_time(time_t *t)
 {
/* This is atomic on x86_64 so we don't need any locks. */
-   time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+   time_t result = ACCESS_ONCE(gtod->wall_time_sec);
 
if (t)
*t = result;
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Cleanup __vdso_gettimeofday()

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  0df1ea2b7955d3cb311a549c44ed482452b859ff
Gitweb: http://git.kernel.org/tip/0df1ea2b7955d3cb311a549c44ed482452b859ff
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:06 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:52:26 -0700

x86, vdso: Cleanup __vdso_gettimeofday()

This patch cleans up the __vdso_gettimeofday() function a little.

It kicks out an unneeded ret local variable and makes the code faster
if only the timezone is needed (an admittedly rare case.)

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-7-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 743f277..09dae4a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -259,13 +259,12 @@ int clock_gettime(clockid_t, struct timespec *)
 
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-   long ret = VCLOCK_NONE;
-
if (likely(tv != NULL)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
 offsetof(struct timespec, tv_nsec) ||
 sizeof(*tv) != sizeof(struct timespec));
-   ret = do_realtime((struct timespec *)tv);
+   if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+   return vdso_fallback_gtod(tv, tz);
tv->tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
@@ -274,8 +273,6 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct 
timezone *tz)
tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gtod(tv, tz);
return 0;
 }
 int gettimeofday(struct timeval *, struct timezone *)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Revamp vclock_gettime.c

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  411f790cd7e91fac0db80d3cf789cb6deeac298e
Gitweb: http://git.kernel.org/tip/411f790cd7e91fac0db80d3cf789cb6deeac298e
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:03 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:51:59 -0700

x86, vdso: Revamp vclock_gettime.c

This intermediate patch revamps vclock_gettime.c by moving some functions
around. It exists only for splitting purposes, to make the whole 32 bit vdso
timer patch easier to review.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-4-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 85 +-
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index eb5d7a5..bbc8065 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -26,41 +26,26 @@
 
 #define gtod (VVAR(vsyscall_gtod_data))
 
-notrace static cycle_t vread_tsc(void)
+static notrace cycle_t vread_hpet(void)
 {
-   cycle_t ret;
-   u64 last;
-
-   /*
-* Empirically, a fence (of type that depends on the CPU)
-* before rdtsc is enough to ensure that rdtsc is ordered
-* with respect to loads.  The various CPU manuals are unclear
-* as to whether rdtsc can be reordered with later loads,
-* but no one has ever seen it happen.
-*/
-   rdtsc_barrier();
-   ret = (cycle_t)vget_cycles();
-
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
-
-   if (likely(ret >= last))
-   return ret;
+   return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
+}
 
-   /*
-* GCC likes to generate cmov here, but this branch is extremely
-* predictable (it's just a funciton of time and the likely is
-* very likely) and there's a data dependence, so force GCC
-* to generate a branch instead.  I don't barrier() because
-* we don't actually need a barrier, and if this function
-* ever gets inlined it will generate worse code.
-*/
-   asm volatile ();
-   return last;
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+   asm(syscall : =a (ret) :
+   0 (__NR_clock_gettime), D (clock), S (ts) : memory);
+   return ret;
 }
 
-static notrace cycle_t vread_hpet(void)
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
-   return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
+   long ret;
+
+   asm(syscall : =a (ret) :
+   0 (__NR_gettimeofday), D (tv), S (tz) : memory);
+   return ret;
 }
 
 #ifdef CONFIG_PARAVIRT_CLOCK
@@ -133,23 +118,37 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static cycle_t vread_tsc(void)
 {
-   long ret;
-   asm(syscall : =a (ret) :
-   0 (__NR_clock_gettime),D (clock), S (ts) : memory);
-   return ret;
-}
+   cycle_t ret;
+   u64 last;
 
-notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
-{
-   long ret;
+   /*
+* Empirically, a fence (of type that depends on the CPU)
+* before rdtsc is enough to ensure that rdtsc is ordered
+* with respect to loads.  The various CPU manuals are unclear
+* as to whether rdtsc can be reordered with later loads,
+* but no one has ever seen it happen.
+*/
+   rdtsc_barrier();
+   ret = (cycle_t)vget_cycles();
 
-   asm(syscall : =a (ret) :
-   0 (__NR_gettimeofday), D (tv), S (tz) : memory);
-   return ret;
-}
+   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
 
+   if (likely(ret >= last))
+   return ret;
+
+   /*
+* GCC likes to generate cmov here, but this branch is extremely
+* predictable (it's just a funciton of time and the likely is
+* very likely) and there's a data dependence, so force GCC
+* to generate a branch instead.  I don't barrier() because
+* we don't actually need a barrier, and if this function
+* ever gets inlined it will generate worse code.
+*/
+   asm volatile ();
+   return last;
+}
 
 notrace static inline u64 vgetsns(int *mode)
 {
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso32: handle 32 bit vDSO larger one page

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  4e40112c4ff6a577dd06d92b2a54cdf06265bf74
Gitweb: http://git.kernel.org/tip/4e40112c4ff6a577dd06d92b2a54cdf06265bf74
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:13 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:52:54 -0700

x86, vdso32: handle 32 bit vDSO larger one page

This patch enables 32 bit vDSOs that are larger than one page.

Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-14-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vdso32-setup.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index e10abdf..5b4aaef 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -69,7 +70,8 @@ __setup_param(vdso=, vdso32_setup, vdso_setup, 0);
 EXPORT_SYMBOL_GPL(vdso_enabled);
 #endif
 
-static struct page *vdso32_pages[1];
+static struct page **vdso32_pages;
+static unsigned int vdso32_size;
 
 #ifdef CONFIG_X86_64
 
@@ -115,11 +117,10 @@ void enable_sep_cpu(void)
 
 int __init sysenter_setup(void)
 {
-   void *vdso_page = (void *)get_zeroed_page(GFP_ATOMIC);
+   void *vdso_pages;
const void *vdso;
size_t vdso_len;
-
-   vdso32_pages[0] = virt_to_page(vdso_page);
+   unsigned int i;
 
if (vdso32_syscall()) {
vdso = vdso32_syscall_start;
@@ -132,8 +133,15 @@ int __init sysenter_setup(void)
vdso_len = vdso32_int80_end - vdso32_int80_start;
}
 
-   memcpy(vdso_page, vdso, vdso_len);
-   patch_vdso32(vdso_page, vdso_len);
+   vdso32_size = (vdso_len + PAGE_SIZE - 1) / PAGE_SIZE;
+   vdso32_pages = kmalloc(sizeof(*vdso32_pages) * vdso32_size, GFP_ATOMIC);
+   vdso_pages = kmalloc(VDSO_OFFSET(vdso32_size), GFP_ATOMIC);
+
+   for(i = 0; i != vdso32_size; ++i)
+   vdso32_pages[i] = virt_to_page(vdso_pages + VDSO_OFFSET(i));
+
+   memcpy(vdso_pages, vdso, vdso_len);
+   patch_vdso32(vdso_pages, vdso_len);
 
return 0;
 }
@@ -169,7 +177,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
 */
ret = install_special_mapping(mm,
addr,
-   VDSO_OFFSET(VDSO_PAGES - VDSO_PREV_PAGES),
+   VDSO_OFFSET(vdso32_size),
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso32_pages);
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Add 32 bit VDSO time support for 32 bit kernel

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  7a59ed415f5b57469e22e41fc4188d5399e0b194
Gitweb: http://git.kernel.org/tip/7a59ed415f5b57469e22e41fc4188d5399e0b194
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:09 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:52:37 -0700

x86, vdso: Add 32 bit VDSO time support for 32 bit kernel

This patch adds time support for a 32 bit VDSO to a 32 bit kernel.

For 32 bit programs running on a 32 bit kernel, the same mechanism is
used as for 64 bit programs running on a 64 bit kernel.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-10-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vdso.h   |  5 +++
 arch/x86/include/asm/vdso32.h | 11 +
 arch/x86/vdso/Makefile|  8 
 arch/x86/vdso/vclock_gettime.c| 76 +++
 arch/x86/vdso/vdso-layout.lds.S   | 22 ++
 arch/x86/vdso/vdso32-setup.c  | 47 --
 arch/x86/vdso/vdso32/vclock_gettime.c |  9 +
 arch/x86/vdso/vdso32/vdso32.lds.S | 11 +
 8 files changed, 178 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index f8605e6..bde4359 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -2,6 +2,11 @@
 #define _ASM_X86_VDSO_H
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+
+#include <asm/vdso32.h>
+
+extern const char VDSO32_PRELINK[];
+
 /*
  * Given a pointer to the vDSO image, find the pointer to VDSO32_name
  * as that symbol is defined in the vDSO sources or linker script.
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000..7efb701
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_X86_VDSO32_H
+#define _ASM_X86_VDSO32_H
+
+#define VDSO_BASE_PAGE 0
+#define VDSO_VVAR_PAGE 1
+#define VDSO_HPET_PAGE 2
+#define VDSO_PAGES 3
+#define VDSO_PREV_PAGES2
+#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
+
+#endif
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 7a3d13e..6cef7a1 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -146,8 +146,16 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
 $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
 
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
 $(obj)/vdso32/vdso32.lds \
+$(obj)/vdso32/vclock_gettime.o \
 $(obj)/vdso32/note.o \
 $(obj)/vdso32/%.o
$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 09dae4a..90bb5e8 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
  *
  * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
+ * 32 Bit compat layer by Stefani Seibold stef...@seibold.net
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
  */
@@ -12,13 +15,11 @@
 #define DISABLE_BRANCH_PROFILING
 
 #include <linux/kernel.h>
-#include <linux/posix-timers.h>
-#include <linux/time.h>
+#include <uapi/linux/time.h>
 #include <linux/string.h>
 #include <asm/vsyscall.h>
 #include <asm/fixmap.h>
 #include <asm/vgtod.h>
-#include <asm/timex.h>
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
@@ -26,6 +27,12 @@
 
 #define gtod (VVAR(vsyscall_gtod_data))
 
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifndef BUILD_VDSO32
+
 static notrace cycle_t vread_hpet(void)
 {
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
@@ -118,6 +125,59 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
+#else
+
+extern u8 hpet_page
+   __attribute__((visibility("hidden")));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+   return readl((const void __iomem *)(hpet_page + HPET_COUNTER));
+}
+#endif
+
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+
+   asm

[tip:x86/vdso] x86, vdso: __vdso_clock_gettime() cleanup

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  ce39c64028a075d14af32bfb8336bfe1370c0443
Gitweb: http://git.kernel.org/tip/ce39c64028a075d14af32bfb8336bfe1370c0443
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:04 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:52:01 -0700

x86, vdso: __vdso_clock_gettime() cleanup

This patch is a small code cleanup for the __vdso_clock_gettime() function.

It removes the unneeded return values from do_monotonic_coarse() and
do_realtime_coarse() and adds a fallback label for falling back to the
kernel clock_gettime() system call.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-5-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vclock_gettime.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index bbc8065..fd074dd 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -209,7 +209,7 @@ notrace static int do_monotonic(struct timespec *ts)
return mode;
 }
 
-notrace static int do_realtime_coarse(struct timespec *ts)
+notrace static void do_realtime_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -217,10 +217,9 @@ notrace static int do_realtime_coarse(struct timespec *ts)
ts->tv_sec = gtod->wall_time_coarse.tv_sec;
ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-   return 0;
 }
 
-notrace static int do_monotonic_coarse(struct timespec *ts)
+notrace static void do_monotonic_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -228,30 +227,32 @@ notrace static int do_monotonic_coarse(struct timespec 
*ts)
ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
-   return 0;
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-   int ret = VCLOCK_NONE;
-
switch (clock) {
case CLOCK_REALTIME:
-   ret = do_realtime(ts);
+   if (do_realtime(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_MONOTONIC:
-   ret = do_monotonic(ts);
+   if (do_monotonic(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_REALTIME_COARSE:
-   return do_realtime_coarse(ts);
+   do_realtime_coarse(ts);
+   break;
case CLOCK_MONOTONIC_COARSE:
-   return do_monotonic_coarse(ts);
+   do_monotonic_coarse(ts);
+   break;
+   default:
+   goto fallback;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gettime(clock, ts);
return 0;
+fallback:
+   return vdso_fallback_gettime(clock, ts);
 }
 int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Introduce VVAR marco for vdso32

2014-03-18 Thread tip-bot for Stefani Seibold
Commit-ID:  ef721987aef0cc0abba08c88810f2155f76b0b1f
Gitweb: http://git.kernel.org/tip/ef721987aef0cc0abba08c88810f2155f76b0b1f
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Mon, 17 Mar 2014 23:22:07 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Tue, 18 Mar 2014 12:52:29 -0700

x86, vdso: Introduce VVAR marco for vdso32

This patch revamps vvar.h to introduce the VVAR macro for vdso32.

Reviewed-by: Andy Lutomirski l...@amacapital.net
Signed-off-by: Stefani Seibold stef...@seibold.net
Link: 
http://lkml.kernel.org/r/1395094933-14252-8-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/include/asm/vvar.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 0a534ea..52c79ff 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -26,6 +26,15 @@
 
 #else
 
+#ifdef BUILD_VDSO32
+
+#define DECLARE_VVAR(offset, type, name)   \
+   extern type vvar_ ## name __attribute__((visibility("hidden")));
+
+#define VVAR(name) (vvar_ ## name)
+
+#else
+
 extern char __vvar_page;
 
 /* Base address of vvars.  This is not ABI. */
@@ -39,12 +48,13 @@ extern char __vvar_page;
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
 
+#define VVAR(name) (*vvaraddr_ ## name)
+#endif
+
 #define DEFINE_VVAR(type, name)
\
type name   \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
 
-#define VVAR(name) (*vvaraddr_ ## name)
-
 #endif
 
 /* DECLARE_VVAR(offset, type, name) */
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Fix size of get_unmapped_area()

2014-03-24 Thread tip-bot for Stefani Seibold
Commit-ID:  645a387ecbdb4aa78c8451a66416340616134537
Gitweb: http://git.kernel.org/tip/645a387ecbdb4aa78c8451a66416340616134537
Author: Stefani Seibold stef...@seibold.net
AuthorDate: Sun, 23 Mar 2014 17:38:14 +0100
Committer:  H. Peter Anvin h...@linux.intel.com
CommitDate: Mon, 24 Mar 2014 09:31:23 -0700

x86, vdso: Fix size of get_unmapped_area()

The size of the reserved memory for a 32 bit vdso must be the size of the
32 bit vDSO in pages + HPET page + VVAR page.

One page is not enough for this. This was a silly copy-and-paste bug;
the size was right in the previous patch.

Signed-off-by: Stefani Seibold stef...@seibold.net
Cc: Andy Lutomirski l...@amacapital.net
Link: 
http://lkml.kernel.org/r/1395592694-20571-1-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin h...@linux.intel.com
---
 arch/x86/vdso/vdso32-setup.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 791c1cb..0034898 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -165,12 +165,14 @@ int arch_setup_additional_pages(struct linux_binprm 
*bprm, int uses_interp)
 
down_write(&mm->mmap_sem);
 
-   addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+   addr = get_unmapped_area(NULL, 0, vdso32_size + 
VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
 
+   addr += VDSO_OFFSET(VDSO_PREV_PAGES);
+
current->mm->context.vdso = (void *)addr;
 
/*
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: vclock_gettime.c __vdso_clock_gettime cleanup

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  3b19f50facf0488e193ebae00b864fdaeeb25dbb
Gitweb: http://git.kernel.org/tip/3b19f50facf0488e193ebae00b864fdaeeb25dbb
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:42 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:06:47 -0800

x86, vdso: vclock_gettime.c __vdso_clock_gettime cleanup

This patch is a small code cleanup for the __vdso_clock_gettime()
function.

It removes the unneeded return values from do_monotonic_coarse() and
do_realtime_coarse() and adds a fallback label for falling back to the
kernel clock_gettime() system call.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-5-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/vdso/vclock_gettime.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index bbc8065..fd074dd 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -209,7 +209,7 @@ notrace static int do_monotonic(struct timespec *ts)
return mode;
 }
 
-notrace static int do_realtime_coarse(struct timespec *ts)
+notrace static void do_realtime_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -217,10 +217,9 @@ notrace static int do_realtime_coarse(struct timespec *ts)
ts->tv_sec = gtod->wall_time_coarse.tv_sec;
ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-   return 0;
 }
 
-notrace static int do_monotonic_coarse(struct timespec *ts)
+notrace static void do_monotonic_coarse(struct timespec *ts)
 {
unsigned long seq;
do {
@@ -228,30 +227,32 @@ notrace static int do_monotonic_coarse(struct timespec 
*ts)
ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
-   return 0;
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-   int ret = VCLOCK_NONE;
-
switch (clock) {
case CLOCK_REALTIME:
-   ret = do_realtime(ts);
+   if (do_realtime(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_MONOTONIC:
-   ret = do_monotonic(ts);
+   if (do_monotonic(ts) == VCLOCK_NONE)
+   goto fallback;
break;
case CLOCK_REALTIME_COARSE:
-   return do_realtime_coarse(ts);
+   do_realtime_coarse(ts);
+   break;
case CLOCK_MONOTONIC_COARSE:
-   return do_monotonic_coarse(ts);
+   do_monotonic_coarse(ts);
+   break;
+   default:
+   goto fallback;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gettime(clock, ts);
return 0;
+fallback:
+   return vdso_fallback_gettime(clock, ts);
 }
 int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] mm: Add new func _install_special_mapping() to mmap.c

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  48be0cb586e850e3ff5c37fe9339f233f9c893e4
Gitweb: http://git.kernel.org/tip/48be0cb586e850e3ff5c37fe9339f233f9c893e4
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:40 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:04:23 -0800

mm: Add new func _install_special_mapping() to mmap.c

_install_special_mapping() is the new base function for
install_special_mapping(). This function returns a pointer to the
created VMA, or an error code wrapped in an ERR_PTR().

This new function is needed by the x86 vdso 32-bit support to map
the additional vvar and hpet pages into the 32 bit address space.
This will be done with io_remap_pfn_range() and remap_pfn_range(),
which require a vm_area_struct.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-3-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 include/linux/mm.h |  3 +++
 mm/mmap.c  | 20 
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f28f46e..55342aa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1740,6 +1740,9 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct 
file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
+  unsigned long addr, unsigned long len,
+  unsigned long flags, struct page **pages);
 extern int install_special_mapping(struct mm_struct *mm,
   unsigned long addr, unsigned long len,
   unsigned long flags, struct page **pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 20ff0c3..81ba54f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2918,7 +2918,7 @@ static const struct vm_operations_struct 
special_mapping_vmops = {
  * The array pointer and the pages it points to are assumed to stay alive
  * for as long as this mapping might exist.
  */
-int install_special_mapping(struct mm_struct *mm,
+struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long vm_flags, struct page **pages)
 {
@@ -2927,7 +2927,7 @@ int install_special_mapping(struct mm_struct *mm,
 
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (unlikely(vma == NULL))
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_mm = mm;
@@ -2948,11 +2948,23 @@ int install_special_mapping(struct mm_struct *mm,
 
perf_event_mmap(vma);
 
-   return 0;
+   return vma;
 
 out:
kmem_cache_free(vm_area_cachep, vma);
-   return ret;
+   return ERR_PTR(ret);
+}
+
+int install_special_mapping(struct mm_struct *mm,
+   unsigned long addr, unsigned long len,
+   unsigned long vm_flags, struct page **pages)
+{
+   struct vm_area_struct *vma = _install_special_mapping(mm,
+   addr, len, vm_flags, pages);
+
+   if (IS_ERR(vma))
+   return PTR_ERR(vma);
+   return 0;
 }
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Cleanup __vdso_gettimeofday()

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  bada923abe5d8b015efe0e49ca47f76af853972d
Gitweb: http://git.kernel.org/tip/bada923abe5d8b015efe0e49ca47f76af853972d
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:44 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:07:31 -0800

x86, vdso: Cleanup __vdso_gettimeofday()

This patch does a little cleanup of the __vdso_gettimeofday() function.

It kicks out an unneeded ret local variable and makes the code faster
if only the timezone is needed.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-7-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/vdso/vclock_gettime.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 743f277..09dae4a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -259,13 +259,12 @@ int clock_gettime(clockid_t, struct timespec *)
 
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-   long ret = VCLOCK_NONE;
-
if (likely(tv != NULL)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
 offsetof(struct timespec, tv_nsec) ||
 sizeof(*tv) != sizeof(struct timespec));
-   ret = do_realtime((struct timespec *)tv);
+   if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+   return vdso_fallback_gtod(tv, tz);
tv->tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
@@ -274,8 +273,6 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct 
timezone *tz)
tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
}
 
-   if (ret == VCLOCK_NONE)
-   return vdso_fallback_gtod(tv, tz);
return 0;
 }
 int gettimeofday(struct timeval *, struct timezone *)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Replace VVAR(vsyscall_gtod_data) by the gtod macro

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  d3e68e3e3fed760169cef2fa95e73551f5d24022
Gitweb: http://git.kernel.org/tip/d3e68e3e3fed760169cef2fa95e73551f5d24022
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:43 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:07:01 -0800

x86, vdso: Replace VVAR(vsyscall_gtod_data) by the gtod macro

There are currently more than 30 users of the gtod macro, so replace the
last remaining uses of VVAR(vsyscall_gtod_data) with the gtod macro.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-6-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/vdso/vclock_gettime.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index fd074dd..743f277 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -109,7 +109,7 @@ static notrace cycle_t vread_pvclock(int *mode)
*mode = VCLOCK_NONE;
 
/* refer to tsc.c read_tsc() comment for rationale */
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod->clock.cycle_last;
 
if (likely(ret >= last))
return ret;
@@ -133,7 +133,7 @@ notrace static cycle_t vread_tsc(void)
rdtsc_barrier();
ret = (cycle_t)vget_cycles();
 
-   last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+   last = gtod->clock.cycle_last;
 
if (likely(ret >= last))
return ret;
@@ -288,7 +288,7 @@ int gettimeofday(struct timeval *, struct timezone *)
 notrace time_t __vdso_time(time_t *t)
 {
/* This is atomic on x86_64 so we don't need any locks. */
-   time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+   time_t result = ACCESS_ONCE(gtod->wall_time_sec);
 
if (t)
*t = result;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Introduce VVAR marco for vdso32

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  995106bc0373be03295aa6e0e380dd33a3a37ea4
Gitweb: http://git.kernel.org/tip/995106bc0373be03295aa6e0e380dd33a3a37ea4
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:45 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:07:45 -0800

x86, vdso: Introduce VVAR marco for vdso32

This patch revamps vvar.h to introduce the VVAR macro for vdso32.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-8-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/vvar.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 0a534ea..52c79ff 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -26,6 +26,15 @@
 
 #else
 
+#ifdef BUILD_VDSO32
+
+#define DECLARE_VVAR(offset, type, name)   \
+   extern type vvar_ ## name __attribute__((visibility("hidden")));
+
+#define VVAR(name) (vvar_ ## name)
+
+#else
+
 extern char __vvar_page;
 
 /* Base address of vvars.  This is not ABI. */
@@ -39,12 +48,13 @@ extern char __vvar_page;
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
 
+#define VVAR(name) (*vvaraddr_ ## name)
+#endif
+
 #define DEFINE_VVAR(type, name)
\
type name   \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
 
-#define VVAR(name) (*vvaraddr_ ## name)
-
 #endif
 
 /* DECLARE_VVAR(offset, type, name) */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 32-bit kernel

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  feea5bae36ba8fcd7095e1b23cc2c537f4d24562
Gitweb: http://git.kernel.org/tip/feea5bae36ba8fcd7095e1b23cc2c537f4d24562
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:46 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:08:18 -0800

x86, vdso: Add 32-bit VDSO time support for the 32-bit kernel

This patch adds time support for the 32-bit VDSO to the 32-bit
kernel.

For 32-bit programs running on a 32-bit kernel, the same mechanism is
used as for 64-bit programs running on a 64-bit kernel.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-9-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/vdso.h   |  3 ++
 arch/x86/include/asm/vdso32.h | 11 ++
 arch/x86/vdso/Makefile|  8 
 arch/x86/vdso/vclock_gettime.c| 74 ---
 arch/x86/vdso/vdso-layout.lds.S   | 22 +++
 arch/x86/vdso/vdso32-setup.c  | 53 ++---
 arch/x86/vdso/vdso32/vclock_gettime.c | 35 +
 arch/x86/vdso/vdso32/vdso32.lds.S |  9 +
 8 files changed, 203 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index fddb53d..fe3cef9 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -2,6 +2,9 @@
 #define _ASM_X86_VDSO_H
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
+
+#include 
+
 extern const char VDSO32_PRELINK[];
 
 /*
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000..7efb701
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_X86_VDSO32_H
+#define _ASM_X86_VDSO32_H
+
+#define VDSO_BASE_PAGE 0
+#define VDSO_VVAR_PAGE 1
+#define VDSO_HPET_PAGE 2
+#define VDSO_PAGES 3
+#define VDSO_PREV_PAGES2
+#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
+
+#endif
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index fd14be1..92daaa6 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -145,8 +145,16 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
 $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
 
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=3 -freg-struct-return -fpic
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
 $(obj)/vdso32/vdso32.lds \
+$(obj)/vdso32/vclock_gettime.o \
 $(obj)/vdso32/note.o \
 $(obj)/vdso32/%.o
$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 09dae4a..fcbc974 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
  *
  * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
+ * 32 Bit compat layer by Stefani Seibold 
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
  */
@@ -12,13 +15,11 @@
 #define DISABLE_BRANCH_PROFILING
 
 #include 
-#include 
-#include 
+#include 
 #include 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -26,6 +27,12 @@
 
 #define gtod ((vsyscall_gtod_data))
 
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifndef BUILD_VDSO32
+
 static notrace cycle_t vread_hpet(void)
 {
return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 
HPET_COUNTER);
@@ -118,6 +125,59 @@ static notrace cycle_t vread_pvclock(int *mode)
 }
 #endif
 
+#else
+
+extern u8 hpet_page
+   __attribute__((visibility("hidden")));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+   return readl((const void __iomem *)(_page + HPET_COUNTER));
+}
+#endif
+
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+   long ret;
+
+   asm(
+   "mov %%ebx, %%edx \n"
+   "mov %2, %%ebx \n"
+   "call VDSO32_vsyscall \n"
+   "mov %%edx, %%ebx \n"
+   : "=a" (ret)
+   : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+   : "memory", "edx");
+  

[tip:x86/vdso] x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  249adfe2c86766eaa739d342525e55a96bf9efa7
Gitweb: http://git.kernel.org/tip/249adfe2c86766eaa739d342525e55a96bf9efa7
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:47 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:08:29 -0800

x86, vdso: Add 32-bit VDSO time support for the 64-bit kernel

This patch adds VDSO time support for the IA32 Emulation Layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
sizes of a long and a pointer differ from those of a 32-bit compiler, some
type hacking is necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned, which makes
  vsyscall_gtod_data independent of kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed-size elements
  which work for both 32- and 64-bit access.
- The inner struct clock was removed to pack the whole struct.

The "unsigned seq" is handled by functions derived from seqcount_t.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-10-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/vgtod.h  | 69 ---
 arch/x86/include/asm/vvar.h   |  5 +++
 arch/x86/kernel/vsyscall_gtod.c   | 34 +++--
 arch/x86/vdso/vclock_gettime.c| 68 +-
 arch/x86/vdso/vdso32/vclock_gettime.c | 33 +
 5 files changed, 149 insertions(+), 60 deletions(-)

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d3..abb9e45 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -4,27 +4,70 @@
 #include 
 #include 
 
+#ifdef CONFIG_X86_64
+typedef u64 gtod_long_t;
+#else
+typedef u32 gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-   seqcount_t  seq;
+   unsigned seq;
 
-   struct { /* extract of a clocksource struct */
-   int vclock_mode;
-   cycle_t cycle_last;
-   cycle_t mask;
-   u32 mult;
-   u32 shift;
-   } clock;
+   int vclock_mode;
+   cycle_t cycle_last;
+   cycle_t mask;
+   u32 mult;
+   u32 shift;
 
/* open coded 'struct timespec' */
-   time_t  wall_time_sec;
u64 wall_time_snsec;
+   gtod_long_t wall_time_sec;
+   gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
-   time_t  monotonic_time_sec;
+   gtod_long_t wall_time_coarse_sec;
+   gtod_long_t wall_time_coarse_nsec;
+   gtod_long_t monotonic_time_coarse_sec;
+   gtod_long_t monotonic_time_coarse_nsec;
 
-   struct timezone sys_tz;
-   struct timespec wall_time_coarse;
-   struct timespec monotonic_time_coarse;
+   int tz_minuteswest;
+   int tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+   unsigned ret;
+
+repeat:
+   ret = ACCESS_ONCE(s->seq);
+   if (unlikely(ret & 1)) {
+   cpu_relax();
+   goto repeat;
+   }
+   smp_rmb();
+   return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+   unsigned start)
+{
+   smp_rmb();
+   return unlikely(s->seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+   ++s->seq;
+   smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+   smp_wmb();
+   ++s->seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 52c79ff..081d909 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,6 +16,9 @@
  * you mess up, the linker will catch it.)
  */
 
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
+
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -64,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index b5a943d..973dcc4 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -4,6 +4,7 @@
  *
  *  Modified for x86 32 bit architecture by
  *  Stefani Seibold 
+ *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
  *
  *  Thanks to h...@transmeta.com for some useful hint.
  *  Spe

[tip:x86/vdso] x86, vdso: Make vsyscall_gtod_data handling x86 generic

2014-02-16 Thread tip-bot for Stefani Seibold
Commit-ID:  0d3ad8c4e6246637b289c22dfe12e3dbae516aef
Gitweb: http://git.kernel.org/tip/0d3ad8c4e6246637b289c22dfe12e3dbae516aef
Author: Stefani Seibold 
AuthorDate: Sun, 16 Feb 2014 22:52:39 +0100
Committer:  H. Peter Anvin 
CommitDate: Sun, 16 Feb 2014 15:04:06 -0800

x86, vdso: Make vsyscall_gtod_data handling x86 generic

This patch moves the vsyscall_gtod_data handling out of vsyscall_64.c
into an additional file, vsyscall_gtod.c, to make the functionality
available for the x86 32-bit kernel.

It also adds a new vsyscall_32.c which sets up the VVAR page.

Signed-off-by: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1392587568-7325-2-git-send-email-stef...@seibold.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/Kconfig   |  4 +--
 arch/x86/include/asm/clocksource.h |  4 ---
 arch/x86/include/asm/fixmap.h  |  2 ++
 arch/x86/include/asm/vvar.h| 12 ++--
 arch/x86/kernel/Makefile   |  3 +-
 arch/x86/kernel/hpet.c |  4 ---
 arch/x86/kernel/setup.c|  2 --
 arch/x86/kernel/tsc.c  |  2 --
 arch/x86/kernel/vmlinux.lds.S  |  3 --
 arch/x86/kernel/vsyscall_32.c  | 20 +
 arch/x86/kernel/vsyscall_64.c  | 45 -
 arch/x86/kernel/vsyscall_gtod.c| 59 ++
 arch/x86/tools/relocs.c|  2 +-
 13 files changed, 95 insertions(+), 67 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0af5250..0da3b39 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,9 +107,9 @@ config X86
select HAVE_ARCH_SOFT_DIRTY
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
-   select ARCH_CLOCKSOURCE_DATA if X86_64
+   select ARCH_CLOCKSOURCE_DATA
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && 
X86_LOCAL_APIC)
-   select GENERIC_TIME_VSYSCALL if X86_64
+   select GENERIC_TIME_VSYSCALL
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
diff --git a/arch/x86/include/asm/clocksource.h 
b/arch/x86/include/asm/clocksource.h
index 16a57f4..eda81dc 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,8 +3,6 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#ifdef CONFIG_X86_64
-
 #define VCLOCK_NONE 0  /* No vDSO clock available. */
 #define VCLOCK_TSC  1  /* vDSO should use vread_tsc.   */
 #define VCLOCK_HPET 2  /* vDSO should use vread_hpet.  */
@@ -14,6 +12,4 @@ struct arch_clocksource_data {
int vclock_mode;
 };
 
-#endif /* CONFIG_X86_64 */
-
 #endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7252cd3..094d0cc 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -75,6 +75,8 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
FIX_HOLE,
FIX_VDSO,
+   VVAR_PAGE,
+   VSYSCALL_HPET,
 #else
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index d76ac40..0a534ea 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,9 +16,6 @@
  * you mess up, the linker will catch it.)
  */
 
-/* Base address of vvars.  This is not ABI. */
-#define VVAR_ADDRESS (-10*1024*1024 - 4096)
-
 #if defined(__VVAR_KERNEL_LDS)
 
 /* The kernel linker script defines its own magic to put vvars in the
@@ -29,6 +26,15 @@
 
 #else
 
+extern char __vvar_page;
+
+/* Base address of vvars.  This is not ABI. */
+#ifdef CONFIG_X86_64
+#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#else
+#define VVAR_ADDRESS (&__vvar_page)
+#endif
+
 #define DECLARE_VVAR(offset, type, name)   \
static type const * const vvaraddr_ ## name =   \
(void *)(VVAR_ADDRESS + (offset));
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb648c8..3282eda 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,8 @@ obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y  += probe_roms.o
 obj-$(CONFIG_X86_32)   += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
-obj-y  += syscall_$(BITS).o
+obj-y  += syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_X86_32)   += vsyscall_32.o
 obj-$(CONFIG_X86_64)   += vsyscall_64.o
 obj-$(CONFIG_X86_64)   += vsyscall_emu_64.o
 obj-$(CONFIG_SYSFS)+= ksysfs.o
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index da85a8e..54263f0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -74,9 +74,7 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
 static inline void hpet_set_mapping(void)
 {
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
-#ifdef CONFIG_X86_64
__set_fixmap(V

<    1   2   3   4   5   6   >