Re: [Xen-devel] [PATCH 1/7] introduce time managment in xtf

2018-04-09 Thread Roger Pau Monné
On Wed, Apr 04, 2018 at 03:50:48PM +, Pawel Wieczorkiewicz wrote:
> From: Paul Semel 
> 
> this file is introduce to be able to implement an inter domain
> communication protocol over xenstore. For synchronization purpose, we do
> really want to be able to "control" time
> 
> common/time.c: since_boot_time gets the time in nanoseconds from the
> moment the VM has booted
> 
> Signed-off-by: Paul Semel 
> 
> cr https://code.amazon.com/reviews/CR-786223

This seems like some Amazon internal tracking, which shouldn't be sent
upstream (the webpage is not even accessible from the outside).

> ---
>  build/files.mk |  1 +
>  common/time.c  | 87 
> ++
>  include/xtf/time.h | 35 ++
>  3 files changed, 123 insertions(+)
>  create mode 100644 common/time.c
>  create mode 100644 include/xtf/time.h
> 
> diff --git a/build/files.mk b/build/files.mk
> index 46b42d6..55ed1ca 100644
> --- a/build/files.mk
> +++ b/build/files.mk
> @@ -16,6 +16,7 @@ obj-perarch += $(ROOT)/common/libc/vsnprintf.o
>  obj-perarch += $(ROOT)/common/report.o
>  obj-perarch += $(ROOT)/common/setup.o
>  obj-perarch += $(ROOT)/common/xenbus.o
> +obj-perarch += $(ROOT)/common/time.o
>  
>  obj-perenv += $(ROOT)/arch/x86/decode.o
>  obj-perenv += $(ROOT)/arch/x86/desc.o
> diff --git a/common/time.c b/common/time.c
> new file mode 100644
> index 000..11ac168
> --- /dev/null
> +++ b/common/time.c
> @@ -0,0 +1,87 @@
> +#include 
> +#include 
> +#include 
> +
> +/* This function was taken from mini-os source code */
> +/* It returns ((delta << shift) * mul_frac) >> 32 */
> +static inline uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int 
> shift)
> +{
> +uint64_t product;
> +#ifdef __i386__
> +uint32_t tmp1, tmp2;
> +#endif
> +
> +if ( shift < 0 )
> +delta >>= -shift;
> +else
> +delta <<= shift;
> +
> +#ifdef __i386__
> +__asm__ (
> +"mul  %5   ; "
> +"mov  %4,%%eax ; "
> +"mov  %%edx,%4 ; "
> +"mul  %5   ; "
> +"add  %4,%%eax ; "
> +"xor  %5,%5; "
> +"adc  %5,%%edx ; "
> +: "=A" (product), "=r" (tmp1), "=r" (tmp2)
> +: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" 
> (mul_frac) );
> +#else
> +__asm__ (
> +"mul %%rdx ; shrd $32,%%rdx,%%rax"
> +: "=a" (product) : "0" (delta), "d" ((uint64_t)mul_frac) );
> +#endif
> +
> +return product;
> +}
> +
> +
> +#if defined(__i386__)
> +uint32_t since_boot_time(void)
> +#else
> +uint64_t since_boot_time(void)
> +#endif

Why is the return type different for 32 vs 64bits?

I see no reason to not return a 64bit value in both cases IMO, and
that would remove the need for all the ifdefery below.

> +{
> +uint64_t tsc;
> +uint32_t version, wc_version;
> +#if defined(__i386__)
> +uint32_t system_time;
> +#else
> +uint64_t system_time;
> +#endif
> +uint64_t old_tsc;
> +
> +do
> +{
> +do
> +{
> +wc_version = shared_info.wc_version ;
> +version = shared_info.vcpu_info[0].time.version;
> +} while ( (version & 1) == 1 || (wc_version & 1) == 1);
> +
> +system_time = shared_info.vcpu_info[0].time.system_time;
> +old_tsc = shared_info.vcpu_info[0].time.tsc_timestamp;
> +} while (
> +version != shared_info.vcpu_info[0].time.version ||
> +wc_version != shared_info.wc_version
> +);
> +
> +rdtscp(tsc);
> +
> +system_time += scale_delta(tsc - old_tsc,
> +   
> shared_info.vcpu_info[0].time.tsc_to_system_mul,
> +   shared_info.vcpu_info[0].time.tsc_shift);

This seems way more complicated that what's actually needed. First of
all you don't need to check wc_version at all if you are only fetching
the vcpu_time_info fields (wc_version is for the wallclock).

Then AFAICT you are also missing the barriers (or using something like
Linux's ACCESS_ONCE):

https://github.com/freebsd/freebsd/blob/master/sys/x86/x86/pvclock.c#L141

> +
> +return system_time;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xtf/time.h b/include/xtf/time.h
> new file mode 100644
> index 000..15cbd48
> --- /dev/null
> +++ b/include/xtf/time.h
> @@ -0,0 +1,35 @@
> +/**
> + * @file include/xtf/time.h
> + *
> + * Time management
> + */
> +#ifndef XTF_TIME_H
> +# define XTF_TIME_H
> +
> +#include 
> +
> +#define rdtscp(tsc) {\

Why is this called rdtscp when you are actually using rdtsc? (and not
rdtscp?)

> +uint32_t lo, hi;\
> +__asm__ volatile("rdtsc": "=a"(lo), "=d"(hi));\
> +tsc = ((uint64_t)hi << 32) | lo;\

rdtsc is not a serializing instruction, see:


Re: [Xen-devel] [PATCH 1/7] introduce time managment in xtf

2018-04-08 Thread Paul Semel

On 04/07/2018 10:58 PM, Paul Semel wrote:
On 04/07/2018 10:39 PM, Andrew Cooper wrot> However, both of your patches 
have (different) barrier issues, and

different (mis)uses of the shared memory clocks, which will need to be
addressed. One general comment for the full series is to not bother
trying to make time 32bits in a 32bit build.  XTF is an entirely
self-contained binary, and I don't much care for legacy behaviours for
legacy sake.





Alright, so I took a look at all of this. So, if I understood it well,
you want me to drop all the `uint32_t` part to only have 64 bits.
But are you sure that it works the same on 32bits architecture ?


At the end of the day, all you are passing is units of nanoseconds of
larger.  The parameter passing won't be identical between 32 or 64bit
builds, but everything else will be fine.



Also, I have another question. I don't see any place where I have
memory barrier issues, but I am certainely missing something. Can you
elaborate on this one ?


Your code does:


+    do
+    {
+    do
+    {
+    wc_version = shared_info.wc_version ;
+    version = shared_info.vcpu_info[0].time.version;
+    } while ( (version & 1) == 1 || (wc_version & 1) == 1);
+
+    system_time = shared_info.vcpu_info[0].time.system_time;
+    old_tsc = shared_info.vcpu_info[0].time.tsc_timestamp;
+    } while (
+    version != shared_info.vcpu_info[0].time.version ||
+    wc_version != shared_info.wc_version
+    );


First of all, I'm not sure why you are looking at wc_version.  You are
only reading data out of shared_info.vcpu_info[0].time so you should
only need to check time.version

That said, it is critical to force two things:
1) The first read of version is strictly before
system_time/tsc_timestamp, and the second read is afterwards.
2) That the compiler can't optimise the code by making repeated reads of
shared memory.

1 is a correctness issue and C, whose memory model is that there are no
unknown updates to memory, sees that it hasn't written to time.version,
and therefore can optimises the entire outer do loop to a single iteration.

2 is a security issue and we've had many XSAs in the past.  The worst
example IIRC was a compiler which managed to use a jump table, using a
32bit integer index read from shared memory.  When using shared memory,
it is critical to force the compiler to read a value and stash it
locally (either in a live register, or on the stack), then audit its
value, then act upon the value.

This way, no optimisation the compiler can make code which will result
in a repeated read of memory, where a malicious advisory could change
the value between reads.

1 strictly needs memory barriers to make work, and in this case
smp_rmb().  Things tend to work on x86 because of the fairly restricted
memory model, but architectures such as ARM with weaker memory models
typically tends to explode in funny ways.

2 could be fixed with ACCESS_ONCE() to explicitly tell the compiler that
this is a volatile access and the value in memory may no longer be the same.

A sample (which I think is correct, but you never quite know for sure
with barriers!) is:


 do {
 uint32_t ver1, ver2;

 do {
 ver1 = shared_info.vcpu_info[0].time.version;
 smp_rmb();
 } while ( (ver1 & 1) == 1 );

 system_time = shared_info.vcpu_info[0].time.system_time;
 old_tsc = shared_info.vcpu_info[0].time.tsc_timestamp;
 smp_rmb();
 ver2 = shared_info.vcpu_info[0].time.version;
 smp_rmb();

 } while ( ver1 != ver2 );


The smp_rmb() are full CPU memory barriers (enforcing the ordering of
reads even from remote cores), and compiler barriers (preventing the
compiler from reodering memory accesses across the barrier).

Therefore, ver1 and ver2 are forced to be kept in registers, or spilled
onto the stack.

~Andrew



Thank you very much for replying ! I completely missed this function, I do 
really apologize about it !


Effectively, there is serious issues here. As far as I can remember, I inspired 
myself by another OS code to write this function (as I didn't know really much 
how the time thing was working in Xen).


Thanks for writing this piece of code, I think I will use it and add some 
ACCESS_ONCE on my reads of the different variables in the shared memory 
(version, system_time etc..).


Anyway, I already got rid of the 32 bits part, so that even 32 bits 
architectures are using 64 bits integer (by making sure to use the `divmod64` 
function ) for the division and modulos.


I will change this part of the code this night or maybe tomorrow and send you 
another version of the patch !


So, to be completely secure, I guess we would need some locking mechanisms for 
64 bits shared memory accesses on 32 bits arch. Am I right ?


For the moment, I only used the ACCESS_ONCE mechanism, but that's obviously not 
a "once" access on 32 bits. We have something 

Re: [Xen-devel] [PATCH 1/7] introduce time managment in xtf

2018-04-07 Thread Paul Semel
On 04/07/2018 10:39 PM, Andrew Cooper wrot> However, both of your 
patches have (different) barrier issues, and

different (mis)uses of the shared memory clocks, which will need to be
addressed. One general comment for the full series is to not bother
trying to make time 32bits in a 32bit build.  XTF is an entirely
self-contained binary, and I don't much care for legacy behaviours for
legacy sake.





Alright, so I took a look at all of this. So, if I understood it well,
you want me to drop all the `uint32_t` part to only have 64 bits.
But are you sure that it works the same on 32bits architecture ?


At the end of the day, all you are passing is units of nanoseconds of
larger.  The parameter passing won't be identical between 32 or 64bit
builds, but everything else will be fine.



Also, I have another question. I don't see any place where I have
memory barrier issues, but I am certainely missing something. Can you
elaborate on this one ?


Your code does:


+do
+{
+do
+{
+wc_version = shared_info.wc_version ;
+version = shared_info.vcpu_info[0].time.version;
+} while ( (version & 1) == 1 || (wc_version & 1) == 1);
+
+system_time = shared_info.vcpu_info[0].time.system_time;
+old_tsc = shared_info.vcpu_info[0].time.tsc_timestamp;
+} while (
+version != shared_info.vcpu_info[0].time.version ||
+wc_version != shared_info.wc_version
+);


First of all, I'm not sure why you are looking at wc_version.  You are
only reading data out of shared_info.vcpu_info[0].time so you should
only need to check time.version

That said, it is critical to force two things:
1) The first read of version is strictly before
system_time/tsc_timestamp, and the second read is afterwards.
2) That the compiler can't optimise the code by making repeated reads of
shared memory.

1 is a correctness issue and C, whose memory model is that there are no
unknown updates to memory, sees that it hasn't written to time.version,
and therefore can optimises the entire outer do loop to a single iteration.

2 is a security issue and we've had many XSAs in the past.  The worst
example IIRC was a compiler which managed to use a jump table, using a
32bit integer index read from shared memory.  When using shared memory,
it is critical to force the compiler to read a value and stash it
locally (either in a live register, or on the stack), then audit its
value, then act upon the value.

This way, no optimisation the compiler can make code which will result
in a repeated read of memory, where a malicious advisory could change
the value between reads.

1 strictly needs memory barriers to make work, and in this case
smp_rmb().  Things tend to work on x86 because of the fairly restricted
memory model, but architectures such as ARM with weaker memory models
typically tends to explode in funny ways.

2 could be fixed with ACCESS_ONCE() to explicitly tell the compiler that
this is a volatile access and the value in memory may no longer be the same.

A sample (which I think is correct, but you never quite know for sure
with barriers!) is:


     do {
     uint32_t ver1, ver2;

     do {
     ver1 = shared_info.vcpu_info[0].time.version;
 smp_rmb();
     } while ( (ver1 & 1) == 1 );

     system_time = shared_info.vcpu_info[0].time.system_time;
     old_tsc = shared_info.vcpu_info[0].time.tsc_timestamp;
     smp_rmb();
     ver2 = shared_info.vcpu_info[0].time.version;
     smp_rmb();

     } while ( ver1 != ver2 );


The smp_rmb() are full CPU memory barriers (enforcing the ordering of
reads even from remote cores), and compiler barriers (preventing the
compiler from reodering memory accesses across the barrier).

Therefore, ver1 and ver2 are forced to be kept in registers, or spilled
onto the stack.

~Andrew



Thank you very much for replying ! I completely missed this function, I 
do really apologize about it !


Effectively, there is serious issues here. As far as I can remember, I 
inspired myself by another OS code to write this function (as I didn't 
know really much how the time thing was working in Xen).


Thanks for writing this piece of code, I think I will use it and add 
some ACCESS_ONCE on my reads of the different variables in the shared 
memory (version, system_time etc..).


Anyway, I already got rid of the 32 bits part, so that even 32 bits 
architectures are using 64 bits integer (by making sure to use the 
`divmod64` function ) for the division and modulos.


I will change this part of the code this night or maybe tomorrow and 
send you another version of the patch !


--
Paul

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH 1/7] introduce time managment in xtf

2018-04-04 Thread Pawel Wieczorkiewicz
From: Paul Semel 

this file is introduce to be able to implement an inter domain
communication protocol over xenstore. For synchronization purpose, we do
really want to be able to "control" time

common/time.c: since_boot_time gets the time in nanoseconds from the
moment the VM has booted

Signed-off-by: Paul Semel 

cr https://code.amazon.com/reviews/CR-786223
---
 build/files.mk |  1 +
 common/time.c  | 87 ++
 include/xtf/time.h | 35 ++
 3 files changed, 123 insertions(+)
 create mode 100644 common/time.c
 create mode 100644 include/xtf/time.h

diff --git a/build/files.mk b/build/files.mk
index 46b42d6..55ed1ca 100644
--- a/build/files.mk
+++ b/build/files.mk
@@ -16,6 +16,7 @@ obj-perarch += $(ROOT)/common/libc/vsnprintf.o
 obj-perarch += $(ROOT)/common/report.o
 obj-perarch += $(ROOT)/common/setup.o
 obj-perarch += $(ROOT)/common/xenbus.o
+obj-perarch += $(ROOT)/common/time.o
 
 obj-perenv += $(ROOT)/arch/x86/decode.o
 obj-perenv += $(ROOT)/arch/x86/desc.o
diff --git a/common/time.c b/common/time.c
new file mode 100644
index 000..11ac168
--- /dev/null
+++ b/common/time.c
@@ -0,0 +1,87 @@
+#include 
+#include 
+#include 
+
+/* This function was taken from mini-os source code */
+/* It returns ((delta << shift) * mul_frac) >> 32 */
+static inline uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int 
shift)
+{
+uint64_t product;
+#ifdef __i386__
+uint32_t tmp1, tmp2;
+#endif
+
+if ( shift < 0 )
+delta >>= -shift;
+else
+delta <<= shift;
+
+#ifdef __i386__
+__asm__ (
+"mul  %5   ; "
+"mov  %4,%%eax ; "
+"mov  %%edx,%4 ; "
+"mul  %5   ; "
+"add  %4,%%eax ; "
+"xor  %5,%5; "
+"adc  %5,%%edx ; "
+: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" 
(mul_frac) );
+#else
+__asm__ (
+"mul %%rdx ; shrd $32,%%rdx,%%rax"
+: "=a" (product) : "0" (delta), "d" ((uint64_t)mul_frac) );
+#endif
+
+return product;
+}
+
+
+#if defined(__i386__)
+uint32_t since_boot_time(void)
+#else
+uint64_t since_boot_time(void)
+#endif
+{
+uint64_t tsc;
+uint32_t version, wc_version;
+#if defined(__i386__)
+uint32_t system_time;
+#else
+uint64_t system_time;
+#endif
+uint64_t old_tsc;
+
+do
+{
+do
+{
+wc_version = shared_info.wc_version ;
+version = shared_info.vcpu_info[0].time.version;
+} while ( (version & 1) == 1 || (wc_version & 1) == 1);
+
+system_time = shared_info.vcpu_info[0].time.system_time;
+old_tsc = shared_info.vcpu_info[0].time.tsc_timestamp;
+} while (
+version != shared_info.vcpu_info[0].time.version ||
+wc_version != shared_info.wc_version
+);
+
+rdtscp(tsc);
+
+system_time += scale_delta(tsc - old_tsc,
+   shared_info.vcpu_info[0].time.tsc_to_system_mul,
+   shared_info.vcpu_info[0].time.tsc_shift);
+
+return system_time;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xtf/time.h b/include/xtf/time.h
new file mode 100644
index 000..15cbd48
--- /dev/null
+++ b/include/xtf/time.h
@@ -0,0 +1,35 @@
+/**
+ * @file include/xtf/time.h
+ *
+ * Time management
+ */
+#ifndef XTF_TIME_H
+# define XTF_TIME_H
+
+#include 
+
+#define rdtscp(tsc) {\
+uint32_t lo, hi;\
+__asm__ volatile("rdtsc": "=a"(lo), "=d"(hi));\
+tsc = ((uint64_t)hi << 32) | lo;\
+}
+
+
+#if defined(__i386__)
+/* Time from boot in nanoseconds */
+uint32_t since_boot_time(void);
+#else
+uint64_t since_boot_time(void);
+#endif
+
+#endif /* XTF_TIME_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.16.2

Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel