From a6361c78bf774f2b4dbeeaf4147c286cff4ae5a4 Mon Sep 17 00:00:00 2001
From: Igor Tsimbalist <igor.v.tsimbalist@intel.com>
Date: Tue, 7 Nov 2017 17:00:24 +0300
Subject: [PATCH 21/22] Add extra field to gtm_jmpbuf on x86 only

Expand the gtm_jmpbuf structure by one word-sized field in
preparation for adding Intel CET support. The code in sjlj.S
already allocates more space on the stack than gtm_jmpbuf needs.
Use this extra space to absorb the new field.

The structure is allocated on the stack in such a way
that the eip/rip field overlaps the return address on
the stack. Locate the new field right before eip/rip so
that code accessing the preceding buffer fields relative to
the address of gtm_jmpbuf keeps its offsets unchanged.

The libtool_VERSION is updated for x86 due to extending
the gtm_jmpbuf structure.

    * libitm/config/x86/target.h: Add new field (ssp).
    * libitm/config/x86/sjlj.S: Change offsets.
    * libitm/configure.tgt: Update libtool_VERSION.
---
 libitm/config/x86/sjlj.S   | 44 ++++++++++++++++++++++++--------------------
 libitm/config/x86/target.h |  2 ++
 libitm/configure.tgt       | 12 ++++++++++++
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/libitm/config/x86/sjlj.S b/libitm/config/x86/sjlj.S
index 21ca9d7..1c8597a 100644
--- a/libitm/config/x86/sjlj.S
+++ b/libitm/config/x86/sjlj.S
@@ -126,20 +126,22 @@ SYM(_ITM_beginTransaction):
 	/* Store edi for future HTM fast path retries.  We use a stack slot
 	   lower than the jmpbuf so that the jmpbuf's rip field will overlap
 	   with the proper return address on the stack.  */
-	movl	%edi, 8(%rsp)
+	movl	%edi, (%rsp)
 	/* Save the jmpbuf for any non-HTM-fastpath execution method.
 	   Because rsp-based addressing is 1 byte larger and we've got rax
 	   handy, use it.  */
-	movq	%rax, -64(%rax)
-	movq	%rbx, -56(%rax)
-	movq	%rbp, -48(%rax)
-	movq	%r12, -40(%rax)
-	movq	%r13, -32(%rax)
-	movq	%r14, -24(%rax)
-	movq	%r15, -16(%rax)
-	leaq	-64(%rax), %rsi
+	movq	%rax, -72(%rax)
+	movq	%rbx, -64(%rax)
+	movq	%rbp, -56(%rax)
+	movq	%r12, -48(%rax)
+	movq	%r13, -40(%rax)
+	movq	%r14, -32(%rax)
+	movq	%r15, -24(%rax)
+	xorq	%rdx, %rdx
+	movq	%rdx, -16(%rax)
+	leaq	-72(%rax), %rsi
 	call	SYM(GTM_begin_transaction)
-	movl	8(%rsp), %edi
+	movl	(%rsp), %edi
 	addq	$72, %rsp
 	cfi_adjust_cfa_offset(-72)
 #ifdef HAVE_AS_RTM
@@ -162,12 +164,14 @@ SYM(_ITM_beginTransaction):
 	movl	4(%esp), %eax
 	subl	$28, %esp
 	cfi_def_cfa_offset(32)
-	movl	%ecx, 8(%esp)
-	movl	%ebx, 12(%esp)
-	movl	%esi, 16(%esp)
-	movl	%edi, 20(%esp)
-	movl	%ebp, 24(%esp)
-	leal	8(%esp), %edx
+	movl	%ecx, 4(%esp)
+	movl	%ebx, 8(%esp)
+	movl	%esi, 12(%esp)
+	movl	%edi, 16(%esp)
+	movl	%ebp, 20(%esp)
+	xorl	%edx, %edx
+	movl	%edx, 24(%esp)	/* Zero the jmpbuf's ssp slot.  */
+	leal	4(%esp), %edx
 #if defined HAVE_ATTRIBUTE_VISIBILITY || !defined __PIC__
 	call	SYM(GTM_begin_transaction)
 #elif defined __ELF__
@@ -203,10 +207,10 @@ SYM(GTM_longjmp):
 	movq	48(%rsi), %r15
 	movl	%edi, %eax
 	cfi_def_cfa(%rsi, 0)
-	cfi_offset(%rip, 56)
+	cfi_offset(%rip, 64)
 	cfi_register(%rsp, %rcx)
 	movq	%rcx, %rsp
-	jmp	*56(%rsi)
+	jmp	*64(%rsi)
 #else
 	movl	(%edx), %ecx
 	movl	4(%edx), %ebx
@@ -214,10 +218,10 @@ SYM(GTM_longjmp):
 	movl	12(%edx), %edi
 	movl	16(%edx), %ebp
 	cfi_def_cfa(%edx, 0)
-	cfi_offset(%eip, 20)
+	cfi_offset(%eip, 24)
 	cfi_register(%esp, %ecx)
 	movl	%ecx, %esp
-	jmp	*20(%edx)
+	jmp	*24(%edx)
 #endif
 	cfi_endproc
 
diff --git a/libitm/config/x86/target.h b/libitm/config/x86/target.h
index 1b79dc0..5a4b820 100644
--- a/libitm/config/x86/target.h
+++ b/libitm/config/x86/target.h
@@ -39,12 +39,14 @@ typedef struct gtm_jmpbuf
   unsigned long long r13;
   unsigned long long r14;
   unsigned long long r15;
+  unsigned long long ssp;
   unsigned long long rip;
 #else
   unsigned long ebx;
   unsigned long esi;
   unsigned long edi;
   unsigned long ebp;
+  unsigned long ssp;
   unsigned long eip;
 #endif
 } gtm_jmpbuf;
diff --git a/libitm/configure.tgt b/libitm/configure.tgt
index 4ea71c8..49e4109 100644
--- a/libitm/configure.tgt
+++ b/libitm/configure.tgt
@@ -149,3 +149,15 @@ case "${target}" in
 	UNSUPPORTED=1
 	;;
 esac
+
+# Update libtool_VERSION since the size of struct gtm_jmpbuf is
+# changed for x86.
+case "${host}" in
+
+  # struct gtm_jmpbuf gained an extra field on x86, which
+  # changes its size, so bump the library version for 32-bit
+  # and 64-bit x86 hosts only.
+  i[456]86-*-* | x86_64-*-*)
+	libtool_VERSION=2:0:0
+	;;
+esac
-- 
1.8.3.1

