commit lua51-luajit for openSUSE:Factory

root Mon, 25 Feb 2019 08:58:54 -0800

Hello community,

here is the log from the commit of package lua51-luajit for openSUSE:Factory 
checked in at 2019-02-25 17:58:20
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/lua51-luajit (Old)
 and      /work/SRC/openSUSE:Factory/.lua51-luajit.new.28833 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "lua51-luajit"

Mon Feb 25 17:58:20 2019 rev:9 rq:678836 version:2.1.0~beta2

Changes:
--------
--- /work/SRC/openSUSE:Factory/lua51-luajit/lua51-luajit.changes        
2018-02-27 16:59:46.776092684 +0100
+++ /work/SRC/openSUSE:Factory/.lua51-luajit.new.28833/lua51-luajit.changes     
2019-02-25 17:58:34.586224262 +0100
@@ -1,0 +2,20 @@
+Thu Feb 21 17:12:55 UTC 2019 - Michal Suchanek <[email protected]>
+
+- Add ppc64le support from https://github.com/PPC64/LuaJIT (boo#1126363).
+  * 0001-PPC64-Fix-sradi-machine-code-offsets.patch
+  * 0002-PPC64-Add-method-for-external-branch-by-using-got-fo.patch
+  * 0003-PPC64-Add-LJ_GC64-mode-interpreter-for-ppc.patch
+  * 0004-PPC64-Add-special-instructions-for-PIC-code-setup.patch
+  * 0005-PPC64-Add-ffi-support.patch
+  * 0006-PPC64-Enable-support-for-ppc64-little-endian.patch
+  * 0007-PPC64-Fix-external-branches-that-should-address-on-R.patch
+  * 0008-luajit-2.1-fix-fp-parameter-passing-for-ppc64.patch
+  * 0009-PPC64-Fix-tab-indentation-from-last-commit.patch
+  * 0010-PPC64-Define-13-FPs-regs-as-arguments.patch
+  * 0011-PPC64-Fix-indentation-code-style.patch
+  * 0012-Fix-debug-information-for-PPC64.patch
+  * 0013-Fix-TOC-pointer-value-on-ffi-callback-handling.patch
+  * 0014-Improve-readability-of-a-load-instruction.patch
+  * 0015-Fix-remaining-unwind-values-on-vm-frames.patch
+
+-------------------------------------------------------------------

New:
----
  0001-PPC64-Fix-sradi-machine-code-offsets.patch
  0002-PPC64-Add-method-for-external-branch-by-using-got-fo.patch
  0003-PPC64-Add-LJ_GC64-mode-interpreter-for-ppc.patch
  0004-PPC64-Add-special-instructions-for-PIC-code-setup.patch
  0005-PPC64-Add-ffi-support.patch
  0006-PPC64-Enable-support-for-ppc64-little-endian.patch
  0007-PPC64-Fix-external-branches-that-should-address-on-R.patch
  0008-luajit-2.1-fix-fp-parameter-passing-for-ppc64.patch
  0009-PPC64-Fix-tab-indentation-from-last-commit.patch
  0010-PPC64-Define-13-FPs-regs-as-arguments.patch
  0011-PPC64-Fix-indentation-code-style.patch
  0012-Fix-debug-information-for-PPC64.patch
  0013-Fix-TOC-pointer-value-on-ffi-callback-handling.patch
  0014-Improve-readability-of-a-load-instruction.patch
  0015-Fix-remaining-unwind-values-on-vm-frames.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ lua51-luajit.spec ++++++
--- /var/tmp/diff_new_pack.xsJpjk/_old  2019-02-25 17:58:35.646223226 +0100
+++ /var/tmp/diff_new_pack.xsJpjk/_new  2019-02-25 17:58:35.646223226 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package lua51-luajit
 #
-# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -12,7 +12,7 @@
 # license that conforms to the Open Source Definition (Version 1.9)
 # published by the Open Source Initiative.
 
-# Please submit bugfixes or comments via http://bugs.opensuse.org/
+# Please submit bugfixes or comments via https://bugs.opensuse.org/
 #
 
 
@@ -32,6 +32,21 @@
 Source0:        http://luajit.org/download/LuaJIT-%{realver}.tar.gz
 Source1:        baselibs.conf
 Patch0:         luajit-lua-versioned.patch
+Patch1:         0001-PPC64-Fix-sradi-machine-code-offsets.patch
+Patch2:         0002-PPC64-Add-method-for-external-branch-by-using-got-fo.patch
+Patch3:         0003-PPC64-Add-LJ_GC64-mode-interpreter-for-ppc.patch
+Patch4:         0004-PPC64-Add-special-instructions-for-PIC-code-setup.patch
+Patch5:         0005-PPC64-Add-ffi-support.patch
+Patch6:         0006-PPC64-Enable-support-for-ppc64-little-endian.patch
+Patch7:         0007-PPC64-Fix-external-branches-that-should-address-on-R.patch
+Patch8:         0008-luajit-2.1-fix-fp-parameter-passing-for-ppc64.patch
+Patch9:         0009-PPC64-Fix-tab-indentation-from-last-commit.patch
+Patch10:        0010-PPC64-Define-13-FPs-regs-as-arguments.patch
+Patch11:        0011-PPC64-Fix-indentation-code-style.patch
+Patch12:        0012-Fix-debug-information-for-PPC64.patch
+Patch13:        0013-Fix-TOC-pointer-value-on-ffi-callback-handling.patch
+Patch14:        0014-Improve-readability-of-a-load-instruction.patch
+Patch15:        0015-Fix-remaining-unwind-values-on-vm-frames.patch
 BuildRequires:  pkgconfig
 Requires:       libluajit-%{lua_suffix}-%{lib_suffix} = %{version}
 Requires(post): update-alternatives
@@ -39,7 +54,7 @@
 Conflicts:      luajit
 Provides:       luajit = %{version}
 # lj_arch.h do not support ppc64/ppc64le/s390/s390x
-ExcludeArch:    ppc64 ppc64le s390 s390x
+ExcludeArch:    ppc64 s390 s390x
 
 %description
 A Just-In-Time Compiler for Lua language
@@ -64,7 +79,8 @@
 
 %prep
 %setup -q -n LuaJIT-%{realver}
-%patch0 -p1
+%autopatch -p1
+
 # Fix variables
 sed -i "s,PREFIX= %{_prefix}/local,PREFIX= %{_prefix}," Makefile
 

++++++ 0001-PPC64-Fix-sradi-machine-code-offsets.patch ++++++
>From d2ba9ab5700fdf86853fd6f6de71d3adb216b634 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 2 Jun 2015 14:36:20 -0300
Subject: [PATCH 01/15] PPC64: Fix sradi machine code offsets

---
 dynasm/dasm_ppc.lua | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index f73974dd7f9e..b294d1063eed 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -764,7 +764,7 @@ map_op = {
   lfddx_3 =    "7c000646FRR",
   stvepx_3 =   "7c00064eVRR",
   srawi_3 =    "7c000670RR~A.",
-  sradi_3 =    "7c000674RR~H.",
+  sradi_3 =    "7c000674RR~f.",
   eieio_0 =    "7c0006ac",
   lfiwax_3 =   "7c0006aeFR0R",
   divdeuo_3 =  "7c000712RRR.",
@@ -1718,7 +1718,12 @@ op_template = function(params, template, nparams)
     elseif p == "G" then
       op = op + parse_imm(params[n], 8, 12, 0, false); n = n + 1
     elseif p == "H" then
-      op = op + parse_shiftmask(params[n], true); n = n + 1
+      v = parse_imm(params[n], 6, 0, 0, false);
+      op = op + shl(band(v,31), 11)+shl(shr(v,5), 1);
+      n = n + 1;
+    elseif p == "f" then
+      v = tonumber(params[n]);
+      op = op + shl(band(v,31), 11)+shl(shr(v,5), 1);
     elseif p == "M" then
       op = op + parse_shiftmask(params[n], false); n = n + 1
     elseif p == "J" or p == "K" then
-- 
2.20.1

++++++ 0002-PPC64-Add-method-for-external-branch-by-using-got-fo.patch ++++++
>From 3a13a0544b77753b66846079ce0a4afb37d6cc43 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 2 Jun 2015 15:54:46 -0300
Subject: [PATCH 02/15] PPC64: Add method for external branch by using @got for
 PIC

---
 src/host/buildvm_asm.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index ffd14903c64c..7b6c8b2dff2f 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -145,11 +145,9 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, 
int n,
            (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
   } else if ((ins >> 26) == 18) {
 #if LJ_ARCH_PPC64
-    const char *suffix = strchr(sym, '@');
-    if (suffix && suffix[1] == 'h') {
-      fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
-    } else if (suffix && suffix[1] == 'l') {
-      fprintf(ctx->fp, "\tld 12, %s\n", sym);
+    char *suffix = strchr(sym, '@');
+    if (suffix) {
+      fprintf(ctx->fp, "\tld 12, %s(2)\n", sym);
     } else
 #endif
     fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
@@ -252,6 +250,7 @@ void emit_asm(BuildCtx *ctx)
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
 #if LJ_ARCH_PPC64
   fprintf(ctx->fp, "\t.abiversion 2\n");
+  fprintf(ctx->fp, "\t.section\t\t\".toc\",\"aw\"\n");
 #endif
   fprintf(ctx->fp, "\t.text\n");
   emit_asm_align(ctx, 4);
-- 
2.20.1

++++++ 0003-PPC64-Add-LJ_GC64-mode-interpreter-for-ppc.patch ++++++
++++ 4548 lines (skipped)

++++++ 0004-PPC64-Add-special-instructions-for-PIC-code-setup.patch ++++++
>From cad7da065e75dc1bc4be6c5ec13233f6be4a2540 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 2 Jun 2015 14:35:00 -0300
Subject: [PATCH 04/15] PPC64: Add special instructions for PIC code setup

In order to support the following instructions described by the ABI,
dynasm needed to be updated:

"""
The following code might appear in a PIC code setup sequence to compute
the distance from a function entry point to the TOC base:
addis 2,12,.TOC.-func@ha
addi 2,2,.TOC.-func@l
"""

Power Architecture 64-Bit ELF V2 ABI Specification, version 1.0, page 99
Source: 
http://openpowerfoundation.org/technical/technical-resources/technical-specifications/
---
 dynasm/dasm_ppc.lua    |  2 ++
 src/host/buildvm_asm.c | 10 ++++++++++
 2 files changed, 12 insertions(+)

diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index b294d1063eed..4dc39da6a308 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -257,9 +257,11 @@ map_op = {
   addic_3 =    "30000000RRI",
   ["addic._3"] = "34000000RRI",
   addi_3 =     "38000000RR0I",
+  addil_3 =    "38000000RR0J",
   li_2 =       "38000000RI",
   la_2 =       "38000000RD",
   addis_3 =    "3c000000RR0I",
+  addisl_3 =   "3c000000RR0J",
   lis_2 =      "3c000000RI",
   lus_2 =      "3c000000RU",
   bc_3 =       "40000000AAK",
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 7b6c8b2dff2f..259ab9b6b094 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -143,6 +143,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, 
int n,
   if ((ins >> 26) == 16) {
     fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
            (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
+#if LJ_ARCH_PPC64
+  } else if ((ins >> 26) == 14) {
+    if (strcmp(sym, "TOC") < 0) {
+      fprintf(ctx->fp, "\taddi 2,2,%s\n", sym);
+    }
+  } else if ((ins >> 26) == 15) {
+    if (strcmp(sym, "TOC") < 0) {
+      fprintf(ctx->fp, "\taddis 2,12,%s\n", sym);
+    }
+#endif
   } else if ((ins >> 26) == 18) {
 #if LJ_ARCH_PPC64
     char *suffix = strchr(sym, '@');
-- 
2.20.1

++++++ 0005-PPC64-Add-ffi-support.patch ++++++
>From 9786fcf845c109af47c28ccc50cfea836fefe96d Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 2 Jun 2015 15:15:37 -0300
Subject: [PATCH 05/15] PPC64: Add ffi support

---
 src/lj_ccall.c      | 32 ++++++++++++++++++++++++++++++++
 src/lj_ccall.h      |  9 +++++++++
 src/lj_ccallback.c  | 15 +++++++++++++++
 src/lj_target_ppc.h |  9 +++++++++
 4 files changed, 65 insertions(+)

diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 5c252e5b6830..babf6a29ed05 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -387,6 +387,37 @@
 #define CCALL_HANDLE_COMPLEXARG \
   /* Pass complex by value in 2 or 4 GPRs. */
 
+#if LJ_ARCH_PPC64
+#define CCALL_HANDLE_REGARG \
+  if (isva) {  /* only GPRs will be used on C ellipsis operator */ \
+    goto gpr; \
+  } \
+  else { \
+    if (isfp) {  /* Try to pass argument in FPRs. */ \
+      if (nfpr + 1 <= CCALL_NARG_FPR) { \
+       dp = &cc->fpr[nfpr]; \
+       nfpr += 1; \
+       ngpr += 1;  /* align GPRs */ \
+       d = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
+       goto done; \
+      } \
+    } else {  /* Try to pass argument in GPRs. */ \
+  gpr: \
+      if (n > 1) { \
+       lua_assert(n == 2 || n == 4);  /* int64_t or complex (float). */ \
+       if (ctype_isinteger(d->info)) \
+         ngpr = (ngpr + 1u) & ~1u;  /* Align int64_t to regpair. */ \
+       else if (ngpr + n > maxgpr) \
+         ngpr = maxgpr;  /* Prevent reordering. */ \
+      } \
+      if (ngpr + n <= maxgpr) { \
+       dp = &cc->gpr[ngpr]; \
+       ngpr += n; \
+       goto done; \
+      } \
+    } \
+  }
+#else  /* 32 bits */
 #define CCALL_HANDLE_REGARG \
   if (isfp) {  /* Try to pass argument in FPRs. */ \
     if (nfpr + 1 <= CCALL_NARG_FPR) { \
@@ -409,6 +440,7 @@
       goto done; \
     } \
   }
+#endif
 
 #define CCALL_HANDLE_RET \
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 59f664817a29..3b26e8f201b1 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -85,12 +85,21 @@ typedef union FPRArg {
 
 #elif LJ_TARGET_PPC
 
+#if LJ_ARCH_PPC64
+#define CCALL_NARG_GPR         8
+#define CCALL_NARG_FPR         8
+#define CCALL_NRET_GPR         4       /* For complex double. */
+#define CCALL_NRET_FPR         1
+#define CCALL_SPS_EXTRA                14
+#define CCALL_SPS_FREE         0
+#else
 #define CCALL_NARG_GPR         8
 #define CCALL_NARG_FPR         8
 #define CCALL_NRET_GPR         4       /* For complex double. */
 #define CCALL_NRET_FPR         1
 #define CCALL_SPS_EXTRA                4
 #define CCALL_SPS_FREE         0
+#endif
 
 typedef intptr_t GPRArg;
 typedef double FPRArg;
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 846827b119b4..e44d077d4345 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -61,7 +61,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 
 #elif LJ_TARGET_PPC
 
+#if LJ_ARCH_PPC64
+#define CALLBACK_MCODE_HEAD            40
+#else  /* PPC 32bits */
 #define CALLBACK_MCODE_HEAD            24
+#endif
 
 #elif LJ_TARGET_MIPS32
 
@@ -193,10 +197,21 @@ static void callback_mcode_init(global_State *g, uint32_t 
*page)
   uint32_t *p = page;
   void *target = (void *)lj_vm_ffi_callback;
   MSize slot;
+#if LJ_ARCH_PPC64
+  *p++ = PPCI_LI | PPCF_T(RID_TMP) | ((((intptr_t)target) >> 32) & 0xffff);
+  *p++ = PPCI_LI | PPCF_T(RID_R12) | ((((intptr_t)g) >> 32) & 0xffff);
+  *p++ = PPCI_RLDICR | PPCF_T(RID_TMP) | PPCF_A(RID_TMP) | PPCF_SH(32) | 
PPCF_M6(63-32);  /* sldi */
+  *p++ = PPCI_RLDICR | PPCF_T(RID_R12) | PPCF_A(RID_R12) | PPCF_SH(32) | 
PPCF_M6(63-32);  /* sldi */
+  *p++ = PPCI_ORIS | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | ((((intptr_t)target) 
>> 16) & 0xffff);
+  *p++ = PPCI_ORIS | PPCF_A(RID_R12) | PPCF_T(RID_R12) | ((((intptr_t)g) >> 
16) & 0xffff);
+  *p++ = PPCI_ORI | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | (((intptr_t)target) & 
0xffff);
+  *p++ = PPCI_ORI | PPCF_A(RID_R12) | PPCF_T(RID_R12) | (((intptr_t)g) & 
0xffff);
+#else  /* PPC 32bits */
   *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
   *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16);
   *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 
0xffff);
   *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff);
+#endif
   *p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
   *p++ = PPCI_BCTR;
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index c5c991a377af..8b8827655259 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -131,6 +131,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t 
*p, uint32_t exitno)
 #define PPCF_C(r)      ((r) << 6)
 #define PPCF_MB(n)     ((n) << 6)
 #define PPCF_ME(n)     ((n) << 1)
+#define PPCF_SH(n)     ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1)))
+#define PPCF_M6(n)     ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5)))
 #define PPCF_Y         0x00200000
 #define PPCF_DOT       0x00000001
 
@@ -200,6 +202,13 @@ typedef enum PPCIns {
   PPCI_RLWINM = 0x54000000,
   PPCI_RLWIMI = 0x50000000,
 
+  PPCI_RLDICL = 0x78000000,
+  PPCI_RLDICR = 0x78000004,
+  PPCI_RLDIC = 0x78000008,
+  PPCI_RLDIMI = 0x7800000c,
+  PPCI_RLDCL = 0x78000010,
+  PPCI_RLDCR = 0x78000012,
+
   PPCI_B = 0x48000000,
   PPCI_BL = 0x48000001,
   PPCI_BC = 0x40800000,
-- 
2.20.1

++++++ 0006-PPC64-Enable-support-for-ppc64-little-endian.patch ++++++
>From 47afac81d171f736a1ec486692893911404ed189 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 2 Jun 2015 14:49:19 -0300
Subject: [PATCH 06/15] PPC64: Enable support for ppc64 little endian

---
 src/lj_arch.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lj_arch.h b/src/lj_arch.h
index c8d7138e0e93..c27ca6a5197d 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -424,8 +424,8 @@
 #if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
 #error "No support for little-endian PPC32"
 #endif
-#if LJ_ARCH_PPC64
-#error "No support for PowerPC 64 bit mode (yet)"
+#if LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_BE
+#error "No support for big-endian PPC64"
 #endif
 #ifdef __NO_FPRS__
 #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
-- 
2.20.1

++++++ 0007-PPC64-Fix-external-branches-that-should-address-on-R.patch ++++++
>From 948eaf198b91b8e977ad043e8d574aa78e77276d Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Fri, 19 Feb 2016 15:09:18 -0200
Subject: [PATCH 07/15] PPC64: Fix external branches that should address on R12

The TOC register was not set correctly when branching through registers
other than R12, because the PIC code setup uses the value in R12 to
compute R2 (the TOC register). This is only noticeable when LuaJIT is
used as a library, as torch does in its qtlua/qlua subproject.
---
 src/vm_ppc64.dasc | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc
index 45e5af910a29..b68cf36249a4 100644
--- a/src/vm_ppc64.dasc
+++ b/src/vm_ppc64.dasc
@@ -59,7 +59,7 @@
 |.define RA,           r20     // Callee-save.
 |.define RB,           r10
 |.define RC,           r11
-|.define RD,           r12
+|.define RD,           r12     // Also used as function linkage register
 |.define INS,          r7      // Overlaps CARG5.
 |
 |.define TMP0,         r0
@@ -696,7 +696,8 @@ static void build_subroutines(BuildCtx *ctx)
   |    std TMP1, SAVE_CFRAME
   |    std sp, L->cframe               // Add our C frame to cframe chain.
   |     std L, DISPATCH_GL(cur_L)(DISPATCH)
-  |  mtctr CARG4
+  |  mr r12, CARG4                     // keep r12 for function linkage.
+  |  mtctr r12
   |  bctrl                     // (lua_State *L, lua_CFunction func, void *ud)
   |  mr. BASE, CRET1
   |   li PC, FRAME_CP
@@ -2059,7 +2060,8 @@ static void build_subroutines(BuildCtx *ctx)
   |    std TMP1, L->top
   |   mr CARG1, L
   |  bgt >5                            // Need to grow stack.
-  |  mtctr TMP3
+  |  mr r12, TMP3                      // keep r12 for function linkage.
+  |  mtctr r12
   |  bctrl                             // (lua_State *L)
   |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
   |  ld BASE, L->base
@@ -4326,7 +4328,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |   cmpld TMP1, TMP2
     |    std RC, L->top
     |     li_vmstate C
-    |  mtctr RD
+    |  mtctr RD                                // RD is r12, the function 
linkage register
     if (op == BC_FUNCCW) {
       |  ld CARG2, CFUNC:RB->f
     }
-- 
2.20.1

++++++ 0008-luajit-2.1-fix-fp-parameter-passing-for-ppc64.patch ++++++
>From a06714652b81b97ad4922bdc5b0cfc7b5977257c Mon Sep 17 00:00:00 2001
From: "Brian W. Hart" <[email protected]>
Date: Wed, 31 Aug 2016 11:04:24 -0500
Subject: [PATCH 08/15] luajit-2.1: fix fp parameter passing for ppc64

The POWER 64-bit LE ABI calls for floating point function
arguments beyond the 8th to be passed via floating point
registers and also to reserve a slot in the parameter save
area on the stack. The PPC CCALL_HANDLE_REGARG correctly
includes the values in FPRs, but neglects to reserve
corresponding slots when spilling into the save area.
---
 src/lj_ccall.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index babf6a29ed05..1adc01173fd1 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -397,8 +397,13 @@
       if (nfpr + 1 <= CCALL_NARG_FPR) { \
        dp = &cc->fpr[nfpr]; \
        nfpr += 1; \
-       ngpr += 1;  /* align GPRs */ \
        d = ctype_get(cts, CTID_DOUBLE);  /* FPRs always hold doubles. */ \
+       if (ngpr + 1 <= maxgpr) \
+         ngpr += 1;  /* align GPRs */ \
+       else if (nsp + 1 <= CCALL_MAXSTACK) \
+         nsp += 1; /* align save area slots */ \
+        else \
+          goto err_nyi; /* Too many args */ \
        goto done; \
       } \
     } else {  /* Try to pass argument in GPRs. */ \
-- 
2.20.1

++++++ 0009-PPC64-Fix-tab-indentation-from-last-commit.patch ++++++
>From 83532a417beb97db552cab2d04339fa53c693305 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Mon, 5 Sep 2016 10:30:08 -0300
Subject: [PATCH 09/15] PPC64: Fix tab indentation from last commit

---
 src/lj_ccall.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 1adc01173fd1..e369e2ab2867 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -402,8 +402,8 @@
          ngpr += 1;  /* align GPRs */ \
        else if (nsp + 1 <= CCALL_MAXSTACK) \
          nsp += 1; /* align save area slots */ \
-        else \
-          goto err_nyi; /* Too many args */ \
+       else \
+         goto err_nyi; /* Too many args */ \
        goto done; \
       } \
     } else {  /* Try to pass argument in GPRs. */ \
-- 
2.20.1

++++++ 0010-PPC64-Define-13-FPs-regs-as-arguments.patch ++++++
>From 1e8532daeae19294df997b2dab70737f8f95e55c Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 6 Sep 2016 13:04:16 -0300
Subject: [PATCH 10/15] PPC64: Define 13 FPs regs as arguments

The ABI mandates 13 FP argument registers, but only 8 were implemented.
---
 src/lj_ccall.h    | 2 +-
 src/lj_ctype.h    | 2 +-
 src/vm_ppc64.dasc | 5 +++++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 3b26e8f201b1..a222e526279e 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -87,7 +87,7 @@ typedef union FPRArg {
 
 #if LJ_ARCH_PPC64
 #define CCALL_NARG_GPR         8
-#define CCALL_NARG_FPR         8
+#define CCALL_NARG_FPR         13
 #define CCALL_NRET_GPR         4       /* For complex double. */
 #define CCALL_NRET_FPR         1
 #define CCALL_SPS_EXTRA                14
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 0c220a888668..ed974bb9bfa3 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -153,7 +153,7 @@ typedef struct CType {
 
 /* Simplify target-specific configuration. Checked in lj_ccall.h. */
 #define CCALL_MAX_GPR          8
-#define CCALL_MAX_FPR          8
+#define CCALL_MAX_FPR          13
 
 typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;
 
diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc
index b68cf36249a4..215eb2dca592 100644
--- a/src/vm_ppc64.dasc
+++ b/src/vm_ppc64.dasc
@@ -2394,6 +2394,11 @@ static void build_subroutines(BuildCtx *ctx)
   |  lfd f6, CCSTATE->fpr[5]
   |  lfd f7, CCSTATE->fpr[6]
   |  lfd f8, CCSTATE->fpr[7]
+  |  lfd f9, CCSTATE->fpr[8]
+  |  lfd f10, CCSTATE->fpr[9]
+  |  lfd f11, CCSTATE->fpr[10]
+  |  lfd f12, CCSTATE->fpr[11]
+  |  lfd f13, CCSTATE->fpr[12]
   |3:
   |  ld r12, CCSTATE->func
   |  ld CARG2, CCSTATE->gpr[1]
-- 
2.20.1

++++++ 0011-PPC64-Fix-indentation-code-style.patch ++++++
>From 376925a6c542269fe9355d48a40d25a184099968 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 6 Sep 2016 13:07:07 -0300
Subject: [PATCH 11/15] PPC64: Fix indentation code style

---
 src/vm_ppc64.dasc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc
index 215eb2dca592..d7e6bb6ab530 100644
--- a/src/vm_ppc64.dasc
+++ b/src/vm_ppc64.dasc
@@ -110,7 +110,7 @@
 |.define SAVE_TOC,     24(sp)  // TOC save area.
 |// Next frame lr:     16(sp)
 |.define SAVE_CR,      8(sp)   // 64 bit CR save.
-|// Back chain for sp: 0(sp)   <-- sp while in interpreter
+|// Back chain for sp: 0(sp)   <-- sp while in interpreter
 |
 |.define TMPD_BLO,     40(sp)  // LSB
 |.define TMPD,         TMPD_LO // base address of TMPD doubleword
-- 
2.20.1

++++++ 0012-Fix-debug-information-for-PPC64.patch ++++++
>From 76d561731a88cde95f171e01820c7af9edc62ead Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Tue, 29 Aug 2017 17:45:47 -0300
Subject: [PATCH 12/15] Fix debug information for PPC64

Removed the complicated handling of lj_vm_ffi_call (it was a
variable-size frame); backtraces now work everywhere, e.g.:

 #0  0x00003fffb7d4875c in __libc_send (fd=32, buf=0x3fffb09a0028, len=8192, 
flags=0) at ../sysdeps/unix/sysv/linux/send.c:31
 #1  0x00003fffb7bea214 in socket_send (ps=0x3fffb7bc7778, data=0x3fffb09a0028 
'A' <repeats 200 times>..., count=8192, sent=0x3fffffffee60, tm=0x3fffb7bc97d8) 
at usocket.c:205
 #2  0x00003fffb7be4ef8 in sendraw (buf=0x3fffb7bc77a0, data=0x3fffb09a0028 'A' 
<repeats 200 times>..., count=52428800, sent=0x3fffffffeee8) at buffer.c:176
 #3  0x00003fffb7be4960 in buffer_meth_send (L=0x3fffb7f6d280, 
buf=0x3fffb7bc77a0) at buffer.c:87
 #4  0x00003fffb7bec3f4 in meth_send (L=0x3fffb7f6d280) at tcp.c:130
 #5  0x0000000010042d44 in lj_BC_FUNCC ()
 #6  0x0000000010043f24 in lj_ff_coroutine_resume ()
 #7  0x000000001001d7d4 in lua_pcall (L=0x3fffb7f60378, nargs=0, nresults=-1, 
errfunc=2) at lj_api.c:1129
 #8  0x00000000100045e8 in docall (L=0x3fffb7f60378, narg=0, clear=0) at 
luajit.c:121
 #9  0x00000000100053ec in handle_script (L=0x3fffb7f60378, 
argx=0x3ffffffffa40) at luajit.c:291
 #10 0x0000000010006600 in pmain (L=0x3fffb7f60378) at luajit.c:551
 #11 0x0000000010042d44 in lj_BC_FUNCC ()
 #12 0x000000001001da40 in lua_cpcall (L=0x3fffb7f60378, func=0x10006334 
<pmain>, ud=0x0) at lj_api.c:1153
 #13 0x00000000100067a4 in main (argc=2, argv=0x3ffffffffa38) at luajit.c:580
---
 src/vm_ppc64.dasc | 118 +++++++---------------------------------------
 1 file changed, 17 insertions(+), 101 deletions(-)

diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc
index d7e6bb6ab530..5d15d01ac820 100644
--- a/src/vm_ppc64.dasc
+++ b/src/vm_ppc64.dasc
@@ -2360,17 +2360,15 @@ static void build_subroutines(BuildCtx *ctx)
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
   |  .type CCSTATE, CCallState, CARG1
-  |  lwz TMP1, CCSTATE->spadj
   |    mflr TMP0
   |   lbz CARG2, CCSTATE->nsp
   |   lbz CARG3, CCSTATE->nfpr
-  |  neg TMP1, TMP1
   |    std TMP0, 16(sp)
   |   cmpdi cr1, CARG3, 0
   |   std TOCREG, 24(sp)
   |  mr TMP2, sp
   |   addic. CARG2, CARG2, -1
-  |  stdux sp, sp, TMP1
+  |  stdu sp, -CFRAME_SPACE(sp)
   |   crnot 4*cr1+eq, 4*cr1+eq         // For vararg calls.
   |  std r14, -8(TMP2)
   |  std CCSTATE, -16(TMP2)
@@ -4381,8 +4379,7 @@ static int build_backend(BuildCtx *ctx)
 /* Emit pseudo frame-info for all assembler functions. */
 static void emit_asm_debug(BuildCtx *ctx)
 {
-  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-  int i, lr_offset = -16 >> 2;
+  int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
     fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
@@ -4394,11 +4391,12 @@ static void emit_asm_debug(BuildCtx *ctx)
        "\t.byte 0x1\n"                 /* Version */
        "\t.string \"\"\n"              /* augmentation */
        "\t.uleb128 0x1\n"              /* code_alignment_factor */
-       "\t.sleb128 -4\n"               /* data_alignment_factor */
+       "\t.sleb128 -8\n"               /* data_alignment_factor */
        "\t.byte 65\n"                  /* return_address_register (LR) */
-       "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"     /* DW_CFA_def_cfa */
+       "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 %d\n"    /* DW_CFA_def_cfa */
        "\t.align 2\n"
-       ".LECIE0:\n\n");
+       ".LECIE0:\n\n",
+        CFRAME_SIZE);
     fprintf(ctx->fp,
        ".LSFDE0:\n"                    /* Frame Description Entry (FDE) */
        "\t.long .LEFDE0-.LASFDE0\n"    /* length */
@@ -4407,11 +4405,17 @@ static void emit_asm_debug(BuildCtx *ctx)
        "\t.long .Lbegin\n"             /* initial_location */
        "\t.long %d\n"                  /* address_range */
        "\t.byte 0xe\n\t.uleb128 %d\n"  /* DW_CFA_def_cfa_offset */
-       /* DW_CFA_offset_extended_sf */
-       "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n"
-       /* DW_CFA_offset_extended */
-       "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-       fcofs, CFRAME_SIZE, lr_offset);
+       /* DW_CFA_offset_extended_sf (TOC) */
+       "\t.byte 0x11\n\t.uleb128 2\n\t.sleb128 %d\n"
+
+        /* DW_CFA_val_expression of size 9 for LR register */
+       "\t.byte 0x16\n\t.uleb128 65\n\t.uleb128 9\n"
+       "\t.byte 0x70\n\t.uleb128 1\n\t.sleb128 0\n"    /* DW_OP_breg */
+       "\t.byte 0x06\n"                /* DW_OP_deref */
+       "\t.byte 0x11\n\t.sleb128 16\n" /* DW_OP_consts */
+       "\t.byte 0x22\n"                /* DW_OP_plus */
+       "\t.byte 0x06\n",               /* DW_OP_deref */
+       (int)ctx->codesz, CFRAME_SIZE, 24 / -8);
     for (i = 14; i <= 31; i++)
       fprintf(ctx->fp,
        "\t.byte %d\n\t.uleb128 %d\n"   /* DW_CFA_offset from r14 to r31 */
@@ -4420,94 +4424,6 @@ static void emit_asm_debug(BuildCtx *ctx)
     fprintf(ctx->fp,
        "\t.align 2\n"
        ".LEFDE0:\n\n");
-#if LJ_HASFFI
-    fprintf(ctx->fp,
-       ".LSFDE1:\n"                    /* Frame Description Entry (FDE) */
-       "\t.long .LEFDE1-.LASFDE1\n"    /* length */
-       ".LASFDE1:\n"
-       "\t.long .Lframe0\n"            /* CIE_ptr */
-       "\t.long lj_vm_ffi_call\n"      /* initial_location */
-       "\t.long %d\n"                  /* address_range */
-       /* DW_CFA_offset_extended_sf */
-       "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n"
-       "\t.byte 0x8e\n\t.uleb128 2\n"  /* DW_CFA_offset */
-       "\t.byte 0xd\n\t.uleb128 0xe\n" /* DW_CFA_def_cfa_register */
-       "\t.align 2\n"
-       ".LEFDE1:\n\n", (int)ctx->codesz - fcofs, lr_offset);
-#endif
-#if !LJ_NO_UNWIND
-    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
-    fprintf(ctx->fp,
-       ".Lframe1:\n"
-       "\t.long .LECIE1-.LSCIE1\n"     /* length */
-       ".LSCIE1:\n"                    /* Common Information Entry (CIE) */
-       "\t.long 0\n"                   /* CIE_Id */
-       "\t.byte 0x1\n"                 /* Version */
-       "\t.string \"zPR\"\n"           /* augmentation string */
-       "\t.uleb128 0x1\n"              /* code_alignment_factor */
-       "\t.sleb128 -4\n"               /* data_alignment_factor */
-       "\t.byte 65\n"                  /* return_address_register (LR) */
-       "\t.uleb128 6\n"                        /* augmentation length */
-       "\t.byte 0x1b\n"                        /* pcrel|sdata4 */
-       "\t.long lj_err_unwind_dwarf-.\n"
-       "\t.byte 0x1b\n"                        /* pcrel|sdata4 */
-       "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"     /* DW_CFA_def_cfa */
-       "\t.align 2\n"
-       ".LECIE1:\n\n");
-    fprintf(ctx->fp,
-       ".LSFDE2:\n"
-       "\t.long .LEFDE2-.LASFDE2\n"
-       ".LASFDE2:\n"
-       "\t.long .LASFDE2-.Lframe1\n"
-       "\t.long .Lbegin-.\n"
-       "\t.long %d\n"
-       "\t.uleb128 0\n"                        /* augmentation length */
-       "\t.byte 0xe\n\t.uleb128 %d\n"          /* DW_CFA_def_cfa_offset */
-       /* DW_CFA_offset_extended_sf */
-       "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n"
-       /* DW_CFA_offset_extended */
-       "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-       fcofs, CFRAME_SIZE, lr_offset);
-    for (i = 14; i <= 31; i++)
-      fprintf(ctx->fp,
-       "\t.byte %d\n\t.uleb128 %d\n"   /* DW_CFA_offset from r14 to r31 */
-       "\t.byte %d\n\t.uleb128 %d\n",  /* DW_CFA_offset from f14 to f31 */
-       0x80+i, 38+2*(31-i), 0x80+32+i, 2+2*(31-i));
-    fprintf(ctx->fp,
-       "\t.align 2\n"
-       ".LEFDE2:\n\n");
-#if LJ_HASFFI
-    fprintf(ctx->fp,
-       ".Lframe2:\n"
-       "\t.long .LECIE2-.LSCIE2\n"
-       ".LSCIE2:\n"                    /* Common Information Entry (CIE) */
-       "\t.long 0\n"                   /* CIE_Id */
-       "\t.byte 0x1\n"                 /* Version */
-       "\t.string \"zR\"\n"            /* augmentation string */
-       "\t.uleb128 0x1\n"              /* code_alignment_factor */
-       "\t.sleb128 -4\n"               /* data_alignment_factor */
-       "\t.byte 65\n"                  /* return_address_register (LR) */
-       "\t.uleb128 1\n"                        /* augmentation length */
-       "\t.byte 0x1b\n"                        /* pcrel|sdata4 */
-       "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"     /* DW_CFA_def_cfa */
-       "\t.align 2\n"
-       ".LECIE2:\n\n");
-    fprintf(ctx->fp,
-       ".LSFDE3:\n"
-       "\t.long .LEFDE3-.LASFDE3\n"
-       ".LASFDE3:\n"
-       "\t.long .LASFDE3-.Lframe2\n"
-       "\t.long lj_vm_ffi_call-.\n"
-       "\t.long %d\n"
-       "\t.uleb128 0\n"                        /* augmentation length */
-       /* DW_CFA_offset_extended_sf */
-       "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n"
-       "\t.byte 0x8e\n\t.uleb128 2\n"  /* DW_CFA_offset */
-       "\t.byte 0xd\n\t.uleb128 0xe\n" /* DW_CFA_def_cfa_register */
-       "\t.align 2\n"
-       ".LEFDE3:\n\n", (int)ctx->codesz - fcofs, lr_offset);
-#endif
-#endif
     break;
   default:
     break;
-- 
2.20.1

++++++ 0013-Fix-TOC-pointer-value-on-ffi-callback-handling.patch ++++++
>From f286f26af6368947ad975753de77a4f8c7d105e0 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Wed, 30 Aug 2017 11:03:20 -0300
Subject: [PATCH 13/15] Fix TOC pointer value on ffi callback handling

---
 src/lj_ccallback.c | 18 ++++++++++--------
 src/vm_ppc64.dasc  |  3 ++-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index e44d077d4345..edb8736539d8 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -198,21 +198,23 @@ static void callback_mcode_init(global_State *g, uint32_t 
*page)
   void *target = (void *)lj_vm_ffi_callback;
   MSize slot;
 #if LJ_ARCH_PPC64
-  *p++ = PPCI_LI | PPCF_T(RID_TMP) | ((((intptr_t)target) >> 32) & 0xffff);
-  *p++ = PPCI_LI | PPCF_T(RID_R12) | ((((intptr_t)g) >> 32) & 0xffff);
-  *p++ = PPCI_RLDICR | PPCF_T(RID_TMP) | PPCF_A(RID_TMP) | PPCF_SH(32) | 
PPCF_M6(63-32);  /* sldi */
+  // Store on R0 the global state and point R12 to the function so TOC is 
calculated correctly.
+  *p++ = PPCI_LI | PPCF_T(RID_R12) | ((((intptr_t)target) >> 32) & 0xffff);
+  *p++ = PPCI_LI | PPCF_T(RID_TMP) | ((((intptr_t)g) >> 32) & 0xffff);
   *p++ = PPCI_RLDICR | PPCF_T(RID_R12) | PPCF_A(RID_R12) | PPCF_SH(32) | 
PPCF_M6(63-32);  /* sldi */
-  *p++ = PPCI_ORIS | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | ((((intptr_t)target) 
>> 16) & 0xffff);
-  *p++ = PPCI_ORIS | PPCF_A(RID_R12) | PPCF_T(RID_R12) | ((((intptr_t)g) >> 
16) & 0xffff);
-  *p++ = PPCI_ORI | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | (((intptr_t)target) & 
0xffff);
-  *p++ = PPCI_ORI | PPCF_A(RID_R12) | PPCF_T(RID_R12) | (((intptr_t)g) & 
0xffff);
+  *p++ = PPCI_RLDICR | PPCF_T(RID_TMP) | PPCF_A(RID_TMP) | PPCF_SH(32) | 
PPCF_M6(63-32);  /* sldi */
+  *p++ = PPCI_ORIS | PPCF_A(RID_R12) | PPCF_T(RID_R12) | ((((intptr_t)target) 
>> 16) & 0xffff);
+  *p++ = PPCI_ORIS | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | ((((intptr_t)g) >> 
16) & 0xffff);
+  *p++ = PPCI_ORI | PPCF_A(RID_R12) | PPCF_T(RID_R12) | (((intptr_t)target) & 
0xffff);
+  *p++ = PPCI_ORI | PPCF_A(RID_TMP) | PPCF_T(RID_TMP) | (((intptr_t)g) & 
0xffff);
+  *p++ = PPCI_MTCTR | PPCF_T(RID_R12);
 #else  /* PPC 32bits */
   *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
   *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16);
   *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 
0xffff);
   *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff);
-#endif
   *p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
+#endif
   *p++ = PPCI_BCTR;
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
     *p++ = PPCI_LI | PPCF_T(RID_R11) | slot;
diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc
index 5d15d01ac820..cd38d544b673 100644
--- a/src/vm_ppc64.dasc
+++ b/src/vm_ppc64.dasc
@@ -2286,11 +2286,12 @@ static void build_subroutines(BuildCtx *ctx)
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
   |
-  |// Handler for callback functions. Callback slot number in r11, g in r12.
+  |// Handler for callback functions. Callback slot number in r11, g in r0.
   |->vm_ffi_callback:
   |.if FFI
   |.type CTSTATE, CTState, PC
   |  pic_code_setup vm_ffi_callback
+  |  mr r12, r0 // Use r12 as saveregs overwrites r0
   |  saveregs
   |  ld CTSTATE, GL:r12->ctype_state
   |   addi DISPATCH, r12, GG_G2DISP
-- 
2.20.1

++++++ 0014-Improve-readability-of-a-load-instruction.patch ++++++
>From 9f233a7b3ef4f4ced0ee8da6dc3a58d058c10617 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Wed, 30 Aug 2017 13:25:29 -0300
Subject: [PATCH 14/15] Improve readability of a load instruction

---
 src/vm_ppc64.dasc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc
index cd38d544b673..b92de564f4d0 100644
--- a/src/vm_ppc64.dasc
+++ b/src/vm_ppc64.dasc
@@ -1488,9 +1488,8 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |  lbz TMP0, L:CARG1->status
   |   ld TMP1, L:CARG1->cframe
-  |     la TMP3, L:CARG1->base
-  |     ld TMP2, 0(TMP3)
-  |     ld CARG2, 8(TMP3)
+  |     ld TMP2, L:CARG1->base
+  |     ld CARG2, L:CARG1->top
   |  cmpldi cr0, TMP0, LUA_YIELD
   |     add TMP3, CARG2, TMP0
   |   cmpldi cr1, TMP1, 0
-- 
2.20.1

++++++ 0015-Fix-remaining-unwind-values-on-vm-frames.patch ++++++
>From 5866b0a1ed6abe9eab60332905eca2bbc80f1478 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <[email protected]>
Date: Mon, 4 Sep 2017 10:51:51 -0300
Subject: [PATCH 15/15] Fix remaining unwind values on vm frames

Although unwinding already worked for VM frames, the previous change was
not fully correct. Analysing the DWARF output of
"readelf --debug-dump=frames luajit" showed it was not well formatted.
---
 src/vm_ppc64.dasc | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/vm_ppc64.dasc b/src/vm_ppc64.dasc
index b92de564f4d0..dde11b954540 100644
--- a/src/vm_ppc64.dasc
+++ b/src/vm_ppc64.dasc
@@ -4390,39 +4390,40 @@ static void emit_asm_debug(BuildCtx *ctx)
        "\t.long 0xffffffff\n"          /* CIE_Id */
        "\t.byte 0x1\n"                 /* Version */
        "\t.string \"\"\n"              /* augmentation */
-       "\t.uleb128 0x1\n"              /* code_alignment_factor */
+       "\t.uleb128 4\n"                /* code_alignment_factor */
        "\t.sleb128 -8\n"               /* data_alignment_factor */
        "\t.byte 65\n"                  /* return_address_register (LR) */
-       "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 %d\n"    /* DW_CFA_def_cfa */
-       "\t.align 2\n"
-       ".LECIE0:\n\n",
-        CFRAME_SIZE);
+       /* DW_CFA_def_cfa */
+       "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
+       "\t.align 3\n"
+       ".LECIE0:\n\n");
     fprintf(ctx->fp,
        ".LSFDE0:\n"                    /* Frame Description Entry (FDE) */
        "\t.long .LEFDE0-.LASFDE0\n"    /* length */
        ".LASFDE0:\n"
        "\t.long .Lframe0\n"            /* CIE_ptr */
-       "\t.long .Lbegin\n"             /* initial_location */
-       "\t.long %d\n"                  /* address_range */
-       "\t.byte 0xe\n\t.uleb128 %d\n"  /* DW_CFA_def_cfa_offset */
-       /* DW_CFA_offset_extended_sf (TOC) */
-       "\t.byte 0x11\n\t.uleb128 2\n\t.sleb128 %d\n"
+       "\t.quad .Lbegin\n"             /* initial_location */
+       "\t.quad %d\n"                  /* address_range */
 
-        /* DW_CFA_val_expression of size 9 for LR register */
-       "\t.byte 0x16\n\t.uleb128 65\n\t.uleb128 9\n"
-       "\t.byte 0x70\n\t.uleb128 1\n\t.sleb128 0\n"    /* DW_OP_breg */
-       "\t.byte 0x06\n"                /* DW_OP_deref */
-       "\t.byte 0x11\n\t.sleb128 16\n" /* DW_OP_consts */
-       "\t.byte 0x22\n"                /* DW_OP_plus */
-       "\t.byte 0x06\n",               /* DW_OP_deref */
-       (int)ctx->codesz, CFRAME_SIZE, 24 / -8);
+       /* DW_CFA_def_cfa_offset */
+       "\t.byte 0xe\n\t.uleb128 %d\n"
+
+       /* DW_CFA_offset_extended_sf $lr */
+       "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 %d\n"
+
+       /* DW_CFA_offset (TOC) */
+       "\t.byte 0x82\n\t.uleb128 %d\n",
+
+       (int)ctx->codesz, CFRAME_SIZE, 16/-8, (24-400)/-8);
     for (i = 14; i <= 31; i++)
       fprintf(ctx->fp,
        "\t.byte %d\n\t.uleb128 %d\n"   /* DW_CFA_offset from r14 to r31 */
        "\t.byte %d\n\t.uleb128 %d\n",  /* DW_CFA_offset from f14 to f31 */
-       0x80+i, 38+2*(31-i), 0x80+32+i, 2+2*(31-i));
+       // SAVE_GPR_ SAVE_FPR_
+       0x80+i, (112-400)/-8 + 14-i, 0x80+32+i, (256-400)/-8 + 14-i);
+
     fprintf(ctx->fp,
-       "\t.align 2\n"
+       "\t.align 3\n"
        ".LEFDE0:\n\n");
     break;
   default:
-- 
2.20.1

commit lua51-luajit for openSUSE:Factory

Reply via email to