v3:
* moves files to x86_64/fastcwd.cc and aarch64/fastcwd.cc
* reflows commit message in patch 3
* adds comments & assert to x86_64/fastcwd.cc
* removes Windows 8.0 case (I didn't realize Windows 8 was no longer supported)
* tracks the destination register of the lea and checks that it ends up in
rcx (the first argument) before the call to RtlEnterCriticalSection
* switches from memcmp to comparing the members of opr that are relevant.
* minor formatting tweaks to aarch64/fastcwd.cc
I tested the x86_64 code on every released Windows build from 9600 to 26100.
Interestingly, the machine code of the "use_cwd" function
(RtlpReferenceCurrentDirectory) didn't seem to change until 26100.
(I previously tested the prototype aarch64 code on builds 16299, 19045, 22631,
and 26100, but only builds 22000 and later support x86_64 emulation).
Jeremy Drake (5):
Cygwin: factor out find_fast_cwd_pointer to arch-specific file.
Cygwin: vendor libudis86 1.7.2/libudis86
Cygwin: patch libudis86 to build as part of Cygwin
Cygwin: use udis86 to find fast cwd pointer on x64
Cygwin: add find_fast_cwd_pointer_aarch64.
winsup/cygwin/Makefile.am | 14 +-
winsup/cygwin/aarch64/fastcwd.cc | 203 +
winsup/cygwin/path.cc | 145 +-
winsup/cygwin/udis86/decode.c | 1113 ++++
winsup/cygwin/udis86/decode.h | 195 +
winsup/cygwin/udis86/extern.h | 109 +
winsup/cygwin/udis86/itab.c | 8404 ++++++++++++++++++++++++++++++
winsup/cygwin/udis86/itab.h | 680 +++
winsup/cygwin/udis86/types.h | 260 +
winsup/cygwin/udis86/udint.h | 91 +
winsup/cygwin/udis86/udis86.c | 464 ++
winsup/cygwin/x86_64/fastcwd.cc | 200 +
12 files changed, 11755 insertions(+), 123 deletions(-)
create mode 100644 winsup/cygwin/aarch64/fastcwd.cc
create mode 100644 winsup/cygwin/udis86/decode.c
create mode 100644 winsup/cygwin/udis86/decode.h
create mode 100644 winsup/cygwin/udis86/extern.h
create mode 100644 winsup/cygwin/udis86/itab.c
create mode 100644 winsup/cygwin/udis86/itab.h
create mode 100644 winsup/cygwin/udis86/types.h
create mode 100644 winsup/cygwin/udis86/udint.h
create mode 100644 winsup/cygwin/udis86/udis86.c
create mode 100644 winsup/cygwin/x86_64/fastcwd.cc
Range-diff against v2:
1: 25a8b233f5 ! 1: a1c9f722d7 Cygwin: factor out find_fast_cwd_pointer to
arch-specific file.
@@ winsup/cygwin/Makefile.am: LIB_NAME=libcygwin.a
if TARGET_X86_64
TARGET_FILES= \
x86_64/bcopy.S \
-+ x86_64/fastcwd_x86_64.cc \
++ x86_64/fastcwd.cc \
x86_64/memchr.S \
x86_64/memcpy.S \
x86_64/memmove.S \
@@ winsup/cygwin/path.cc: find_fast_cwd ()
small_printf ("Cygwin WARNING:\n"
" Couldn't compute FAST_CWD pointer. This typically occurs if you're
using\n"
- ## winsup/cygwin/x86_64/fastcwd_x86_64.cc (new) ##
+ ## winsup/cygwin/x86_64/fastcwd.cc (new) ##
@@
-+/* fastcwd_x86_64.cc: find fast cwd pointer on x86_64 hosts.
++/* x86_64/fastcwd.cc: find fast cwd pointer on x86_64 hosts.
+
+ This file is part of Cygwin.
+
2: faa2688d1f = 2: 1c290dbc53 Cygwin: vendor libudis86 1.7.2/libudis86
3: 04f7a44f59 ! 3: bd2dca35eb Cygwin: patch libudis86 to build as part of
Cygwin
@@ Metadata
## Commit message ##
Cygwin: patch libudis86 to build as part of Cygwin
- This ifdefs out the large table of
- opcode strings (and the function that references it) since we're only
- interested in walking machine code, not generating disassembly, and
- makes a couple of other tables "const" so that they end up in .rdata
- instead of .data.
+ This ifdefs out the large table of opcode strings (and the function
that
+ references it) since we're only interested in walking machine code, not
+ generating disassembly, and makes a couple of other tables "const" so
+ that they end up in .rdata instead of .data.
Signed-off-by: Jeremy Drake <[email protected]>
4: 0f06e96562 ! 4: 140a61c9e1 Cygwin: use udis86 to find fast cwd pointer on
x64
@@ Commit message
Signed-off-by: Jeremy Drake <[email protected]>
- ## winsup/cygwin/x86_64/fastcwd_x86_64.cc ##
+ ## winsup/cygwin/x86_64/fastcwd.cc ##
@@
details. */
#include "winsup.h"
++#include <assert.h>
+#include "udis86/types.h"
+#include "udis86/extern.h"
class fcwd_access_t;
-#define peek32(x) (*(int32_t *)(x))
++/* Helper function to get the absolute address of an rip-relative
instruction
++ by summing the current instruction's pc (rip), the current
instruction's
++ length, and the signed 32-bit displacement in the operand.
Optionally, an
++ additional offset is subtracted to deal with the case where a member
of a
++ struct is being referenced by the instruction but the address of the
struct
++ is desired.
++*/
+static inline const void *
+rip_rel_offset (const ud_t *ud_obj, const ud_operand_t *opr, int
sub_off=0)
+{
++ assert ((opr->type == UD_OP_JIMM && opr->size == 32) ||
++ (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
++ opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32));
++
+ return (const void *) (ud_insn_off (ud_obj) + ud_insn_len (ud_obj) +
+ opr->lval.sdword - sub_off);
+}
/* This function scans the code in ntdll.dll to find the address of the
global variable used to access the CWD. While the pointer is global,
-@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
+@@ winsup/cygwin/x86_64/fastcwd.cc: find_fast_cwd_pointer_x86_64 ()
GetProcAddress (ntdll, "RtlEnterCriticalSection");
if (!get_dir || !ent_crit)
return NULL;
++ /* Initialize udis86 */
+ ud_t ud_obj;
+ ud_init (&ud_obj);
++ /* Set 64-bit mode */
+ ud_set_mode (&ud_obj, 64);
+ ud_set_input_buffer (&ud_obj, get_dir, 80);
-+ ud_set_pc (&ud_obj, (const uint64_t) get_dir);
-+ const ud_operand_t *opr;
++ /* Set pc (rip) so that subsequent calls to ud_insn_off will return the
pc of
++ the instruction, saving us the hassle of tracking it ourselves */
++ ud_set_pc (&ud_obj, (uint64_t) get_dir);
++ const ud_operand_t *opr, *opr0;
+ ud_mnemonic_code_t insn;
++ ud_type_t reg = UD_NONE;
/* Search first relative call instruction in RtlGetCurrentDirectory_U.
*/
- const uint8_t *rcall = (const uint8_t *) memchr (get_dir, 0xe8, 80);
- if (!rcall)
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
- performs some other actions, not important to us. */
- const uint8_t *use_cwd = rcall + 5 + peek32 (rcall + 1);
+ ud_set_input_buffer (&ud_obj, use_cwd, 120);
-+ ud_set_pc (&ud_obj, (const uint64_t) use_cwd);
++ ud_set_pc (&ud_obj, (uint64_t) use_cwd);
+
/* Next we search for the locking mechanism and perform a sanity check.
- On Pre-Windows 8 we basically look for the RtlEnterCriticalSection
call.
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
- memmem ((const char *) use_cwd, 80,
- "\xf0\x0f\xba\x35", 4);
- if (lock)
-+ On Pre- (or Post-) Windows 8 we basically look for the
-+ RtlEnterCriticalSection call. Windows 8 does not call
-+ RtlEnterCriticalSection. The code manipulates the FastPebLock
manually,
-+ probably because RtlEnterCriticalSection has been converted to an
inline
-+ function. Either way, we test if the code uses the FastPebLock. */
++ we basically look for the RtlEnterCriticalSection call and test if
the
++ code uses the FastPebLock. */
+ PRTL_CRITICAL_SECTION lockaddr = NULL;
+
-+ /* both cases have an `lea rel(%rip)` on the lock */
+ while (ud_disassemble (&ud_obj) &&
+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
+ insn != UD_Ijmp)
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
- "\x48\x8b\x1d", 3);
+ if (insn == UD_Ilea)
+ {
-+ /* this seems to follow intel syntax, in that operand 0 is the
++ /* udis86 seems to follow intel syntax, in that operand 0 is the
+ dest and 1 is the src */
++ opr0 = ud_insn_opr (&ud_obj, 0);
+ opr = ud_insn_opr (&ud_obj, 1);
+ if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
-+ opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32)
++ opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 &&
++ opr0->type == UD_OP_REG && opr0->size == 64)
+ {
+ lockaddr = (PRTL_CRITICAL_SECTION) rip_rel_offset (&ud_obj, opr);
++ reg = opr0->base;
+ break;
+ }
+ }
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
+ if (lockaddr != NtCurrentTeb ()->Peb->FastPebLock)
+ return NULL;
+
-+ /* Next is either the `callq RtlEnterCriticalSection', or on Windows 8,
-+ a `lock btr` */
++ /* Find where the lock address is loaded into rcx as the first
parameter of
++ a function call */
+ bool found = false;
-+ while (ud_disassemble (&ud_obj) &&
-+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
-+ insn != UD_Ijmp)
++ if (reg != UD_R_RCX)
{
- /* Usually the callq RtlEnterCriticalSection follows right after
- fetching the lock address. */
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
- lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
- "\x48\x8d\x0d", 3);
- if (!lock)
-+ if (insn == UD_Icall)
++ while (ud_disassemble (&ud_obj) &&
++ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
++ insn != UD_Ijmp)
{
- /* Windows 8.1 Preview calls `lea rel(rip),%r12' then some unrelated
- ops, then `mov %r12,%rcx', then `callq RtlEnterCriticalSection'. */
- lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
- "\x4c\x8d\x25", 3);
- call_rtl_offset = 14;
-+ opr = ud_insn_opr (&ud_obj, 0);
-+ if (opr->type == UD_OP_JIMM && opr->size == 32)
++ if (insn == UD_Imov)
+ {
-+ if (ent_crit != rip_rel_offset (&ud_obj, opr))
-+ return NULL;
-+ found = true;
-+ break;
++ opr0 = ud_insn_opr (&ud_obj, 0);
++ opr = ud_insn_opr (&ud_obj, 1);
++ if (opr->type == UD_OP_REG && opr->size == 64 &&
++ opr->base == reg && opr0->type == UD_OP_REG &&
++ opr0->size == 64 && opr0->base == UD_R_RCX)
++ {
++ found = true;
++ break;
++ }
+ }
}
--
++ if (!found)
++ return NULL;
++ }
+
- if (!lock)
-+ else if (insn == UD_Ibtr && ud_obj.pfx_lock)
++ /* Next is the `callq RtlEnterCriticalSection' */
++ found = false;
++ while (ud_disassemble (&ud_obj) &&
++ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
++ insn != UD_Ijmp)
++ {
++ if (insn == UD_Icall)
{
- /* A recent Windows 11 Preview calls `lea rel(rip),%r13' then
- some unrelated instructions, then `callq RtlEnterCriticalSection'.
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
- lock = (const uint8_t *) memmem ((const char *) use_cwd, 80,
- "\x4c\x8d\x2d", 3);
- call_rtl_offset = 24;
-+ /* for Windows 8 */
+ opr = ud_insn_opr (&ud_obj, 0);
-+ if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
-+ opr->index == UD_NONE && opr->scale == 0 && opr->offset == 32 &&
-+ opr->size == 32)
++ if (opr->type == UD_OP_JIMM && opr->size == 32)
+ {
-+ if (lockaddr != rip_rel_offset (&ud_obj, opr,
-+ offsetof (RTL_CRITICAL_SECTION, LockCount)))
++ if (ent_crit != rip_rel_offset (&ud_obj, opr))
+ return NULL;
+ found = true;
+ break;
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
- if (!lock)
+ fcwd_access_t **f_cwd_ptr = NULL;
-+ ud_type_t reg = UD_NONE;
-+ /* now we're looking for a movq rel(%rip) */
++ /* now we're looking for a mov rel(%rip), %<reg64> */
+ while (ud_disassemble (&ud_obj) &&
+ (insn = ud_insn_mnemonic (&ud_obj)) != UD_Iret &&
+ insn != UD_Ijmp)
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
+ if (insn == UD_Imov)
{
- return NULL;
-+ const ud_operand_t *opr0 = ud_insn_opr (&ud_obj, 0);
++ opr0 = ud_insn_opr (&ud_obj, 0);
+ opr = ud_insn_opr (&ud_obj, 1);
-+ if (opr->type == UD_OP_MEM && opr->base == UD_R_RIP &&
-+ opr->index == UD_NONE && opr->scale == 0 &&
-+ opr->offset == 32 && opr->size == 64 &&
-+ opr0->type == UD_OP_REG)
++ if (opr->type == UD_OP_MEM && opr->size == 64 &&
++ opr->base == UD_R_RIP && opr->index == UD_NONE &&
++ opr->scale == 0 && opr->offset == 32 &&
++ opr0->type == UD_OP_REG && opr0->size == 64)
+ {
+ f_cwd_ptr = (fcwd_access_t **) rip_rel_offset (&ud_obj, opr);
+ reg = opr0->base;
@@ winsup/cygwin/x86_64/fastcwd_x86_64.cc: find_fast_cwd_pointer_x86_64 ()
- movrbx = lock + 5;
}
- if (!movrbx)
-- return NULL;
- /* Check that the next instruction tests if the fetched value is NULL.
*/
++ /* Check that the next instruction is a test. */
++ if (!f_cwd_ptr || !ud_disassemble (&ud_obj) ||
++ ud_insn_mnemonic (&ud_obj) != UD_Itest)
+ return NULL;
+- /* Check that the next instruction tests if the fetched value is NULL.
*/
- const uint8_t *testrbx = (const uint8_t *)
- memmem (movrbx + 7, 3, "\x48\x85\xdb", 3);
- if (!testrbx)
-+ if (!f_cwd_ptr || !ud_disassemble (&ud_obj) ||
-+ ud_insn_mnemonic (&ud_obj) != UD_Itest)
-+ return NULL;
+
-+ opr = ud_insn_opr (&ud_obj, 0);
-+ if (opr->type != UD_OP_REG || opr->base != reg ||
-+ memcmp (opr, ud_insn_opr (&ud_obj, 1), offsetof (ud_operand_t,
_legacy)))
++ /* ... and that it's testing the same register that the mov above
loaded the
++ f_cwd_ptr into against itself */
++ opr0 = ud_insn_opr (&ud_obj, 0);
++ opr = ud_insn_opr (&ud_obj, 1);
++ if (opr->type != UD_OP_REG || opr->size != 64 || opr->base != reg ||
++ opr0->type != opr->type || opr0->size != 64 || opr0->base !=
opr->base)
return NULL;
- /* Compute address of the fcwd_access_t ** pointer. */
- return (fcwd_access_t **) (testrbx + peek32 (movrbx + 3));
5: e3adc20c9f ! 5: 87f2bcf895 Cygwin: add find_fast_cwd_pointer_aarch64.
@@ Commit message
Signed-off-by: Jeremy Drake <[email protected]>
## winsup/cygwin/Makefile.am ##
-@@ winsup/cygwin/Makefile.am: DLL_FILES= \
- exceptions.cc \
- exec.cc \
- external.cc \
-+ fastcwd_aarch64.cc \
- fcntl.cc \
- fenv.c \
- flock.cc \
+@@ winsup/cygwin/Makefile.am: if TARGET_X86_64
+ TARGET_FILES= \
+ x86_64/bcopy.S \
+ x86_64/fastcwd.cc \
++ aarch64/fastcwd.cc \
+ x86_64/memchr.S \
+ x86_64/memcpy.S \
+ x86_64/memmove.S \
- ## winsup/cygwin/fastcwd_aarch64.cc (new) ##
+ ## winsup/cygwin/aarch64/fastcwd.cc (new) ##
@@
-+/* fastcwd_aarch64.cc: find the fast cwd pointer on aarch64 hosts.
++/* aarch64/fastcwd.cc: find the fast cwd pointer on aarch64 hosts.
+
+ This file is part of Cygwin.
+
@@ winsup/cygwin/fastcwd_aarch64.cc (new)
+ Cygwin license. Please consult the file "CYGWIN_LICENSE" for
+ details. */
+
-+/* You might well wonder why this file is not in an aarch64
target-specific
-+ directory, like fastcwd_x86_64.cc. It turns out that this code works
when
-+ built for i686, x86_64, or aarch64 with just the small #if/#elif block
in
++/* You might well wonder why this file is included in x86_64 target files
++ in Makefile.am. It turns out that this code works when built for i686,
++ x86_64, or aarch64 with just the small #if/#elif block in
+ GetArm64ProcAddress below caring which. */
+
+#include "winsup.h"
-+#include "assert.h"
++#include <assert.h>
+
+class fcwd_access_t;
+
+static LPCVOID
+GetArm64ProcAddress (HMODULE hModule, LPCSTR procname)
+{
-+ const BYTE * proc = (const BYTE *) GetProcAddress (hModule, procname);
++ const BYTE *proc = (const BYTE *) GetProcAddress (hModule, procname);
+#if defined (__aarch64__)
+ return proc;
+#else
@@ winsup/cygwin/fastcwd_aarch64.cc (new)
+
+/* this would work for either bl or b, but we only use it for bl */
+static inline LPCVOID
-+extract_bl_target (const uint32_t * pc)
++extract_bl_target (const uint32_t *pc)
+{
+ assert (IS_INSN (pc, bl) || IS_INSN (pc, b));
+ int32_t offset = *pc & ~bl_mask;
@@ winsup/cygwin/fastcwd_aarch64.cc (new)
+}
+
+static inline uint64_t
-+extract_adrp_address (const uint32_t * pc)
++extract_adrp_address (const uint32_t *pc)
+{
+ assert (IS_INSN (pc, adrp));
+ uint64_t adrp_base = (uint64_t) pc & ~0xFFF;
--
2.48.1.windows.1