Modified: tomcat/jk/trunk/native/iis/pcre/sljit/sljitNativeX86_common.c
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/sljit/sljitNativeX86_common.c?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
--- tomcat/jk/trunk/native/iis/pcre/sljit/sljitNativeX86_common.c (original)
+++ tomcat/jk/trunk/native/iis/pcre/sljit/sljitNativeX86_common.c Tue Nov 21 14:37:37 2017
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmes...@freemail.hu). All rights 
reserved.
+ *    Copyright Zoltan Herczeg (hzmes...@freemail.hu). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without 
modification, are
  * permitted provided that the following conditions are met:
@@ -67,12 +67,15 @@ SLJIT_API_FUNC_ATTRIBUTE const char* slj
 #define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
 
 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
-       0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
+       0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
 };
 
 #define CHECK_EXTRA_REGS(p, w, do) \
-       if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
-               w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * 
sizeof(sljit_sw); \
+       if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
+               if (p <= compiler->scratches) \
+                       w = compiler->saveds_offset - ((p) - SLJIT_R2) * 
(sljit_sw)sizeof(sljit_sw); \
+               else \
+                       w = compiler->locals_offset + ((p) - SLJIT_S2) * 
(sljit_sw)sizeof(sljit_sw); \
                p = SLJIT_MEM1(SLJIT_SP); \
                do; \
        }
@@ -82,28 +85,27 @@ static const sljit_u8 reg_map[SLJIT_NUMB
 /* Last register + 1. */
 #define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
 #define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
 
 /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
    Note: avoid to use r12 and r13 for memory addessing
-   therefore r12 is better for SAVED_EREG than SAVED_REG. */
+   therefore r12 is better to be a higher saved register. */
 #ifndef _WIN64
-/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
-       0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
+/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, 
r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+       0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
 };
 /* low-map. reg_map & 0x7. */
-static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
-       0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+       0, 0, 6, 1, 7, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 1
 };
 #else
-/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
-       0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
+/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+       0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9
 };
 /* low-map. reg_map & 0x7. */
-static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
-       0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+       0, 0, 2, 1, 2,  3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 0, 1
 };
 #endif
 
@@ -166,7 +168,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUM
 #define CALL_i32       0xe8
 #define CALL_rm                (/* GROUP_FF */ 2 << 3)
 #define CDQ            0x99
-#define CMOVNE_r_rm    (/* GROUP_0F */ 0x45)
+#define CMOVE_r_rm     (/* GROUP_0F */ 0x44)
 #define CMP            (/* BINARY */ 7 << 3)
 #define CMP_EAX_i32    0x3d
 #define CMP_r_rm       0x3b
@@ -214,6 +216,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUM
 #define POP_r          0x58
 #define POP_rm         0x8f
 #define POPF           0x9d
+#define PREFETCH       0x18
 #define PUSH_i32       0x68
 #define PUSH_r         0x50
 #define PUSH_rm                (/* GROUP_FF */ 6 << 3)
@@ -279,6 +282,29 @@ static sljit_s32 cpu_has_cmov = -1;
 #include <intrin.h>
 #endif
 
+/******************************************************/
+/*    Unaligned-store functions                       */
+/******************************************************/
+
+static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
+{
+       SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
+{
+       SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
+{
+       SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+/******************************************************/
+/*    Utility functions                               */
+/******************************************************/
+
 static void get_cpu_features(void)
 {
        sljit_u32 features;
@@ -386,13 +412,13 @@ static sljit_u8 get_jump_code(sljit_s32
        return 0;
 }
 
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 
*code_ptr, sljit_s32 type, sljit_sw executable_offset);
+#else
 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 
*code_ptr, sljit_s32 type);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, 
sljit_s32 type);
 #endif
 
-static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 
*code_ptr, sljit_u8 *code, sljit_s32 type)
+static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 
*code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
 {
        sljit_s32 short_jump;
        sljit_uw label_addr;
@@ -400,7 +426,8 @@ static sljit_u8* generate_near_jump_code
        if (jump->flags & JUMP_LABEL)
                label_addr = (sljit_uw)(code + jump->u.label->size);
        else
-               label_addr = jump->u.target;
+               label_addr = jump->u.target - executable_offset;
+
        short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && 
(sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -453,6 +480,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_gen
        sljit_u8 *buf_ptr;
        sljit_u8 *buf_end;
        sljit_u8 len;
+       sljit_sw executable_offset;
+       sljit_sw jump_addr;
 
        struct sljit_label *label;
        struct sljit_jump *jump;
@@ -471,6 +500,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_gen
        label = compiler->labels;
        jump = compiler->jumps;
        const_ = compiler->consts;
+       executable_offset = SLJIT_EXEC_OFFSET(code);
+
        do {
                buf_ptr = buf->memory;
                buf_end = buf_ptr + buf->used_size;
@@ -478,40 +509,33 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_gen
                        len = *buf_ptr++;
                        if (len > 0) {
                                /* The code is already generated. */
-                               SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
+                               SLJIT_MEMCPY(code_ptr, buf_ptr, len);
                                code_ptr += len;
                                buf_ptr += len;
                        }
                        else {
-                               if (*buf_ptr >= 4) {
+                               if (*buf_ptr >= 2) {
                                        jump->addr = (sljit_uw)code_ptr;
                                        if (!(jump->flags & 
SLJIT_REWRITABLE_JUMP))
-                                               code_ptr = 
generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
-                                       else
-                                               code_ptr = 
generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
+                                               code_ptr = 
generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
+                                       else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                                               code_ptr = 
generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
+#else
+                                               code_ptr = 
generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
+#endif
+                                       }
                                        jump = jump->next;
                                }
                                else if (*buf_ptr == 0) {
-                                       label->addr = (sljit_uw)code_ptr;
+                                       label->addr = ((sljit_uw)code_ptr) + 
executable_offset;
                                        label->size = code_ptr - code;
                                        label = label->next;
                                }
-                               else if (*buf_ptr == 1) {
+                               else { /* *buf_ptr is 1 */
                                        const_->addr = ((sljit_uw)code_ptr) - 
sizeof(sljit_sw);
                                        const_ = const_->next;
                                }
-                               else {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                                       *code_ptr++ = (*buf_ptr == 2) ? 
CALL_i32 : JMP_i32;
-                                       buf_ptr++;
-                                       *(sljit_sw*)code_ptr = 
*(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
-                                       code_ptr += sizeof(sljit_sw);
-                                       buf_ptr += sizeof(sljit_sw) - 1;
-#else
-                                       code_ptr = 
generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
-                                       buf_ptr += sizeof(sljit_sw);
-#endif
-                               }
                                buf_ptr++;
                        }
                } while (buf_ptr < buf_end);
@@ -525,40 +549,85 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_gen
 
        jump = compiler->jumps;
        while (jump) {
+               jump_addr = jump->addr + executable_offset;
+
                if (jump->flags & PATCH_MB) {
-                       SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - 
(jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - 
(jump->addr + sizeof(sljit_s8))) <= 127);
-                       *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr 
- (jump->addr + sizeof(sljit_s8)));
+                       SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - 
(jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - 
(jump_addr + sizeof(sljit_s8))) <= 127);
+                       *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr 
- (jump_addr + sizeof(sljit_s8)));
                } else if (jump->flags & PATCH_MW) {
                        if (jump->flags & JUMP_LABEL) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                               *(sljit_sw*)jump->addr = 
(sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
+                               sljit_unaligned_store_sw((void*)jump->addr, 
(sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
 #else
-                               SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - 
(jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && 
(sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= 
HALFWORD_MAX);
-                               *(sljit_s32*)jump->addr = 
(sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)));
+                               SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - 
(jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && 
(sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= 
HALFWORD_MAX);
+                               sljit_unaligned_store_s32((void*)jump->addr, 
(sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
 #endif
                        }
                        else {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                               *(sljit_sw*)jump->addr = 
(sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
+                               sljit_unaligned_store_sw((void*)jump->addr, 
(sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
 #else
-                               SLJIT_ASSERT((sljit_sw)(jump->u.target - 
(jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target 
- (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-                               *(sljit_s32*)jump->addr = 
(sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)));
+                               SLJIT_ASSERT((sljit_sw)(jump->u.target - 
(jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - 
(jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+                               sljit_unaligned_store_s32((void*)jump->addr, 
(sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
 #endif
                        }
                }
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                else if (jump->flags & PATCH_MD)
-                       *(sljit_sw*)jump->addr = jump->u.label->addr;
+                       sljit_unaligned_store_sw((void*)jump->addr, 
jump->u.label->addr);
 #endif
 
                jump = jump->next;
        }
 
-       /* Maybe we waste some space because of short jumps. */
+       /* Some space may be wasted because of short jumps. */
        SLJIT_ASSERT(code_ptr <= code + compiler->size);
        compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_offset = executable_offset;
        compiler->executable_size = code_ptr - code;
-       return (void*)code;
+       return (void*)(code + executable_offset);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 
feature_type)
+{
+       switch (feature_type) {
+       case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+               return SLJIT_IS_FPU_AVAILABLE;
+#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+               if (cpu_has_sse2 == -1)
+                       get_cpu_features();
+               return cpu_has_sse2;
+#else /* SLJIT_DETECT_SSE2 */
+               return 1;
+#endif /* SLJIT_DETECT_SSE2 */
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       case SLJIT_HAS_VIRTUAL_REGISTERS:
+               return 1;
+#endif
+
+       case SLJIT_HAS_CLZ:
+       case SLJIT_HAS_CMOV:
+               if (cpu_has_cmov == -1)
+                       get_cpu_features();
+               return cpu_has_cmov;
+
+       case SLJIT_HAS_PREF_SHIFT_REG:
+               return 1;
+
+       case SLJIT_HAS_SSE2:
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+               if (cpu_has_sse2 == -1)
+                       get_cpu_features();
+               return cpu_has_sse2;
+#else
+               return 1;
+#endif
+
+       default:
+               return 0;
+       }
 }
 
 /* --------------------------------------------------------------------- */
@@ -581,52 +650,8 @@ static sljit_s32 emit_mov(struct sljit_c
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 src, sljit_sw srcw);
 
-static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
-{
-       sljit_u8 *inst;
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-       inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
-       FAIL_IF(!inst);
-       INC_SIZE(5);
-#else
-       inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
-       FAIL_IF(!inst);
-       INC_SIZE(6);
-       *inst++ = REX_W;
-#endif
-       *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
-       *inst++ = 0x64;
-       *inst++ = 0x24;
-       *inst++ = (sljit_u8)sizeof(sljit_sw);
-       *inst++ = PUSHF;
-       compiler->flags_saved = 1;
-       return SLJIT_SUCCESS;
-}
-
-static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler 
*compiler, sljit_s32 keep_flags)
-{
-       sljit_u8 *inst;
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-       inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
-       FAIL_IF(!inst);
-       INC_SIZE(5);
-       *inst++ = POPF;
-#else
-       inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
-       FAIL_IF(!inst);
-       INC_SIZE(6);
-       *inst++ = POPF;
-       *inst++ = REX_W;
-#endif
-       *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
-       *inst++ = 0x64;
-       *inst++ = 0x24;
-       *inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
-       compiler->flags_saved = keep_flags;
-       return SLJIT_SUCCESS;
-}
+#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
+       FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
 
 #ifdef _WIN32
 #include <malloc.h>
@@ -657,15 +682,8 @@ static sljit_s32 emit_mov(struct sljit_c
 {
        sljit_u8* inst;
 
-       if (dst == SLJIT_UNUSED) {
-               /* No destination, doesn't need to setup flags. */
-               if (src & SLJIT_MEM) {
-                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, 
src, srcw);
-                       FAIL_IF(!inst);
-                       *inst = MOV_r_rm;
-               }
-               return SLJIT_SUCCESS;
-       }
+       SLJIT_ASSERT(dst != SLJIT_UNUSED);
+
        if (FAST_IS_REG(src)) {
                inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
                FAIL_IF(!inst);
@@ -687,8 +705,10 @@ static sljit_s32 emit_mov(struct sljit_c
                }
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
-                       FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
-                       inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, 
dst, dstw);
+                       /* Immediate to memory move. Only SLJIT_MOV operation 
copies
+                          an immediate directly into memory so TMP_REG1 can be 
used. */
+                       FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, 
dst, dstw);
                        FAIL_IF(!inst);
                        *inst = MOV_rm_r;
                        return SLJIT_SUCCESS;
@@ -706,7 +726,8 @@ static sljit_s32 emit_mov(struct sljit_c
                return SLJIT_SUCCESS;
        }
 
-       /* Memory to memory move. Requires two instruction. */
+       /* Memory to memory move. Only SLJIT_MOV operation copies
+          data from memory to memory so TMP_REG1 can be used. */
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
        FAIL_IF(!inst);
        *inst = MOV_r_rm;
@@ -716,9 +737,6 @@ static sljit_s32 emit_mov(struct sljit_c
        return SLJIT_SUCCESS;
 }
 
-#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
-       FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler 
*compiler, sljit_s32 op)
 {
        sljit_u8 *inst;
@@ -748,20 +766,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
        case SLJIT_DIVMOD_SW:
        case SLJIT_DIV_UW:
        case SLJIT_DIV_SW:
-               compiler->flags_saved = 0;
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #ifdef _WIN64
-               SLJIT_COMPILE_ASSERT(
+               SLJIT_ASSERT(
                        reg_map[SLJIT_R0] == 0
                        && reg_map[SLJIT_R1] == 2
-                       && reg_map[TMP_REG1] > 7,
-                       invalid_register_assignment_for_div_mul);
+                       && reg_map[TMP_REG1] > 7);
 #else
-               SLJIT_COMPILE_ASSERT(
+               SLJIT_ASSERT(
                        reg_map[SLJIT_R0] == 0
                        && reg_map[SLJIT_R1] < 7
-                       && reg_map[TMP_REG1] == 2,
-                       invalid_register_assignment_for_div_mul);
+                       && reg_map[TMP_REG1] == 2);
 #endif
                compiler->mode32 = op & SLJIT_I32_OP;
 #endif
@@ -885,9 +900,6 @@ static sljit_s32 emit_mov_byte(struct sl
        compiler->mode32 = 0;
 #endif
 
-       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
-               return SLJIT_SUCCESS; /* Empty instruction. */
-
        if (src & SLJIT_IMM) {
                if (FAST_IS_REG(dst)) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1016,6 +1028,30 @@ static sljit_s32 emit_mov_byte(struct sl
        return SLJIT_SUCCESS;
 }
 
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
+       sljit_s32 src, sljit_sw srcw)
+{
+       sljit_u8* inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 1;
+#endif
+
+       inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst++ = PREFETCH;
+
+       if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
+               *inst |= (3 << 3);
+       else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
+               *inst |= (2 << 3);
+       else
+               *inst |= (1 << 3);
+
+       return SLJIT_SUCCESS;
+}
+
 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 src, sljit_sw srcw)
@@ -1027,9 +1063,6 @@ static sljit_s32 emit_mov_half(struct sl
        compiler->mode32 = 0;
 #endif
 
-       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
-               return SLJIT_SUCCESS; /* Empty instruction. */
-
        if (src & SLJIT_IMM) {
                if (FAST_IS_REG(dst)) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1073,14 +1106,6 @@ static sljit_s32 emit_unary(struct sljit
 {
        sljit_u8* inst;
 
-       if (dst == SLJIT_UNUSED) {
-               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
-               inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
-               FAIL_IF(!inst);
-               *inst++ = GROUP_F7;
-               *inst |= opcode;
-               return SLJIT_SUCCESS;
-       }
        if (dst == src && dstw == srcw) {
                /* Same input and output */
                inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
@@ -1089,14 +1114,19 @@ static sljit_s32 emit_unary(struct sljit
                *inst |= opcode;
                return SLJIT_SUCCESS;
        }
+
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
        if (FAST_IS_REG(dst)) {
                EMIT_MOV(compiler, dst, 0, src, srcw);
-               inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= opcode;
                return SLJIT_SUCCESS;
        }
+
        EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
        FAIL_IF(!inst);
@@ -1112,20 +1142,12 @@ static sljit_s32 emit_not_with_flags(str
 {
        sljit_u8* inst;
 
-       if (dst == SLJIT_UNUSED) {
-               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
-               inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
-               FAIL_IF(!inst);
-               *inst++ = GROUP_F7;
-               *inst |= NOT_rm;
-               inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 
0);
-               FAIL_IF(!inst);
-               *inst = OR_r_rm;
-               return SLJIT_SUCCESS;
-       }
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
        if (FAST_IS_REG(dst)) {
                EMIT_MOV(compiler, dst, 0, src, srcw);
-               inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
                FAIL_IF(!inst);
                *inst++ = GROUP_F7;
                *inst |= NOT_rm;
@@ -1134,6 +1156,7 @@ static sljit_s32 emit_not_with_flags(str
                *inst = OR_r_rm;
                return SLJIT_SUCCESS;
        }
+
        EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
        FAIL_IF(!inst);
@@ -1146,6 +1169,10 @@ static sljit_s32 emit_not_with_flags(str
        return SLJIT_SUCCESS;
 }
 
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+static const sljit_sw emit_clz_arg = 32 + 31;
+#endif
+
 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 src, sljit_sw srcw)
@@ -1154,22 +1181,6 @@ static sljit_s32 emit_clz(struct sljit_c
        sljit_s32 dst_r;
 
        SLJIT_UNUSED_ARG(op_flags);
-       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
-               /* Just set the zero flag. */
-               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
-               inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
-               FAIL_IF(!inst);
-               *inst++ = GROUP_F7;
-               *inst |= NOT_rm;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, 
SLJIT_IMM, 31, TMP_REG1, 0);
-#else
-               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, 
SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
-#endif
-               FAIL_IF(!inst);
-               *inst |= SHR;
-               return SLJIT_SUCCESS;
-       }
 
        if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
                EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
@@ -1177,81 +1188,53 @@ static sljit_s32 emit_clz(struct sljit_c
                srcw = 0;
        }
 
-       inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
+       if (cpu_has_cmov == -1)
+               get_cpu_features();
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = BSR_r_rm;
 
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-       if (FAST_IS_REG(dst))
-               dst_r = dst;
-       else {
-               /* Find an unused temporary register. */
-               if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != 
TO_OFFS_REG(SLJIT_R0))
-                       dst_r = SLJIT_R0;
-               else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) 
!= TO_OFFS_REG(SLJIT_R1))
-                       dst_r = SLJIT_R1;
+       if (cpu_has_cmov) {
+               if (dst_r != TMP_REG1) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
+                       inst = emit_x86_instruction(compiler, 2, dst_r, 0, 
TMP_REG1, 0);
+               }
                else
-                       dst_r = SLJIT_R2;
-               EMIT_MOV(compiler, dst, dstw, dst_r, 0);
-       }
-       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
-#else
-       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
-       compiler->mode32 = 0;
-       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 
+ 63 : 32 + 31);
-       compiler->mode32 = op_flags & SLJIT_I32_OP;
-#endif
-
-       if (cpu_has_cmov == -1)
-               get_cpu_features();
+                       inst = emit_x86_instruction(compiler, 2, dst_r, 0, 
SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
 
-       if (cpu_has_cmov) {
-               inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
                FAIL_IF(!inst);
                *inst++ = GROUP_0F;
-               *inst = CMOVNE_r_rm;
-       } else {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-               inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
-               FAIL_IF(!inst);
-               INC_SIZE(4);
+               *inst = CMOVE_r_rm;
+       }
+       else
+               FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, 
SLJIT_IMM, 32 + 31));
 
-               *inst++ = JE_i8;
-               *inst++ = 2;
-               *inst++ = MOV_r_rm;
-               *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
+       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, 
dst_r, 0);
 #else
-               inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
-               FAIL_IF(!inst);
-               INC_SIZE(5);
+       if (cpu_has_cmov) {
+               EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & 
SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
 
-               *inst++ = JE_i8;
-               *inst++ = 3;
-               *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | 
(reg_map[TMP_REG1] >= 8 ? REX_B : 0);
-               *inst++ = MOV_r_rm;
-               *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
-#endif
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = CMOVE_r_rm;
        }
+       else
+               FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, 
SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
 
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, 
dst_r, 0);
-#else
        inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 
!(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
 #endif
+
        FAIL_IF(!inst);
        *(inst + 1) |= XOR;
 
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-       if (dst & SLJIT_MEM) {
-               inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
-               FAIL_IF(!inst);
-               *inst = XCHG_r_rm;
-       }
-#else
        if (dst & SLJIT_MEM)
-               EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
-#endif
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;
 }
 
@@ -1259,7 +1242,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 src, sljit_sw srcw)
 {
-       sljit_u8* inst;
        sljit_s32 update = 0;
        sljit_s32 op_flags = GET_ALL_FLAGS(op);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1280,7 +1262,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
        compiler->mode32 = op_flags & SLJIT_I32_OP;
 #endif
 
+       if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+               if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+                       return emit_prefetch(compiler, op, src, srcw);
+               return SLJIT_SUCCESS;
+       }
+
        op = GET_OPCODE(op);
+
        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                compiler->mode32 = 0;
@@ -1338,14 +1327,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
 #endif
                }
 
-               if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg 
&& (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
-                       inst = emit_x86_instruction(compiler, 1, src & 
REG_MASK, 0, src, srcw);
-                       FAIL_IF(!inst);
-                       *inst = LEA_r_m;
-                       src &= SLJIT_MEM | 0xf;
-                       srcw = 0;
-               }
-
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == 
SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & 
SLJIT_MEM))) {
                        SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
@@ -1389,31 +1370,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                        return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, 
TMP_REG1, 0);
 #endif
 
-               if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 
REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
-                       inst = emit_x86_instruction(compiler, 1, dst & 
REG_MASK, 0, dst, dstw);
-                       FAIL_IF(!inst);
-                       *inst = LEA_r_m;
+               if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg 
&& (src & REG_MASK)) {
+                       if ((src & OFFS_REG_MASK) != 0) {
+                               FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, 
ADD_rm_r, ADD, ADD_EAX_i32,
+                                               (src & REG_MASK), 0, (src & 
REG_MASK), 0, OFFS_REG(dst), 0));
+                       }
+                       else if (srcw != 0) {
+                               FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, 
ADD_rm_r, ADD, ADD_EAX_i32,
+                                               (src & REG_MASK), 0, (src & 
REG_MASK), 0, SLJIT_IMM, srcw));
+                       }
+               }
+
+               if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 
REG_MASK)) {
+                       if ((dst & OFFS_REG_MASK) != 0) {
+                               FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, 
ADD_rm_r, ADD, ADD_EAX_i32,
+                                               (dst & REG_MASK), 0, (dst & 
REG_MASK), 0, OFFS_REG(dst), 0));
+                       }
+                       else if (dstw != 0) {
+                               FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, 
ADD_rm_r, ADD, ADD_EAX_i32,
+                                               (dst & REG_MASK), 0, (dst & 
REG_MASK), 0, SLJIT_IMM, dstw));
+                       }
                }
                return SLJIT_SUCCESS;
        }
 
-       if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
-               compiler->flags_saved = 0;
-
        switch (op) {
        case SLJIT_NOT:
-               if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
+               if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
                        return emit_not_with_flags(compiler, dst, dstw, src, 
srcw);
                return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
 
        case SLJIT_NEG:
-               if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && 
!compiler->flags_saved)
-                       FAIL_IF(emit_save_flags(compiler));
                return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
 
        case SLJIT_CLZ:
-               if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && 
!compiler->flags_saved)
-                       FAIL_IF(emit_save_flags(compiler));
                return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
        }
 
@@ -1433,8 +1423,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                *(inst + 1) |= (op_imm); \
        } \
        else { \
-               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
-               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, 
argw); \
+               FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 
: TMP_REG1, immw)); \
+               inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? 
TMP_REG2 : TMP_REG1, 0, arg, argw); \
                FAIL_IF(!inst); \
                *inst = (op_mr); \
        }
@@ -1660,7 +1650,7 @@ static sljit_s32 emit_mul(struct sljit_c
        sljit_u8* inst;
        sljit_s32 dst_r;
 
-       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
 
        /* Register destination. */
        if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
@@ -1699,7 +1689,7 @@ static sljit_s32 emit_mul(struct sljit_c
                        inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
                        FAIL_IF(!inst);
                        INC_SIZE(4);
-                       *(sljit_sw*)inst = src1w;
+                       sljit_unaligned_store_sw(inst, src1w);
                }
 #else
                else if (IS_HALFWORD(src1w)) {
@@ -1709,12 +1699,12 @@ static sljit_s32 emit_mul(struct sljit_c
                        inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
                        FAIL_IF(!inst);
                        INC_SIZE(4);
-                       *(sljit_s32*)inst = (sljit_s32)src1w;
+                       sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
                }
                else {
-                       EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
                        if (dst_r != src2)
                                EMIT_MOV(compiler, dst_r, 0, src2, src2w);
+                       FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
                        inst = emit_x86_instruction(compiler, 2, dst_r, 0, 
TMP_REG2, 0);
                        FAIL_IF(!inst);
                        *inst++ = GROUP_0F;
@@ -1742,7 +1732,7 @@ static sljit_s32 emit_mul(struct sljit_c
                        inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
                        FAIL_IF(!inst);
                        INC_SIZE(4);
-                       *(sljit_sw*)inst = src2w;
+                       sljit_unaligned_store_sw(inst, src2w);
                }
 #else
                else if (IS_HALFWORD(src2w)) {
@@ -1752,12 +1742,12 @@ static sljit_s32 emit_mul(struct sljit_c
                        inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
                        FAIL_IF(!inst);
                        INC_SIZE(4);
-                       *(sljit_s32*)inst = (sljit_s32)src2w;
+                       sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
                }
                else {
-                       EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
                        if (dst_r != src1)
                                EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+                       FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
                        inst = emit_x86_instruction(compiler, 2, dst_r, 0, 
TMP_REG2, 0);
                        FAIL_IF(!inst);
                        *inst++ = GROUP_0F;
@@ -1776,13 +1766,13 @@ static sljit_s32 emit_mul(struct sljit_c
                *inst = IMUL_r_rm;
        }
 
-       if (dst_r == TMP_REG1)
+       if (dst & SLJIT_MEM)
                EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 
        return SLJIT_SUCCESS;
 }
 
-static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 
keep_flags,
+static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 src1, sljit_sw src1w,
        sljit_s32 src2, sljit_sw src2w)
@@ -1791,12 +1781,10 @@ static sljit_s32 emit_lea_binary(struct
        sljit_s32 dst_r, done = 0;
 
        /* These cases better be left to handled by normal way. */
-       if (!keep_flags) {
-               if (dst == src1 && dstw == src1w)
-                       return SLJIT_ERR_UNSUPPORTED;
-               if (dst == src2 && dstw == src2w)
-                       return SLJIT_ERR_UNSUPPORTED;
-       }
+       if (dst == src1 && dstw == src1w)
+               return SLJIT_ERR_UNSUPPORTED;
+       if (dst == src2 && dstw == src2w)
+               return SLJIT_ERR_UNSUPPORTED;
 
        dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 
@@ -1908,7 +1896,7 @@ static sljit_s32 emit_test_binary(struct
        }
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-       if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < 
-128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+       if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < 
-128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 #else
        if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < 
-128)) {
 #endif
@@ -1925,8 +1913,8 @@ static sljit_s32 emit_test_binary(struct
                                *inst = GROUP_F7;
                        }
                        else {
-                               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, 
src2w));
-                               inst = emit_x86_instruction(compiler, 1, 
TMP_REG2, 0, src1, src1w);
+                               FAIL_IF(emit_load_imm64(compiler, TMP_REG1, 
src2w));
+                               inst = emit_x86_instruction(compiler, 1, 
TMP_REG1, 0, src1, src1w);
                                FAIL_IF(!inst);
                                *inst = TEST_rm_r;
                        }
@@ -1954,8 +1942,8 @@ static sljit_s32 emit_test_binary(struct
                                *inst = GROUP_F7;
                        }
                        else {
-                               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, 
src1w));
-                               inst = emit_x86_instruction(compiler, 1, 
TMP_REG2, 0, src2, src2w);
+                               FAIL_IF(emit_load_imm64(compiler, TMP_REG1, 
src1w));
+                               inst = emit_x86_instruction(compiler, 1, 
TMP_REG1, 0, src2, src2w);
                                FAIL_IF(!inst);
                                *inst = TEST_rm_r;
                        }
@@ -2067,25 +2055,29 @@ static sljit_s32 emit_shift(struct sljit
                EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
        }
        else {
-               /* This case is really difficult, since ecx itself may used for
-                  addressing, and we must ensure to work even in that case. */
+               /* This case is complex since ecx itself may be used for
+                  addressing, and this case must be supported as well. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-               EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
-#else
-               /* [esp+0] contains the flags. */
-               EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), 
SLJIT_PREF_SHIFT_REG, 0);
-#endif
+               EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, 
SLJIT_PREF_SHIFT_REG, 0);
                EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
                inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, 
SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
                FAIL_IF(!inst);
                *inst |= mode;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, 
SLJIT_MEM1(SLJIT_SP), 0);
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 #else
-               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, 
SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
-#endif
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w);
+               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, 
SLJIT_PREF_SHIFT_REG, 0);
+               FAIL_IF(!inst);
+               *inst = XCHG_r_rm;
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, 
SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
                EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+#endif
        }
 
        return SLJIT_SUCCESS;
@@ -2144,54 +2136,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
        compiler->mode32 = op & SLJIT_I32_OP;
 #endif
 
-       if (GET_OPCODE(op) >= SLJIT_MUL) {
-               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
-                       compiler->flags_saved = 0;
-               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && 
!compiler->flags_saved)
-                       FAIL_IF(emit_save_flags(compiler));
-       }
+       if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+               return SLJIT_SUCCESS;
 
        switch (GET_OPCODE(op)) {
        case SLJIT_ADD:
-               if (!GET_FLAGS(op)) {
-                       if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, 
dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
+               if (!HAS_FLAGS(op)) {
+                       if (emit_lea_binary(compiler, dst, dstw, src1, src1w, 
src2, src2w) != SLJIT_ERR_UNSUPPORTED)
                                return compiler->error;
                }
-               else
-                       compiler->flags_saved = 0;
-               if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && 
!compiler->flags_saved)
-                       FAIL_IF(emit_save_flags(compiler));
                return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, 
ADD_EAX_i32,
                        dst, dstw, src1, src1w, src2, src2w);
        case SLJIT_ADDC:
-               if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be 
restored. */
-                       FAIL_IF(emit_restore_flags(compiler, 1));
-               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
-                       FAIL_IF(emit_save_flags(compiler));
-               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
-                       compiler->flags_saved = 0;
                return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, 
ADC_EAX_i32,
                        dst, dstw, src1, src1w, src2, src2w);
        case SLJIT_SUB:
-               if (!GET_FLAGS(op)) {
-                       if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op 
& SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != 
SLJIT_ERR_UNSUPPORTED)
+               if (!HAS_FLAGS(op)) {
+                       if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, 
dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
                                return compiler->error;
                }
-               else
-                       compiler->flags_saved = 0;
-               if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && 
!compiler->flags_saved)
-                       FAIL_IF(emit_save_flags(compiler));
+
                if (dst == SLJIT_UNUSED)
                        return emit_cmp_binary(compiler, src1, src1w, src2, 
src2w);
                return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, 
SUB_EAX_i32,
                        dst, dstw, src1, src1w, src2, src2w);
        case SLJIT_SUBC:
-               if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be 
restored. */
-                       FAIL_IF(emit_restore_flags(compiler, 1));
-               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
-                       FAIL_IF(emit_save_flags(compiler));
-               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
-                       compiler->flags_saved = 0;
                return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, 
SBB_EAX_i32,
                        dst, dstw, src1, src1w, src2, src2w);
        case SLJIT_MUL:
@@ -2208,13 +2177,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, 
XOR_EAX_i32,
                        dst, dstw, src1, src1w, src2, src2w);
        case SLJIT_SHL:
-               return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
+               return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
                        dst, dstw, src1, src1w, src2, src2w);
        case SLJIT_LSHR:
-               return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
+               return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
                        dst, dstw, src1, src1w, src2, src2w);
        case SLJIT_ASHR:
-               return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
+               return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
                        dst, dstw, src1, src1w, src2, src2w);
        }
 
@@ -2225,7 +2194,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
 {
        CHECK_REG_INDEX(check_sljit_get_register_index(reg));
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-       if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
+       if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
                return -1;
 #endif
        return reg_map[reg];
@@ -2248,7 +2217,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
-       SLJIT_MEMMOVE(inst, instruction, size);
+       SLJIT_MEMCPY(inst, instruction, size);
        return SLJIT_SUCCESS;
 }
 
@@ -2256,36 +2225,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */
 
-/* Alignment + 2 * 16 bytes. */
-static sljit_s32 sse2_data[3 + (4 + 4) * 2];
+/* Alignment(3) + 4 * 16 bytes. */
+static sljit_s32 sse2_data[3 + (4 * 4)];
 static sljit_s32 *sse2_buffer;
 
 static void init_compiler(void)
 {
+       /* Align to 16 bytes. */
        sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
-       /* Single precision constants. */
+
+       /* Single precision constants (each constant is 16 byte long). */
        sse2_buffer[0] = 0x80000000;
        sse2_buffer[4] = 0x7fffffff;
-       /* Double precision constants. */
+       /* Double precision constants (each constant is 16 byte long). */
        sse2_buffer[8] = 0;
        sse2_buffer[9] = 0x80000000;
        sse2_buffer[12] = 0xffffffff;
        sse2_buffer[13] = 0x7fffffff;
 }
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-       return SLJIT_IS_FPU_AVAILABLE;
-#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-       if (cpu_has_sse2 == -1)
-               get_cpu_features();
-       return cpu_has_sse2;
-#else /* SLJIT_DETECT_SSE2 */
-       return 1;
-#endif /* SLJIT_DETECT_SSE2 */
-}
-
 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
        sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
 {
@@ -2326,7 +2284,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 src, sljit_sw srcw)
 {
-       sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
        sljit_u8 *inst;
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -2339,7 +2297,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit
        *inst++ = GROUP_0F;
        *inst = CVTTSD2SI_r_xm;
 
-       if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+       if (dst & SLJIT_MEM)
                return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;
 }
@@ -2383,7 +2341,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit
        sljit_s32 src1, sljit_sw src1w,
        sljit_s32 src2, sljit_sw src2w)
 {
-       compiler->flags_saved = 0;
        if (!FAST_IS_REG(src1)) {
                FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, 
src1, src1w));
                src1 = TMP_FREG;
@@ -2432,7 +2389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                return SLJIT_SUCCESS;
        }
 
-       if (SLOW_IS_REG(dst)) {
+       if (FAST_IS_REG(dst)) {
                dst_r = dst;
                if (dst != src)
                        FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, 
dst_r, src, srcw));
@@ -2530,11 +2487,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_la
        CHECK_ERROR_PTR();
        CHECK_PTR(check_sljit_emit_label(compiler));
 
-       /* We should restore the flags before the label,
-          since other taken jumps has their own flags as well. */
-       if (SLJIT_UNLIKELY(compiler->flags_saved))
-               PTR_FAIL_IF(emit_restore_flags(compiler, 0));
-
        if (compiler->last_label && compiler->last_label->size == 
compiler->size)
                return compiler->last_label;
 
@@ -2559,12 +2511,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_ju
        CHECK_ERROR_PTR();
        CHECK_PTR(check_sljit_emit_jump(compiler, type));
 
-       if (SLJIT_UNLIKELY(compiler->flags_saved)) {
-               if ((type & 0xff) <= SLJIT_JUMP)
-                       PTR_FAIL_IF(emit_restore_flags(compiler, 0));
-               compiler->flags_saved = 0;
-       }
-
        jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct 
sljit_jump));
        PTR_FAIL_IF_NULL(jump);
        set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
@@ -2584,10 +2530,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_ju
        PTR_FAIL_IF_NULL(inst);
 
        *inst++ = 0;
-       *inst++ = type + 4;
+       *inst++ = type + 2;
        return jump;
 }
 
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifndef _WIN64
+#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3)
+#else
+#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2)
+#endif
+#endif
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler 
*compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
 {
        sljit_u8 *inst;
@@ -2599,12 +2553,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
 
        CHECK_EXTRA_REGS(src, srcw, (void)0);
 
-       if (SLJIT_UNLIKELY(compiler->flags_saved)) {
-               if (type <= SLJIT_JUMP)
-                       FAIL_IF(emit_restore_flags(compiler, 0));
-               compiler->flags_saved = 0;
-       }
-
        if (type >= SLJIT_CALL1) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
@@ -2615,11 +2563,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
                        srcw += sizeof(sljit_sw);
 #endif
-#endif
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
-               if (src == SLJIT_R2) {
-                       EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
-                       src = TMP_REG1;
+#else
+               if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) {
+                       EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
+                       src = TMP_REG2;
                }
 #endif
                FAIL_IF(call_with_args(compiler, type));
@@ -2642,7 +2589,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                FAIL_IF_NULL(inst);
 
                *inst++ = 0;
-               *inst++ = type + 4;
+               *inst++ = type + 2;
        }
        else {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -2659,37 +2606,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler 
*compiler, sljit_s32 op,
        sljit_s32 dst, sljit_sw dstw,
-       sljit_s32 src, sljit_sw srcw,
        sljit_s32 type)
 {
        sljit_u8 *inst;
        sljit_u8 cond_set = 0;
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        sljit_s32 reg;
-#else
-       /* CHECK_EXTRA_REGS migh overwrite these values. */
+#endif
+       /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these 
values. */
        sljit_s32 dst_save = dst;
        sljit_sw dstw_save = dstw;
-#endif
 
        CHECK_ERROR();
-       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, 
type));
-       SLJIT_UNUSED_ARG(srcw);
-
-       if (dst == SLJIT_UNUSED)
-               return SLJIT_SUCCESS;
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 
        ADJUST_LOCAL_OFFSET(dst, dstw);
        CHECK_EXTRA_REGS(dst, dstw, (void)0);
-       if (SLJIT_UNLIKELY(compiler->flags_saved))
-               FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
 
        type &= 0xff;
        /* setcc = jcc + 0x10. */
        cond_set = get_jump_code(type) + 0x10;
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && 
FAST_IS_REG(dst) && dst == src) {
+       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && 
FAST_IS_REG(dst)) {
                inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
                FAIL_IF(!inst);
                INC_SIZE(4 + 3);
@@ -2704,7 +2643,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                return SLJIT_SUCCESS;
        }
 
-       reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
+       reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
 
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
        FAIL_IF(!inst);
@@ -2715,6 +2654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
        *inst++ = cond_set;
        *inst++ = MOD_REG | reg_lmap[reg];
        *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
+       /* The movzx instruction does not affect flags. */
        *inst++ = GROUP_0F;
        *inst++ = MOVZX_r_rm8;
        *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
@@ -2726,12 +2666,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
                return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
        }
+
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
                || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
        compiler->skip_checks = 1;
 #endif
-       return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
-#else /* SLJIT_CONFIG_X86_64 */
+       return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, 
dstw_save, TMP_REG1, 0);
+
+#else
+       /* The SLJIT_CONFIG_X86_32 code path starts here. */
        if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
                if (reg_map[dst] <= 4) {
                        /* Low byte is accessible. */
@@ -2785,8 +2728,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
                return SLJIT_SUCCESS;
        }
 
-       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && 
FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
-               SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, 
scratch_reg1_must_be_eax);
+       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && 
FAST_IS_REG(dst) && reg_map[dst] <= 4) {
+               SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
+
                if (dst != SLJIT_R0) {
                        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 
1);
                        FAIL_IF(!inst);
@@ -2845,6 +2789,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
 #endif /* SLJIT_CONFIG_X86_64 */
 }
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler 
*compiler, sljit_s32 type,
+       sljit_s32 dst_reg,
+       sljit_s32 src, sljit_sw srcw)
+{
+       sljit_u8* inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       dst_reg &= ~SLJIT_I32_OP;
+
+       if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && 
dst_reg <= SLJIT_S3))
+               return sljit_emit_cmov_generic(compiler, type, dst_reg, src, 
srcw);
+#else
+       if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
+               return sljit_emit_cmov_generic(compiler, type, dst_reg, src, 
srcw);
+#endif
+
+       /* ADJUST_LOCAL_OFFSET is not needed. */
+       CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = dst_reg & SLJIT_I32_OP;
+       dst_reg &= ~SLJIT_I32_OP;
+#endif
+
+       if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+               EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst = get_jump_code(type & 0xff) - 0x40;
+       return SLJIT_SUCCESS;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler 
*compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 {
        CHECK_ERROR();
@@ -2863,16 +2847,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit
        if (NOT_HALFWORD(offset)) {
                FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-               SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, 
dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
+               SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, 
TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
                return compiler->error;
 #else
-               return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, 
SLJIT_SP, 0, TMP_REG1, 0);
+               return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, 
TMP_REG1, 0);
 #endif
        }
 #endif
 
        if (offset != 0)
-               return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, 
SLJIT_SP, 0, SLJIT_IMM, offset);
+               return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, 
SLJIT_IMM, offset);
        return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
 }
 
@@ -2896,14 +2880,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_co
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        compiler->mode32 = 0;
-       reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
 
        if (emit_load_imm64(compiler, reg, init_value))
                return NULL;
 #else
-       if (dst == SLJIT_UNUSED)
-               dst = TMP_REG1;
-
        if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
                return NULL;
 #endif
@@ -2923,82 +2904,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_co
        return const_;
 }
 
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw 
new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw 
new_target, sljit_sw executable_offset)
 {
+       SLJIT_UNUSED_ARG(executable_offset);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-       *(sljit_sw*)addr = new_addr - (addr + 4);
+       sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - 
(sljit_uw)executable_offset);
 #else
-       *(sljit_uw*)addr = new_addr;
+       sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
 #endif
 }
 
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw 
new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw 
new_constant, sljit_sw executable_offset)
 {
-       *(sljit_sw*)addr = new_constant;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
-{
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-       if (cpu_has_sse2 == -1)
-               get_cpu_features();
-       return cpu_has_sse2;
-#else
-       return 1;
-#endif
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
-{
-       if (cpu_has_cmov == -1)
-               get_cpu_features();
-       return cpu_has_cmov;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler 
*compiler,
-       sljit_s32 type,
-       sljit_s32 dst_reg,
-       sljit_s32 src, sljit_sw srcw)
-{
-       sljit_u8* inst;
-
-       CHECK_ERROR();
-#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-       CHECK_ARGUMENT(sljit_x86_is_cmov_available());
-       CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
-       CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= 
SLJIT_ORDERED_F64);
-       CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
-       FUNCTION_CHECK_SRC(src, srcw);
-#endif
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
-       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
-               fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
-                       !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
-                       jump_names[type & 0xff], JUMP_POSTFIX(type));
-               sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
-               fprintf(compiler->verbose, ", ");
-               sljit_verbose_param(compiler, src, srcw);
-               fprintf(compiler->verbose, "\n");
-       }
-#endif
-
-       ADJUST_LOCAL_OFFSET(src, srcw);
-       CHECK_EXTRA_REGS(src, srcw, (void)0);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-       compiler->mode32 = dst_reg & SLJIT_I32_OP;
-#endif
-       dst_reg &= ~SLJIT_I32_OP;
-
-       if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
-               EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
-               src = TMP_REG1;
-               srcw = 0;
-       }
-
-       inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
-       FAIL_IF(!inst);
-       *inst++ = GROUP_0F;
-       *inst = get_jump_code(type & 0xff) - 0x40;
-       return SLJIT_SUCCESS;
+       SLJIT_UNUSED_ARG(executable_offset);
+       sljit_unaligned_store_sw((void*)addr, new_constant);
 }

Modified: tomcat/jk/trunk/native/iis/pcre/sljit/sljitUtils.c
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/sljit/sljitUtils.c?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
--- tomcat/jk/trunk/native/iis/pcre/sljit/sljitUtils.c (original)
+++ tomcat/jk/trunk/native/iis/pcre/sljit/sljitUtils.c Tue Nov 21 14:37:37 2017
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmes...@freemail.hu). All rights 
reserved.
+ *    Copyright Zoltan Herczeg (hzmes...@freemail.hu). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without 
modification, are
  * permitted provided that the following conditions are met:
@@ -206,10 +206,7 @@ static sljit_sw sljit_page_align = 0;
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL 
sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data)
 {
        struct sljit_stack *stack;
-       union {
-               void *ptr;
-               sljit_uw uw;
-       } base;
+       void *ptr;
 #ifdef _WIN32
        SYSTEM_INFO si;
 #endif
@@ -233,29 +230,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_st
        }
 #endif
 
-       /* Align limit and max_limit. */
-       max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
-
        stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), 
allocator_data);
        if (!stack)
                return NULL;
 
+       /* Align max_limit. */
+       max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
+
 #ifdef _WIN32
-       base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE);
-       if (!base.ptr) {
+       ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE);
+       if (!ptr) {
                SLJIT_FREE(stack, allocator_data);
                return NULL;
        }
-       stack->base = base.uw;
+       stack->max_limit = (sljit_u8 *)ptr;
+       stack->base = stack->max_limit + max_limit;
        stack->limit = stack->base;
-       stack->max_limit = stack->base + max_limit;
-       if (sljit_stack_resize(stack, stack->base + limit)) {
+       if (sljit_stack_resize(stack, stack->base - limit)) {
                sljit_free_stack(stack, allocator_data);
                return NULL;
        }
 #else
 #ifdef MAP_ANON
-       base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | 
MAP_ANON, -1, 0);
+       ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | 
MAP_ANON, -1, 0);
 #else
        if (dev_zero < 0) {
                if (open_dev_zero()) {
@@ -263,15 +260,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_st
                        return NULL;
                }
        }
-       base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, 
dev_zero, 0);
+       ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, 
dev_zero, 0);
 #endif
-       if (base.ptr == MAP_FAILED) {
+       if (ptr == MAP_FAILED) {
                SLJIT_FREE(stack, allocator_data);
                return NULL;
        }
-       stack->base = base.uw;
-       stack->limit = stack->base + limit;
-       stack->max_limit = stack->base + max_limit;
+       stack->max_limit = (sljit_u8 *)ptr;
+       stack->base = stack->max_limit + max_limit;
+       stack->limit = stack->base - limit;
 #endif
        stack->top = stack->base;
        return stack;
@@ -279,53 +276,53 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_st
 
 #undef PAGE_ALIGN
 
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* 
stack, void *allocator_data)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack 
*stack, void *allocator_data)
 {
        SLJIT_UNUSED_ARG(allocator_data);
 #ifdef _WIN32
-       VirtualFree((void*)stack->base, 0, MEM_RELEASE);
+       VirtualFree((void*)stack->max_limit, 0, MEM_RELEASE);
 #else
-       munmap((void*)stack->base, stack->max_limit - stack->base);
+       munmap((void*)stack->max_limit, stack->base - stack->max_limit);
 #endif
        SLJIT_FREE(stack, allocator_data);
 }
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct 
sljit_stack* stack, sljit_uw new_limit)
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct 
sljit_stack *stack, sljit_u8 *new_limit)
 {
        sljit_uw aligned_old_limit;
        sljit_uw aligned_new_limit;
 
-       if ((new_limit > stack->max_limit) || (new_limit < stack->base))
+       if ((new_limit < stack->max_limit) || (new_limit >= stack->base))
                return -1;
 #ifdef _WIN32
-       aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
-       aligned_old_limit = (stack->limit + sljit_page_align) & 
~sljit_page_align;
+       aligned_new_limit = (sljit_uw)new_limit & ~sljit_page_align;
+       aligned_old_limit = ((sljit_uw)stack->limit) & ~sljit_page_align;
        if (aligned_new_limit != aligned_old_limit) {
-               if (aligned_new_limit > aligned_old_limit) {
-                       if (!VirtualAlloc((void*)aligned_old_limit, 
aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE))
+               if (aligned_new_limit < aligned_old_limit) {
+                       if (!VirtualAlloc((void*)aligned_new_limit, 
aligned_old_limit - aligned_new_limit, MEM_COMMIT, PAGE_READWRITE))
                                return -1;
                }
                else {
-                       if (!VirtualFree((void*)aligned_new_limit, 
aligned_old_limit - aligned_new_limit, MEM_DECOMMIT))
+                       if (!VirtualFree((void*)aligned_old_limit, 
aligned_new_limit - aligned_old_limit, MEM_DECOMMIT))
                                return -1;
                }
        }
        stack->limit = new_limit;
        return 0;
 #else
-       if (new_limit >= stack->limit) {
+       if (new_limit <= stack->limit) {
                stack->limit = new_limit;
                return 0;
        }
-       aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
-       aligned_old_limit = (stack->limit + sljit_page_align) & 
~sljit_page_align;
+       aligned_new_limit = (sljit_uw)new_limit & ~sljit_page_align;
+       aligned_old_limit = ((sljit_uw)stack->limit) & ~sljit_page_align;
        /* If madvise is available, we release the unnecessary space. */
 #if defined(MADV_DONTNEED)
-       if (aligned_new_limit < aligned_old_limit)
-               madvise((void*)aligned_new_limit, aligned_old_limit - 
aligned_new_limit, MADV_DONTNEED);
+       if (aligned_new_limit > aligned_old_limit)
+               madvise((void*)aligned_old_limit, aligned_new_limit - 
aligned_old_limit, MADV_DONTNEED);
 #elif defined(POSIX_MADV_DONTNEED)
-       if (aligned_new_limit < aligned_old_limit)
-               posix_madvise((void*)aligned_new_limit, aligned_old_limit - 
aligned_new_limit, POSIX_MADV_DONTNEED);
+       if (aligned_new_limit > aligned_old_limit)
+               posix_madvise((void*)aligned_old_limit, aligned_new_limit - 
aligned_old_limit, POSIX_MADV_DONTNEED);
 #endif
        stack->limit = new_limit;
        return 0;

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput1
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput1?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput12
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput12?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput15
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput15?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput16
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput16?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput19
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput19?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput2
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput2?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput6
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput6?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput7
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput7?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testinput8
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testinput8?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput1
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput1?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput12
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput12?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput15
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput15?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput16
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput16?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput19
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput19?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput2
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput2?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput6
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput6?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput7
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput7?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.

Modified: tomcat/jk/trunk/native/iis/pcre/testdata/testoutput8
URL: 
http://svn.apache.org/viewvc/tomcat/jk/trunk/native/iis/pcre/testdata/testoutput8?rev=1815927&r1=1815926&r2=1815927&view=diff
==============================================================================
Binary files - no diff available.



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to