Jim Keniston wrote:
> On Tue, 2009-02-10 at 10:12 +0530, Ananth N Mavinakayanahalli wrote:
>> On Mon, Feb 09, 2009 at 06:05:56PM -0500, Masami Hiramatsu wrote:
>>> Jim Keniston wrote:
>>>> On Fri, 2009-02-06 at 15:49 -0500, Masami Hiramatsu wrote:
>>>>> Hi Jim,
>>>>>
>>>>> I'm also interested in the instruction decoder.
>>>>> If you don't mind, could we share the API specification?
>>>>> I'd like to port djprobe on it.
>>>> I'm enclosing the little x86 instruction-analysis prototype I hacked
>>>> together (insn_x86.*), along with a copy of systemtap's
>>>> runtime/uprobes2/uprobes_x86.c, which I modified to use it.
>>> Hmm, actually, djprobe needs both the length and the type of
>>> instructions, since it has to know how many bytes must be copied
>>> and be replaced by a long jump.
>>>
>>>> But again, we haven't really settled on an API.  For example, my x86
>>>> prototype doesn't collect all the info that kvm needs.  We're thinking
>>>> that adapting some existing code (like kvm in the x86 case) might be
>>>> more palatable to LKML.
>>> Sure, since kvm and emulators have to fetch the values of src/dst
>>> for the emulation, they need actual register values. On the other hand,
>>> the disasm/*probe have to analyze code before it is hit, so they
>>> don't know the actual value of the registers.
>>>
>>> So, I think we should split x86_decode_insn() into 2 parts, static
>>> analysis and emulation preparation.
>>>
>>> For example:
>>> 1) analyzing code statically (x86_analyze_insn)
>>>    - just decoding an instruction
>>>    - this phase may consist of several sub-functions.
>>>
>>> 2) preparing emulation (x86_evaluate_insn)
>>>    - evaluating src/dst based on current(vcpu) registers
>>>
>>> 3) executing emulation (x86_emulate_insn)
>>>    - emulating an analyzed instruction
>> Right, that surely sounds like the way to go. However, we've been
>> cautioned that the instruction emulation area of the kvm code is very
>> performance sensitive. But, there is no harm in prototyping the above
>> and then worrying about any optimizations so there isn't a performance
>> issue -- in any case, I guess [ku]probes are very infrequent users of
>> this compared to KVM.
>>
>> Ananth
> 
> Hi, Masami.
> 
> Ananth, Srikar, Maneesh, and I talked about this last night.  While I
> was on vacation, Srikar did further investigation into adapting x86
> kvm's instruction analysis for more general use, and he's not optimistic.
> For the short term, at least (i.e., between now and the Linux Foundation
> Collaboration Summit in April), we're going to proceed based on the
> prototype I developed.
> 
> As you noted, djprobes needs instruction lengths, and my prototype
> doesn't provide that info.  (Uprobes computes instruction lengths for
> rip-relative x86_64 instructions, but that's only a subset of what you
> need.)  Are you interested in extending/enhancing my prototype to make
> it useful for djprobes?  If so, I'd be happy to consult.

Here is a patch against your code, along with example code for an
instruction length decoder.
Curiously, KVM's instruction decoder does not cover all
instructions (in particular, Jcc/test...), so
I had to refer to the Intel manuals.

Moreover, even with this patch, the decoder is incomplete:
- it doesn't cover 3-byte opcodes yet.
- it doesn't fully decode the SIB, displacement and immediate fields yet.
- it might have some bugs :-(


Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhira...@redhat.com

Index: insn_x86.h
===================================================================
--- insn_x86.h  (revision 1510)
+++ insn_x86.h  (working copy)
@@ -66,6 +66,10 @@
        struct insn_field displacement;
        struct insn_field immediate;
 
+       u8 op_bytes;
+       u8 ad_bytes;
+       u8 length;
+
        const u8 *kaddr;        /* kernel address of insn (copy) to analyze */
        const u8 *next_byte;
        bool x86_64;
@@ -75,6 +79,7 @@
 extern void insn_get_prefixes(struct insn *insn);
 extern void insn_get_opcode(struct insn *insn);
 extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
 
 #ifdef CONFIG_X86_64
 extern bool insn_rip_relative(struct insn *insn);
Index: insn_x86.c
===================================================================
--- insn_x86.c  (revision 1510)
+++ insn_x86.c  (working copy)
@@ -17,7 +17,7 @@
  *
  * Copyright (C) IBM Corporation, 2002, 2004, 2009
  */
-
+#include <linux/module.h>
 #include <linux/string.h>
 // #include <asm/insn.h>
 #include "insn_x86.h"
@@ -34,6 +34,11 @@
        insn->kaddr = kaddr;
        insn->next_byte = kaddr;
        insn->x86_64 = x86_64;
+       insn->op_bytes = 4;
+       if (x86_64)
+               insn->ad_bytes = 8;
+       else
+               insn->ad_bytes = 4;
 }
 EXPORT_SYMBOL_GPL(insn_init);
 
@@ -79,10 +84,51 @@
                        break;
                prefixes->value |= pfx;
        }
+       if (prefixes->value & X86_PFX_OPNDSZ) {
+               /* operand-size prefix toggles op_bytes between 4 and 2 */
+               insn->op_bytes ^= 6;
+       }
+       if (prefixes->value & X86_PFX_ADDRSZ) {
+               /* toggles the *address* size: 8 <-> 4 (^ 12) or 4 <-> 2 (^ 6) */
+#ifdef CONFIG_X86_64
+               if (insn->x86_64)
+                       insn->ad_bytes ^= 12;
+               else
+#endif
+                       insn->ad_bytes ^= 6;
+       }
+#ifdef CONFIG_X86_64
+       if (prefixes->value & X86_PFX_REXW)
+               insn->op_bytes = 8;     /* applied last, so it wins over OPNDSZ */
+#endif
        prefixes->got = true;
 }
 EXPORT_SYMBOL_GPL(insn_get_prefixes);
 
+/* Return true if the one-byte opcode is a push/pop/call-style stack user. */
+static bool __insn_is_stack(struct insn *insn)
+{
+       u8 reg;
+       if (insn->opcode.nbytes == 2)
+               return false;
+       switch (insn->opcode1) {
+       case 0x68:      /* push Iz */
+       case 0x6a:      /* push Ib */
+       case 0x9c:      /* pushf */
+       case 0x9d:      /* popf */
+       case 0xc5:      /* NOTE(review): LDS is not a stack op; ret/leave meant? */
+       case 0xe8:      /* call rel */
+               return true;
+       }
+       /* bits 5:3 of the byte at next_byte, i.e. ModRM.reg if one follows */
+       reg = ((*insn->next_byte) >> 3) & 7;
+       if ((insn->opcode1 & 0xf0) == 0x50 ||           /* push/pop reg */
+           (insn->opcode1 == 0x8f && reg == 0) ||      /* pop Ev (Group 1A /0) */
+           (insn->opcode1 == 0xff && (reg & 1) == 0 && reg != 0)) /* /2 /4 /6 */
+               return true;
+       return false;
+}
+
 /**
  * insn_get_opcode - collect opcode(s)
  * @insn:      &struct insn containing instruction
@@ -108,6 +154,8 @@
                opcode->nbytes = 1;
        opcode->value = insn->opcode1;
        opcode->got = true;
+       if (insn->x86_64 && __insn_is_stack(insn))
+               insn->op_bytes = 8;
 }
 EXPORT_SYMBOL_GPL(insn_get_opcode);
 
@@ -208,3 +256,123 @@
 }
 EXPORT_SYMBOL_GPL(insn_rip_relative);
 #endif
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn:      &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte, then steps over any SIB, displacement and immediate bytes
+ * and stores the total byte count in @insn->length.  Returns early if
+ * @insn->length is already non-zero.  Coverage is still partial: e.g.
+ * 3-byte opcodes are not decoded.
+ */
+void insn_get_length(struct insn *insn)
+{
+       u8 modrm;
+       u8 mod = 0, reg = 0, rm = 0, sib;
+       const u8 *next_byte;
+       if (insn->length)
+               return;
+       if (!insn->modrm.got)
+               insn_get_modrm(insn);
+       next_byte = insn->next_byte;
+
+       if (insn->modrm.nbytes) {
+               modrm = insn->modrm.value;
+               mod = (modrm & 0xc0) >> 6;
+               reg = (modrm & 0x38) >> 3;
+               rm = (modrm & 0x07);
+               if (mod == 3)
+                       goto decode_src;
+               if (insn->ad_bytes == 2) {
+                       if (mod == 1)
+                               next_byte++;
+                       else if (mod == 2)
+                               next_byte += 2;
+                       else if (rm == 6)
+                               next_byte += 2;
+               } else {
+                       if (rm == 4) {
+                               sib = *(next_byte++);
+                               insn->sib.value = sib;
+                               insn->sib.nbytes = 1;
+                               insn->sib.got = 1;
+                               if ((sib & 7) == 5 && mod == 0)
+                                       next_byte += 4;
+                       }
+                       if (mod == 1)
+                               next_byte++;
+                       else if (mod == 2)
+                               next_byte += 4;
+                       else if (rm == 5)
+                               next_byte += 4;
+               }
+       } else if (insn->opcode.nbytes == 1)
+               if (0xa0 <= insn->opcode1 && insn->opcode1 < 0xa4)
+                       next_byte += insn->ad_bytes;
+decode_src:
+       if (insn->opcode.nbytes == 1) {
+               switch (insn->opcode1) {
+               case 0x9a: /* long call */
+               case 0xea: /* long jump */
+                       /* ptr16:16/32: a 2-byte selector follows the offset */
+                       next_byte += 2;
+                       goto imm_common;
+               case 0x05:
+               case 0x25:
+               case 0x3d:
+               case 0x68: // pushl
+               case 0x69: // imul
+               case 0xa9: // test
+               case 0xc7:
+               case 0xe8:
+               case 0xe9:
+               case 0x81: /* Group 1: Ev, Iz */
+                       goto imm_common;
+               case 0x04:
+               case 0x24:
+               case 0x3c:
+               case 0x6a: //pushb
+               case 0x6b: //imul
+               case 0xa8: //testb
+               case 0xeb:
+               case 0xc0:
+               case 0xc1:
+               case 0xc6:
+               case 0x80: /* Group 1: Eb, Ib */
+               case 0x82: /* Group 1: Eb, Ib (alias of 0x80) */
+               case 0x83: /* Group 1: Ev, Ib */
+                       goto immbyte_common;
+               }
+               if ((insn->opcode1 & 0xf8) == 0xb8) {
+                       /* mov r, imm: immediate is the full operand size (imm64 with REX.W) */
+                       next_byte += insn->op_bytes;
+               } else if (insn->opcode1 == 0xf7 && reg == 0) {
+imm_common:
+                       /* Iz immediates are at most 4 bytes, sign-extended for 8-byte operands */
+                       next_byte += (insn->op_bytes == 8) ? 4 : insn->op_bytes;
+               } else if ((insn->opcode1 & 0xf8) == 0xb0 || // mov r8, imm8
+                          (insn->opcode1 & 0xf0) == 0x70 || // Jcc
+                          (insn->opcode1 & 0xf8) == 0xe0 || // loop/in/out
+                           (insn->opcode1 == 0xf6 && reg == 0)) {
+immbyte_common:
+                       next_byte++;
+               }
+       } else {
+               switch (insn->opcode2) {
+               case 0xa4:
+               case 0xac:
+               case 0xba:
+               case 0x0f: // 3dnow
+               case 0x3a: // ssse3
+                       next_byte++;
+                       break;
+               default:
+                       if ((insn->opcode2 & 0xf0) == 0x80)
+                               next_byte += (insn->op_bytes == 8) ? 4 : insn->op_bytes;
+               }
+       }
+       insn->length = (u8)(next_byte - insn->kaddr);
+}
+EXPORT_SYMBOL_GPL(insn_get_length);
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
//#include <asm/insn_x86.h>
#include "insn_x86.h"

MODULE_LICENSE("GPL");

/* Kernel address at which decoding starts; 0 (the default) disables the run. */
static unsigned long addr = 0;
module_param(addr, ulong, 0444);
/* Number of consecutive instructions to decode from addr. */
static unsigned long num = 10;
module_param(num, ulong, 0444);

/*
 * Module init: decode `num` consecutive instructions starting at the
 * address given by the `addr` module parameter, printing each one's
 * address, length, opcode byte(s) and (if present) ModRM byte.
 * Does nothing when `addr` is 0.  Always returns 0.
 */
int __gen_init(void)
{
        unsigned long i;        /* same type as num: no signed/unsigned mix */
        struct insn insn;

        if (!addr)
                return 0;

        printk("decode instruction start from %lx\n", addr);
        for (i = 0; i < num; i++) {
                /* decode as 64-bit code exactly when the kernel is 64-bit */
                insn_init(&insn, (u8 *)addr, (sizeof(long) == 8));
                insn_get_length(&insn);
                printk("0x%lx: %d [op:", addr, insn.length);
                printk("%02x", insn.opcode1);
                if (insn.opcode.nbytes == 2)
                        printk(" %02x", insn.opcode2);
                printk("]");
                if (insn.modrm.nbytes)
                        printk("[modrm:%02x]", insn.modrm.value);
                printk(" next_byte=%p\n", insn.next_byte);
                /* step to the next instruction */
                addr += insn.length;
        }
        printk("decode instruction end\n");
        return 0;
}

/* Module exit hook: only logs that the test module has been unloaded. */
void __gen_exit(void)
{
        printk("test1 unloaded\n");
}

/* Register the load/unload entry points of this test module. */
module_init(__gen_init);
module_exit(__gen_exit);

Reply via email to