The uncommitted patches I have are attached, and my command line is
below if you want to try it yourself.
build/X86_FS/m5.opt configs/example/fs.py --timing
--kernel=/dist/m5/system/binaries/x86_64-vmlinux-2.6.22.9
Gabe Black wrote:
> I just did. Surprisingly when I upgraded gcc to 4.3.something I forgot
> to source /env/profile so I was still using 4.1. When I really upgraded
> to 4.3, it still tries to execute the vtable, but since there's a
> different address it gets an undefined opcode exception instead of what
> I'm assuming is a page fault. Also, I don't think that part is marked no
> execute. I think the first two bytes of the address coincidentally
> decode to an instruction that does something, and then the bytes after
> that decode to something that causes a page fault. That would be why the
> page fault happens with the rip (PC) a few bytes into the address.
> (looks at the manual) The address is 0x4e1f74, and 74 is jz with a byte
> offset. zf wasn't set, so that'd be a noop. 0x4e is an REX prefix, and
> the zeros afterwards I think become add (%r8), %rax. Since %r8 is zero,
> that's dereferencing zero -> page fault -> seg fault.
>
> Gabe
>
> nathan binkert wrote:
>
>> Have you tried a newer version of gcc to see what the code looks like?
>>
>> Nate
>>
>> On Mon, Aug 24, 2009 at 10:06 PM, Gabe Black<[email protected]> wrote:
>>
>>
>>> This appears to be a gcc bug. I will now explain why. If you don't care,
>>> stop reading. If you do care and you see some place where I'm wrong,
>>> please, please let me know.
>>>
>>>
>>>
>>> The interesting part of the function in question disassembles to the
>>> following:
>>>
>>> 0x0000000000d85fc3 <_ZN16SimpleTimingPort10recvTimingEP6Packet+155>:
>>> mov 0x55ab4e(%rip),%rax # 0x12e0b18 <curTick>
>>> 0x0000000000d85fca <_ZN16SimpleTimingPort10recvTimingEP6Packet+162>:
>>> mov %rax,%rdx
>>> 0x0000000000d85fcd <_ZN16SimpleTimingPort10recvTimingEP6Packet+165>:
>>> add -0x8(%rbp),%rdx
>>> 0x0000000000d85fd1 <_ZN16SimpleTimingPort10recvTimingEP6Packet+169>:
>>> mov -0x20(%rbp),%rsi
>>> 0x0000000000d85fd5 <_ZN16SimpleTimingPort10recvTimingEP6Packet+173>:
>>> mov -0x18(%rbp),%rdi
>>> 0x0000000000d85fd9 <_ZN16SimpleTimingPort10recvTimingEP6Packet+177>:
>>> callq 0xd85d68 <_ZN16SimpleTimingPort15schedSendTimingEP6Packetl>
>>> 0x0000000000d85fde <_ZN16SimpleTimingPort10recvTimingEP6Packet+182>:
>>> jmp 0xd85ffb <_ZN16SimpleTimingPort10recvTimingEP6Packet+211>
>>> 0x0000000000d85fe0 <_ZN16SimpleTimingPort10recvTimingEP6Packet+184>:
>>> cmpq $0x0,-0x20(%rbp)
>>> 0x0000000000d85fe5 <_ZN16SimpleTimingPort10recvTimingEP6Packet+189>:
>>> je 0xd85ffb <_ZN16SimpleTimingPort10recvTimingEP6Packet+211>
>>> 0x0000000000d85fe7 <_ZN16SimpleTimingPort10recvTimingEP6Packet+191>:
>>> mov -0x20(%rbp),%rax
>>> 0x0000000000d85feb <_ZN16SimpleTimingPort10recvTimingEP6Packet+195>:
>>> mov (%rax),%rax
>>> 0x0000000000d85fee <_ZN16SimpleTimingPort10recvTimingEP6Packet+198>:
>>> add $0x8,%rax
>>> 0x0000000000d85ff2 <_ZN16SimpleTimingPort10recvTimingEP6Packet+202>:
>>> mov (%rax),%rax
>>> 0x0000000000d85ff5 <_ZN16SimpleTimingPort10recvTimingEP6Packet+205>:
>>> mov -0x20(%rbp),%rdi
>>> 0x0000000000d85ff9 <_ZN16SimpleTimingPort10recvTimingEP6Packet+209>:
>>> callq *%rax
>>> 0x0000000000d85ffb <_ZN16SimpleTimingPort10recvTimingEP6Packet+211>:
>>> movl $0x1,-0x24(%rbp)
>>> 0x0000000000d86002 <_ZN16SimpleTimingPort10recvTimingEP6Packet+218>:
>>> mov -0x24(%rbp),%eax
>>> 0x0000000000d86005 <_ZN16SimpleTimingPort10recvTimingEP6Packet+221>:
>>> leaveq
>>> 0x0000000000d86006 <_ZN16SimpleTimingPort10recvTimingEP6Packet+222>: retq
>>>
>>> The part where it has a heart attack is at +209 where it tries to call
>>> through the value in memory pointed to by %rax. If you look above that a
>>> few instructions at +191, you'll see where it gets a value off of the
>>> stack using %rbp, the frame pointer, and puts that into %rax. That value
>>> is the pointer pkt.
>>>
>>> (gdb) p pkt
>>> $7 = (PacketPtr) 0x1bd6f40
>>> (gdb) p/x *(uint64_t)($rbp - 0x20)
>>> $10 = 0x1bd6f40
>>>
>>> Because pkts are reference counting pointers, %rax actually points to a
>>> structure that contains the pointer to the real packet. The instruction
>>> at +195 removes that level of indirection. Next, the line at +198 adds 8
>>> to %rax, making it point to the vtable corresponding to the Printable
>>> base class. You can see that here after all the static members.
>>>
>>> (gdb) p *pkt
>>> $11 = {<FastAlloc> = {_vptr.FastAlloc = 0x1bd7060, static Max_Alloc_Size
>>> = 512, static Log2_Alloc_Quantum = 3, static Alloc_Quantum = 8, static
>>> Num_Buckets = 65, static Num_Structs_Per_New = <optimized out>, static
>>> freeLists = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2912c50, 0x0,
>>> 0x1bcf358, 0x2b7e8f0, 0x1bd61a0,
>>> 0x1bd6f40, 0x0 <repeats 52 times>}}, <Printable> =
>>> {_vptr.Printable = 0xdd7d70}, static PUBLIC_FLAGS = <optimized out>,
>>> static PRIVATE_FLAGS = <optimized out>, static COPY_FLAGS = 15, static
>>> SHARED = 1, static EXPRESS_SNOOP = 2, static SUPPLY_EXCLUSIVE = 4,
>>> static MEM_INHIBIT = 8, static VALID_ADDR = 256,
>>> static VALID_SIZE = 512, static VALID_SRC = 1024, static VALID_DST =
>>> 2048, static STATIC_DATA = 4096, static DYNAMIC_DATA = 8192, static
>>> ARRAY_DATA = 16384, flags = {_flags = 3840}, cmd = {static commandInfo =
>>> 0x12e6080, cmd = MemCmd::MessageResp}, req = 0x2b7e8f0, data = 0x0, addr
>>> = 11529215046068469760,
>>> size = 4, src = 0, dest = 8, origCmd = {static commandInfo =
>>> 0x12e6080, cmd = MemCmd::MessageReq}, time = 231966339456, finishTime =
>>> 231966444000, firstWordTime = 231966445000, static Broadcast = -1,
>>> senderState = 0x0}
>>>
>>> To make sure it's pointed at the right thing,
>>>
>>> (gdb) p/x *(uint64_t *)((uint8_t *)pkt + 8)
>>> $13 = 0xdd7d70
>>>
>>> Next, we can see that %rax is again dereferenced at +202. This is
>>> extracting the pointer to the virtual destructor of Printable from its
>>> vtable.
>>>
>>> (gdb) x/gx *(uint64_t *)((uint8_t *)pkt + 8)
>>> 0xdd7d70 <_ZTV9Printable+16>: 0x00000000004e1f74
>>>
>>> (gdb) disassemble 0x00000000004e1f74
>>> Dump of assembler code for function ~Printable:
>>> 0x00000000004e1f74 <~Printable+0>: push %rbp
>>> 0x00000000004e1f75 <~Printable+1>: mov %rsp,%rbp
>>> 0x00000000004e1f78 <~Printable+4>: sub $0x10,%rsp
>>> 0x00000000004e1f7c <~Printable+8>: mov %rdi,-0x8(%rbp)
>>> 0x00000000004e1f80 <~Printable+12>: mov $0xdd7d70,%edx
>>> 0x00000000004e1f85 <~Printable+17>: mov -0x8(%rbp),%rax
>>> 0x00000000004e1f89 <~Printable+21>: mov %rdx,(%rax)
>>> 0x00000000004e1f8c <~Printable+24>: mov $0x0,%eax
>>> 0x00000000004e1f91 <~Printable+29>: test %al,%al
>>> 0x00000000004e1f93 <~Printable+31>: je 0x4e1f9e <~Printable+42>
>>> 0x00000000004e1f95 <~Printable+33>: mov -0x8(%rbp),%rdi
>>> 0x00000000004e1f99 <~Printable+37>: callq 0x409340 <_zd...@plt>
>>> 0x00000000004e1f9e <~Printable+42>: leaveq
>>> 0x00000000004e1f9f <~Printable+43>: retq
>>> End of assembler dump.
>>>
>>> Now %rax holds the value 0xdd7d70, the pointer to the Printable vtable
>>> plus offset 0 which holds the pointer to the destructor.
>>>
>>> (gdb) info registers
>>> rax 0xdd7d70 14515568
>>> rbx 0x1731f10 24321808
>>> rcx 0x2d43c20 47463456
>>> rdx 0xc 12
>>> rsi 0x60 96
>>> rdi 0x1bd6f40 29192000
>>> rbp 0x7fff2cbc0fd0 0x7fff2cbc0fd0
>>> rsp 0x7fff2cbc0fa0 0x7fff2cbc0fa0
>>> r8 0x0 0
>>> r9 0x0 0
>>> r10 0x1bc7f30 29130544
>>> r11 0x7fff2cbc0cf0 140733943909616
>>> r12 0x7f5824b4ecb0 140016549686448
>>> r13 0x1bd3f80 29179776
>>> r14 0x1731f10 24321808
>>> r15 0x7f58243844a0 140016541516960
>>> rip 0xd85ffb 0xd85ffb <SimpleTimingPort::recvTiming(Packet*)+211>
>>> eflags 0x10202 [ IF RF ]
>>> cs 0x33 51
>>> ss 0x2b 43
>>> ds 0x0 0
>>> es 0x0 0
>>> fs 0x0 0
>>> gs 0x0 0
>>> fctrl 0x37f 895
>>> fstat 0x0 0
>>> ftag 0xffff 65535
>>> fiseg 0x0 0
>>> fioff 0x0 0
>>> foseg 0x0 0
>>> fooff 0x0 0
>>> fop 0x0 0
>>> mxcsr 0x1fa0 [ PE IM DM ZM OM UM PM ]
>>>
>>> The pkt pointer is then put into %rdi, I believe to act as the "this"
>>> pointer, and the value pointed to by %rax is called.
>>>
>>> Almost all of this is correct so far, but this is the point where things
>>> break.
>>>
>>> If we look at the encoding for the call instruction, we get the following:
>>>
>>> (gdb) x/3b (_ZN16SimpleTimingPort10recvTimingEP6Packet+209)
>>> 0xd85ff9 <_ZN16SimpleTimingPort10recvTimingEP6Packet+209>: 0xff
>>> 0xd0 0xc7
>>>
>>> Looking in table A-2 of AMD manual 3, we see that 0xff is the one byte
>>> opcode that encodes a group 5 instruction. We now need to look at the
>>> following modrm byte, 0xd0. That byte breaks down as mod=3, reg=2, and
>>> r/m=0. Looking at table A-6, we see that a reg field of 2 encodes a CALL
>>> instruction with an Ev argument. Looking in the operand syntax notation
>>> key at the top of A.1, E is for a general purpose register or memory
>>> operand specified by the ModRM byte. Looking at table A-15, we can see
>>> that with a mod field of 3, the operand is always a register value, not
>>> the location pointed to by the register value.
>>>
>>> What that ultimately seems to mean is that gcc is using a mod value of 3
>>> instead of, for instance, 0, and is inadvertently trying to execute the
>>> vtable of Printable instead of the function it points to. That piece of
>>> memory is apparently marked no execute, so the program fortunately dies
>>> instead of going bananas. gdb is apparently in on it too, and
>>> disassembles the call instruction to look like it's dereferencing %rax
>>> when it isn't.
>>>
>>> I would very much appreciate it if someone would explain to me why I'm
>>> wrong since it would be much easier to fix M5 than gcc. Failing that,
>>> hopefully somebody can get a hold of someone that can actually do
>>> something about this.
>>>
>>> Gabe
>>> _______________________________________________
>>> m5-dev mailing list
>>> [email protected]
>>> http://m5sim.org/mailman/listinfo/m5-dev
>>>
>>>
>>>
>>>
>> _______________________________________________
>> m5-dev mailing list
>> [email protected]
>> http://m5sim.org/mailman/listinfo/m5-dev
>>
>>
>
> _______________________________________________
> m5-dev mailing list
> [email protected]
> http://m5sim.org/mailman/listinfo/m5-dev
>
X86: Add a latency that describes how long an interrupt takes to propagate
through the IO APIC.
diff --git a/src/dev/x86/I82094AA.py b/src/dev/x86/I82094AA.py
--- a/src/dev/x86/I82094AA.py
+++ b/src/dev/x86/I82094AA.py
@@ -38,6 +38,8 @@
pio_latency = Param.Latency('1ns', "Programmed IO latency in simticks")
pio_addr = Param.Addr("Device address")
int_port = Port("Port for sending and receiving interrupt messages")
+ int_latency = Param.Latency('1ns', \
+ "Latency for an interrupt to propagate through this device.")
external_int_pic = Param.I8259(NULL, "External PIC, if any")
def pin(self, line):
diff --git a/src/dev/x86/i82094aa.cc b/src/dev/x86/i82094aa.cc
--- a/src/dev/x86/i82094aa.cc
+++ b/src/dev/x86/i82094aa.cc
@@ -36,7 +36,8 @@
#include "mem/packet_access.hh"
#include "sim/system.hh"
-X86ISA::I82094AA::I82094AA(Params *p) : PioDevice(p), IntDev(this),
+X86ISA::I82094AA::I82094AA(Params *p) : PioDevice(p),
+ IntDev(this, p->int_latency),
latency(p->pio_latency), pioAddr(p->pio_addr),
extIntPic(p->external_int_pic), lowestPriorityOffset(0)
{
diff -r e0c1c6d87649 configs/common/FSConfig.py
--- a/configs/common/FSConfig.py Sun Aug 23 14:19:14 2009 -0700
+++ b/configs/common/FSConfig.py Sun Aug 23 21:18:53 2009 -0700
@@ -174,6 +174,8 @@
mdesc.diskname = 'x86root.img'
self.readfile = mdesc.script()
+ self.mem_mode = mem_mode
+
# Physical memory
self.membus = MemBus(bus_id=1)
self.physmem = PhysicalMemory(range = AddrRange(mdesc.mem()))
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev