Hello Sébastien,

On 02/13/2012 01:33 PM, Sébastien Volle wrote:

During my investigations, it turned out that using ctypes, PyPy 1.8 is
4x slower than CPython 2.6.5.
After looking at the PyPy bug list, it seems there are a couple of open issues
about ctypes so I figured I would ask you guys first before filing a new bug.

I'm pretty new to ctypes and pypy so I'm not sure I understand what's going on.
My program seems to spend a lot of time in ctypes/function.py:_convert_args
though, as the following profile trace demonstrates:

This is indeed a problem (or, better, a missing feature) in pypy's ctypes implementation.

PyPy can make ctypes calls fast only in a set of "supported cases": in that case, ctypes calls take a fast path which is actually very fast, while in all the others it takes a slow path which is actually very slow :-/.

I looked at your code and I realized that there is one common case in which we fail to take the fast path, and this happens when we pass a ctypes array to a function which expects a pointer. This means that in your code all the calls to c.memcmp are slow.

This is something that we should really fix. In the meantime, you can work around the issue by manually casting the array to a c_void_p before calling the function; e.g.::

                        xx = cast(offset_eth.dst, c_void_p)
                        yy = cast(eth_brd, c_void_p)
                        if c.memcmp(xx, yy, 6) != 0:

In addition, I should point out that both in pypy and cpython the code executed inside functions is much faster than the code executed at module level: so, I put most of the code in arp.py inside a function called main(), which is then called and timed.

You can find my quickly hacked arp.py attached here. With my changes, it now takes 0.13ms vs 440.5ms on CPython, and 0.77ms vs 1092.71ms on PyPy.

On this particular test CPython is still faster than PyPy; however, it might simply be that the JIT doesn't have enough time to warm up. Could you please try it on a larger cap file so that it runs for at least, e.g., 5 seconds?

ciao,
Anto
from ctypes import *
from ctypes.util import find_library
import array
import pcap
import time
import sys

# Load the C library through ctypes; it is used below for memcmp on raw
# packet bytes.
c = CDLL(find_library("c"))
# Declare memcmp's parameters explicitly (two untyped pointers and a size).
# Callers in main() cast their ctypes arrays to c_void_p before the call.
c.memcmp.argtypes = [c_void_p, c_void_p, c_size_t]

def sbo32(pInt):
    """Return the 32-bit unsigned value *pInt* with its byte order reversed.

    The swap is done arithmetically on exactly four bytes.  The previous
    implementation stored the value in an ``array.array("L")``, whose item
    size is 8 bytes on 64-bit Unix builds, so ``byteswap()`` reversed eight
    bytes and returned a value shifted into the upper half of a 64-bit word.

    Raises:
        OverflowError: if *pInt* does not fit in an unsigned 32-bit integer.
    """
    if not 0 <= pInt <= 0xFFFFFFFF:
        raise OverflowError("sbo32() argument out of range for 32 bits")
    return (((pInt & 0x000000FF) << 24) |
            ((pInt & 0x0000FF00) << 8) |
            ((pInt & 0x00FF0000) >> 8) |
            ((pInt & 0xFF000000) >> 24))

def sbo16(pInt):
    """Return the 16-bit value *pInt* with its two bytes exchanged.

    The value is placed in a one-element unsigned-short array whose bytes
    are swapped in place; values that do not fit that type are rejected by
    ``array.array`` itself.
    """
    swapped = array.array("H", [pInt])
    swapped.byteswap()
    (result,) = swapped
    return result

class ARP(LittleEndianStructure):
    """ctypes overlay for the ARP portion of a captured packet.

    Multi-byte fields are declared little-endian, so callers compare them
    against constants that were byte-swapped beforehand.
    """
    _fields_ = [
        ("htype", c_uint16),
        ("ptype", c_uint16),
        ("hlen", c_uint8),
        ("plen", c_uint8),
        # Operation code; compared against the request/reply constants.
        ("oper", c_uint16),
        # Sender hardware address; checked against the Ethernet source.
        ("sha", c_uint8 * 6),
        ("spa", c_uint8 * 4),
        ("tha", c_uint8 * 6),
        ("tpa", c_uint8 * 4),
    ]

class Eth(LittleEndianStructure):
    """ctypes overlay for the Ethernet header at the start of a packet.

    The 16-bit ``type`` field is declared little-endian, so callers compare
    it against a constant that was byte-swapped beforehand.
    """
    _fields_ = [
        ("dst", c_uint8 * 6),
        ("src", c_uint8 * 6),
        ("type", c_uint16),
    ]
   
   
# Byte sizes of the two header structures.  eth_len doubles as the offset of
# the ARP structure inside a packet buffer and as the minimum packet length
# accepted by main().
eth_len = sizeof(Eth)
arp_len = sizeof(ARP)
# Ethernet broadcast address FF:FF:FF:FF:FF:FF.  Every element has to be
# supplied explicitly: ctypes zero-fills array elements that receive no
# initializer, so (c_uint8 * 6)(0xFF) would produce FF:00:00:00:00:00.
eth_brd = (c_uint8 * 6)(*([0xFF] * 6))
# Constants byte-swapped once with sbo16 so that main() can compare them
# directly against the 16-bit fields read through the Eth/ARP structures.
eth_arp = sbo16(0x0806)
eth_arp_op_req = sbo16(0x01)
eth_arp_op_rep = sbo16(0x02)

# Module-level state assigned by getoffsets(): pointers to the Eth and ARP
# views of the current packet buffer, and the packet header object.
offset_eth_p = None
offset_arp_p = None
header_p = None

def getoffsets(header, buf):
    """Point the module-level packet views at the buffer *buf*.

    Stores *header* in ``header_p``, prints the buffer address, and rebuilds
    ``offset_eth_p`` (an ``Eth`` pointer at *buf*) and ``offset_arp_p`` (an
    ``ARP`` pointer ``eth_len`` bytes further on).
    """
    global header_p, offset_eth_p, offset_arp_p

    header_p = header
    print("Packet buffer now at 0x%X" % int(buf))

    # The Ethernet header sits at the start of the buffer ...
    offset_eth_p = cast(buf, POINTER(Eth))
    # ... and the ARP structure follows immediately after it.
    arp_start = buf + eth_len
    offset_arp_p = cast(arp_start, POINTER(ARP))
    
def printeth(header, eth):
    """Print a one-line summary of a packet.

    *header* and *eth* are dereferenced through ``.contents``; the line
    shows the header's ``len`` and the frame's ``type``, ``src`` and ``dst``.
    """
    packet_len = header.contents.len
    frame = eth.contents
    summary = (packet_len, frame.type, frame.src, frame.dst)
    print("Got packet, length was %d type: 0x%X src: %s dst: %s " % summary)

# Get default capture device
dev = pcap.lookup_dev()
handle = None
net = None
filename = "demo.pcap"

if dev is None:
    # No device was found: read packets from the capture file instead.
    handle = pcap.open_offline(filename)
else:
    print("Will listen on %s" % dev)

    # Attempt to open the capture device
    handle = pcap.open_live(dev, 65335, 1, 50)

# Validate the handle for BOTH paths.  Previously only the offline path was
# checked, so a failed open_live() surfaced later as an unrelated error.
if not handle:
    sys.stderr.write("Couldn't open capture device: %s\n" % pcap.get_error())
    sys.exit(2)

# NOTE(review): lookup_net() is also called with dev=None on the offline
# path -- confirm the pcap wrapper accepts that.
net = pcap.lookup_net(dev)
if net is None:
    sys.stderr.write("Couldn't get IP interface for device: %s\n" % pcap.get_error())
    sys.exit(2)

# Build ARP Filter
pcap_filter = pcap.pcap_compile(handle, "arp", 0, net)
pcap.setfilter(handle, pcap_filter)


print("Capture started")

def main():
    """Check every captured packet for ARP anomalies and return the count.

    Relies on module-level state: the open ``handle``, the ``c`` library
    object, the pre-swapped ``eth_arp*`` constants, and the
    ``offset_eth_p`` / ``offset_arp_p`` / ``header_p`` globals that
    ``getoffsets`` assigns.

    Returns:
        The number of packet buffers examined before ``pcap.next`` returned
        a false value.
    """
    # Grab a packet
    pb_last, header = pcap.getbuffer(handle)
    pb_curr = pb_last
    getoffsets(header, pb_curr)
    count = 0

    while pb_curr:
        # libpcap writes to a different buffer position sometimes
        # depending on how many packets are queued.
        if pb_curr != pb_last:
            pb_last = pb_curr
            getoffsets(header, pb_curr)

        count = count + 1
        offset_eth = offset_eth_p.contents
        offset_arp = offset_arp_p.contents

        # Basic sanity check
        if header_p.len < eth_len:
            print("Packet truncated, expected minimum of %d bytes, only got %d bytes" % (eth_len, header_p.len))
        elif eth_arp != offset_eth.type:
            print("Incorrect ethertype, expected 0x%X, got 0x%X" %
                  (sbo16(eth_arp), offset_eth.type))
        elif offset_arp.oper == eth_arp_op_req:
            # Request.
            # Arrays are cast to c_void_p by hand before calling memcmp;
            # passing the ctypes arrays directly makes PyPy take its slow
            # ctypes call path.
            # ARP request was not broadcast (a little weird)
            xx = cast(offset_eth.dst, c_void_p)
            yy = cast(eth_brd, c_void_p)
            if c.memcmp(xx, yy, 6) != 0:
                # Directed request: deliberately not reported.
                pass

            # Ethernet SRC and hardware SRC address must always be identical
            xx = cast(offset_eth.src, c_void_p)
            yy = cast(offset_arp.sha, c_void_p)
            if c.memcmp(xx, yy, 6) != 0:
                print("SOURCE MISMATCH REQUEST")
        elif offset_arp.oper == eth_arp_op_rep:
            # Reply.
            # I'm here I'm here! (Mostly VRRP/CARP maybe HSRP?)
            # NOTE(review): ctypes arrays do not appear to define value
            # equality, so this likely compares object identity rather than
            # the six address bytes -- confirm before acting on the result.
            if offset_eth.dst != eth_brd:
                # Gratuitous reply: deliberately not reported.
                pass

            # Ethernet SRC and hardware SRC address must always be identical
            xx = cast(offset_eth.src, c_void_p)
            yy = cast(offset_arp.sha, c_void_p)
            if c.memcmp(xx, yy, 6) != 0:
                print("SOURCE MISMATCH REPLY")
        else:
            print("INVALID ARP OPERATOR")

        pb_curr = pcap.next(handle, header)

    # Return only once the capture is exhausted.  The return used to sit
    # inside the loop body, which ended processing after the first packet
    # (and returned None when the very first buffer was false).
    return count

# Time one complete run of main(); x and fin are wall-clock timestamps in
# seconds taken immediately before and after the call.
x = time.time()
count = main()
fin = time.time()

# Report the elapsed time in milliseconds, the packet count, and throughput.
print("elapsed time  : %.2fms" % ((fin - x) * 1000,))
print("Total packets : %d" % (count,))
print("packets/s     : %0.2f\n" % (count / (fin - x),))

# Release the capture handle now that the measurements have been printed.
pcap.close(handle)
_______________________________________________
pypy-dev mailing list
pypy-dev@python.org
http://mail.python.org/mailman/listinfo/pypy-dev

Reply via email to