Hello Sébastien,
On 02/13/2012 01:33 PM, Sébastien Volle wrote:
During my investigations, it turned out that using ctypes, PyPy 1.8 is
4x slower than CPython 2.6.5.
After looking at the PyPy buglist, it seems there are a couple of open issues
about ctypes so I figured I would ask you guys first before filing a new bug.
I'm pretty new to ctypes and pypy so I'm not sure I understand what's going on.
My program seems to spend a lot of time in ctypes/function.py:_convert_args
though, as the following profile trace demonstrates:
this is indeed a problem (or, better, a missing feature) in pypy's ctypes
implementation.
PyPy can make ctypes calls fast only in a set of "supported cases": in that
case, ctypes calls take a fast path which is actually very fast, while in all
the others it takes a slow path which is actually very slow :-/.
I looked at your code and I realized that there is one common case in which we
fail to take the fast path, and this happens when we pass a ctypes array to a
function which expects a pointer. This means that in your code all the calls
to c.memcmp are slow.
This is something that we should really fix. In the meantime, you can work
around the issue by manually casting the array to a c_void_p before calling
the function; e.g.::
xx = cast(offset_eth.dst, c_void_p)
yy = cast(eth_brd, c_void_p)
if c.memcmp(xx, yy, 6) != 0:
In addition, I should point out that both in pypy and cpython the code
executed inside functions is much faster than the code executed at module
level: so, I put most of the code in arp.py inside a function called main(),
which is then called and timed.
You can find my quickly hacked arp.py attached here. With my changes, it now
takes 0.13ms vs 440.5ms on CPython, and 0.77ms vs 1092.71ms on PyPy.
On this particular test CPython is still faster than PyPy, however it might
simply be that the JIT doesn't have enough time to warm up. Could you please
try it on a larger cap file so that it runs at least for e.g. 5 seconds?
ciao,
Anto
from ctypes import *
from ctypes.util import find_library
import array
import pcap
import time
import sys
# Handle to the C runtime library; the script only uses its memcmp().
c = CDLL(find_library("c"))
# Declare memcmp's parameters as (void *, void *, size_t) so ctypes knows how
# to convert the arguments; main() casts its arrays to c_void_p to match.
c.memcmp.argtypes = [c_void_p, c_void_p, c_size_t]
def sbo32(pInt):
    """Return ``pInt`` with the four bytes of its 32-bit form reversed.

    The earlier implementation byte-swapped an ``array.array("L", ...)``.
    The ``"L"`` typecode is a C ``unsigned long``, which is 8 bytes wide on
    LP64 platforms, so the swap ran over 64 bits there and ``sbo32(1)``
    produced ``1 << 56``.  The swap is done arithmetically here so the width
    is exactly 32 bits everywhere.

    Args:
        pInt: Unsigned integer in the range ``0 .. 0xFFFFFFFF``.

    Returns:
        The integer whose four bytes are those of ``pInt`` in reverse order.

    Raises:
        OverflowError: If ``pInt`` does not fit in an unsigned 32-bit value
            (the same exception type the array-based version raised).
    """
    if not 0 <= pInt <= 0xFFFFFFFF:
        raise OverflowError(
            "sbo32() argument out of range for 32 bits: %r" % (pInt,))
    return (
        ((pInt & 0x000000FF) << 24)
        | ((pInt & 0x0000FF00) << 8)
        | ((pInt & 0x00FF0000) >> 8)
        | ((pInt & 0xFF000000) >> 24)
    )
def sbo16(pInt):
    """Return ``pInt`` with the two bytes of its 16-bit form exchanged.

    The value is stored in a one-element unsigned-short array, the array is
    byte-swapped in place, and the single element is read back.

    Args:
        pInt: Unsigned integer that fits in a C ``unsigned short``.

    Returns:
        The byte-swapped integer.
    """
    swapped = array.array("H", [pInt])
    swapped.byteswap()
    (result,) = swapped
    return result
class ARP(LittleEndianStructure):
    """ctypes overlay for the ARP payload of a captured frame (28 bytes).

    getoffsets() maps this structure onto the capture buffer directly after
    the Ethernet header.  Multi-byte fields are read little-endian, which is
    why the module compares them against sbo16()-swapped constants.
    """

    _fields_ = [
        ("htype", c_uint16),
        ("ptype", c_uint16),
        ("hlen", c_uint8),
        ("plen", c_uint8),
        # Operation code; main() compares it with eth_arp_op_req/_rep.
        ("oper", c_uint16),
        # Sender hardware address; main() checks it against Eth.src.
        ("sha", c_uint8 * 6),
        ("spa", c_uint8 * 4),
        ("tha", c_uint8 * 6),
        ("tpa", c_uint8 * 4),
    ]
class Eth(LittleEndianStructure):
    """ctypes overlay for the 14-byte Ethernet header at the start of a frame.

    The 16-bit ``type`` field is read little-endian, so the module compares
    it against the pre-swapped constant ``eth_arp``.
    """

    _fields_ = [
        # Destination hardware address.
        ("dst", c_uint8 * 6),
        # Source hardware address.
        ("src", c_uint8 * 6),
        # Ethertype of the payload.
        ("type", c_uint16),
    ]
# Sizes in bytes of the two header overlays.  eth_len doubles as the offset
# of the ARP payload inside the capture buffer (see getoffsets).
eth_len, arp_len = sizeof(Eth), sizeof(ARP)
# Ethernet broadcast address ff:ff:ff:ff:ff:ff.  A ctypes array constructor
# fills only the positions it is given and zeroes the rest, so
# (c_uint8 * 6)(0xFF) would yield ff:00:00:00:00:00; all six bytes are
# therefore passed explicitly.
eth_brd = (c_uint8 * 6)(*([0xFF] * 6))
# ARP ethertype and the request/reply opcodes, byte-swapped once up front so
# they can be compared directly with fields read through the little-endian
# overlays.
eth_arp, eth_arp_op_req, eth_arp_op_rep = map(sbo16, (0x0806, 0x01, 0x02))
# Module-level state that getoffsets() refreshes: typed pointers to the
# Ethernet header and ARP payload, plus the current packet header.
offset_eth_p = offset_arp_p = header_p = None
def getoffsets(header, buf):
    """Re-point the module-level packet views at a capture buffer.

    Stores ``header`` in ``header_p``, prints the buffer address, and sets
    ``offset_eth_p`` / ``offset_arp_p`` to typed pointers at the start of
    ``buf`` and ``eth_len`` bytes into it respectively.

    Args:
        header: Packet header object handed back by the pcap wrapper.
        buf: Address of the packet data; must support ``int()`` and ``+``.
    """
    global header_p, offset_eth_p, offset_arp_p
    header_p = header
    print("Packet buffer now at 0x%X" % int(buf))
    offset_eth_p = cast(buf, POINTER(Eth))
    # The ARP payload starts immediately after the Ethernet header.
    arp_address = buf + eth_len
    offset_arp_p = cast(arp_address, POINTER(ARP))
def printeth(header, eth):
    """Print a one-line summary of a captured frame.

    Args:
        header: Pointer-like object whose ``contents.len`` is the packet
            length.
        eth: Pointer-like object whose ``contents`` exposes the ``type``,
            ``src`` and ``dst`` fields of the Ethernet header.
    """
    length = header.contents.len
    frame = eth.contents
    summary = "Got packet, length was %d type: 0x%X src: %s dst: %s " % (
        length, frame.type, frame.src, frame.dst)
    print(summary)
# Capture setup: choose a capture source, open it, and install an "arp"
# filter on the resulting handle.
# NOTE(review): the indentation of this section was lost in this copy, so
# the nesting of the statements below (in particular whether the filter is
# built only for live captures, where `net` is set) cannot be determined
# here -- restore it from the original attachment before running.
# Get default capture device
dev = pcap.lookup_dev()
handle = None
net = None
# Capture file that is opened when no live device is found.
filename = "demo.pcap"
if dev is None:
handle = pcap.open_offline(filename)
if not handle:
print >> sys.stderr, "Couldn't open capture device: %s" % pcap.get_error()
sys.exit(2)
else:
print "Will listen on %s" % dev
# Attempt to open the capture device
# NOTE(review): the second argument is presumably the snapshot length, as in
# libpcap's pcap_open_live(); 65335 looks like a typo for 65535 -- confirm.
handle = pcap.open_live(dev, 65335, 1, 50)
net = pcap.lookup_net(dev)
if net is None:
print >> sys.stderr, "Couldn't get IP interface for device: %s" % pcap.get_error()
sys.exit(2)
# Build ARP Filter
pcap_filter = pcap.pcap_compile(handle, "arp", 0, net)
pcap.setfilter(handle, pcap_filter)
print "Capture started"
def _same_mac(first, second):
    """Return True when two 6-byte ctypes address arrays hold equal bytes.

    Both arrays are cast to ``c_void_p`` before the call: handing a ctypes
    array straight to a pointer parameter keeps PyPy's ctypes off its fast
    call path.  Comparing the arrays with ``==`` / ``!=`` is not an option
    either, since ctypes arrays compare by identity.
    """
    return c.memcmp(cast(first, c_void_p), cast(second, c_void_p), 6) == 0


def _check_packet(offset_eth, offset_arp):
    """Sanity-check one captured frame and print any inconsistency found.

    Args:
        offset_eth: ``Eth`` view of the current packet.
        offset_arp: ``ARP`` view of the current packet.
    """
    # Basic sanity check
    # NOTE(review): printeth() reads the length as header.contents.len while
    # this reads header_p.len directly -- confirm which access matches the
    # object returned by pcap.getbuffer().
    if header_p.len < eth_len:
        print("Packet truncated, expected minimum of %d bytes, only got %d bytes" % (eth_len, header_p.len))
        return

    if eth_arp != offset_eth.type:
        # eth_arp and the field are both held byte-swapped; un-swap both so
        # the two numbers in the message are directly comparable.
        print("Incorrect ethertype, expected 0x%X, got 0x%X" %
              (sbo16(eth_arp), sbo16(offset_eth.type)))
        return

    oper = offset_arp.oper
    if oper == eth_arp_op_req:
        kind = "REQUEST"
    elif oper == eth_arp_op_rep:
        kind = "REPLY"
    else:
        print("INVALID ARP OPERATOR")
        return

    # A request that is not broadcast is a directed request; a reply that is
    # not broadcast is the normal case and a broadcast one is gratuitous.
    # Reporting is disabled, but the comparison is kept because the memcmp
    # calls are part of the workload this script times.
    if not _same_mac(offset_eth.dst, eth_brd):
        pass

    # Ethernet SRC and hardware SRC address must always be identical
    if not _same_mac(offset_eth.src, offset_arp.sha):
        print("SOURCE MISMATCH %s" % kind)


def main():
    """Check every packet available from ``handle`` and return how many were seen.

    Returns:
        The number of packets processed before the pcap wrapper returned a
        false buffer value.
    """
    # Grab a packet
    pb_last, header = pcap.getbuffer(handle)
    pb_curr = pb_last
    getoffsets(header, pb_curr)
    count = 0
    while pb_curr:
        # libpcap writes to a different buffer position sometimes
        # depending on how many packets are queued.
        if pb_curr != pb_last:
            pb_last = pb_curr
            getoffsets(header, pb_curr)
        count += 1
        _check_packet(offset_eth_p.contents, offset_arp_p.contents)
        pb_curr = pcap.next(handle, header)
    return count
# Time the whole capture loop and report the packet count and throughput.
x = time.time()
count = main()
fin = time.time()
elapsed = fin - x
print("elapsed time : %.2fms" % (elapsed * 1000))
print("Total packets : %d" % count)
# A very short run on a coarse clock can measure 0.0 seconds; report a rate
# of 0 rather than dying with ZeroDivisionError before the handle is closed.
rate = count / elapsed if elapsed > 0 else 0.0
print("packets/s : %0.2f\n" % rate)
pcap.close(handle)
_______________________________________________
pypy-dev mailing list
pypy-dev@python.org
http://mail.python.org/mailman/listinfo/pypy-dev