On Saturday, 13 August 2011 18:32:58 Antonio Cuni wrote: > On 12/08/11 17:49, David Naylor wrote: > > Would it not be a simple matter of changing the __(get|set)state method > > to use a tuple or even an int(long)? > > yes, I think it should be enough. I'm going on vacation soon and I won't > have a look at it right now, so if anybody wants to work on it, he's very > welcome (hint, hint :-)).
See attached for my naive attempt (and I did not run any unit tests on the code). It provides a 4.5x to 13.4x improvement in hash speed. If method 1 is acceptable I could properly implement it. If you look at the __hash__ method for datetime you will notice three return statements. The performance of those statements is as follows, based on: @bench.bench def hashdate(): res = 0 for i in range(10000000): now = datetime.datetime(i // 10000 + 1, (i % 10000) % 12 + 1, (i % 100) % 28 + 1) res ^= hash(now) return res hashdate() Method 1 (direct integer compute): hashdate: 0.70 seconds Method 2 (hash of __getstate()): hashdate: 2.39 seconds Method 3 (unity, i.e. the constant `return 0`): hashdate: 0.68 seconds Method 4 (original): hashdate: 10.93 seconds (python: 12.60 seconds) And back to my original "benchmark" with the change of `key = i`: # python iforkey.py ifdict: [2.8676719665527344, 2.872897148132324, 2.8396730422973633] keydict: [2.3266799449920654, 2.3431849479675293, 2.3421859741210938] defaultdict: [3.706634044647217, 3.6940698623657227, 3.7520179748535156] # pypy iforkey.py (original) ifdict: [29.201794147491455, 29.047310829162598, 29.34461998939514] keydict: [14.939809083938599, 15.250468015670776, 15.542209148406982] defaultdict: [15.11891484260559, 15.064191102981567, 14.94817304611206] # pypy iforkey.py (method 1) ifdict: [7.455403804779053, 7.376722097396851, 7.447360038757324] keydict: [3.9056499004364014, 3.833178997039795, 3.8482401371002197] defaultdict: [3.9568910598754883, 3.8757669925689697, 3.88435697555542] # pypy iforkey.py (method 2) ifdict: [11.993246078491211, 11.865861892700195, 11.916783094406128] keydict: [6.141685962677002, 6.092236042022705, 6.082683086395264] defaultdict: [6.376708030700684, 6.337490081787109, 6.361854791641235] So, it appears pypy is failing to speed up this contrived example...
--- datetime.py 2011-08-11 20:39:53.000000000 +0200 +++ /home/DragonSA/datetime.py 2011-08-13 19:48:49.000000000 +0200 @@ -13,7 +13,7 @@ Sources for time zone and DST data: http://www.twinsun.com/tz/tz-link.htm This was originally copied from the sandbox of the CPython CVS repository. -Thanks to Tim Peters for suggesting using it. +Thanks to Tim Peters for suggesting using it. """ import time as _time @@ -742,11 +742,6 @@ year, month, day (required, base 1) """ - if isinstance(year, str): - # Pickle support - self = object.__new__(cls) - self.__setstate(year) - return self _check_date_fields(year, month, day) self = object.__new__(cls) self.__year = year @@ -986,14 +981,7 @@ __safe_for_unpickling__ = True # For Python 2.2 def __getstate(self): - yhi, ylo = divmod(self.__year, 256) - return ("%c%c%c%c" % (yhi, ylo, self.__month, self.__day), ) - - def __setstate(self, string): - if len(string) != 4 or not (1 <= ord(string[2]) <= 12): - raise TypeError("not enough arguments") - yhi, ylo, self.__month, self.__day = map(ord, string) - self.__year = yhi * 256 + ylo + return (self.__year, self.__month, self.__day) def __reduce__(self): return (self.__class__, self.__getstate()) @@ -1112,10 +1100,6 @@ tzinfo (default to None) """ self = object.__new__(cls) - if isinstance(hour, str): - # Pickle support - self.__setstate(hour, minute or None) - return self _check_tzinfo_arg(tzinfo) _check_time_fields(hour, minute, second, microsecond) self.__hour = hour @@ -1201,13 +1185,7 @@ def __hash__(self): """Hash.""" - tzoff = self._utcoffset() - if not tzoff: # zero or None - return hash(self.__getstate()[0]) - h, m = divmod(self.hour * 60 + self.minute - tzoff, 60) - if 0 <= h < 24: - return hash(time(h, m, self.second, self.microsecond)) - return hash((h, m, self.second, self.microsecond)) + return hash(self.__getstate()) # Conversion to string @@ -1351,22 +1329,7 @@ __safe_for_unpickling__ = True # For Python 2.2 def __getstate(self): - us2, us3 = divmod(self.__microsecond, 256) 
- us1, us2 = divmod(us2, 256) - basestate = ("%c" * 6) % (self.__hour, self.__minute, self.__second, - us1, us2, us3) - if self._tzinfo is None: - return (basestate,) - else: - return (basestate, self._tzinfo) - - def __setstate(self, string, tzinfo): - if len(string) != 6 or ord(string[0]) >= 24: - raise TypeError("an integer is required") - self.__hour, self.__minute, self.__second, us1, us2, us3 = \ - map(ord, string) - self.__microsecond = (((us1 << 8) | us2) << 8) | us3 - self._tzinfo = tzinfo + return (self.__hour, self.__minute, self.__second, self.__microsecond, self._tzinfo) def __reduce__(self): return (time, self.__getstate()) @@ -1384,11 +1347,6 @@ def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, microsecond=0, tzinfo=None): - if isinstance(year, str): - # Pickle support - self = date.__new__(cls, year[:4]) - self.__setstate(year, month) - return self _check_tzinfo_arg(tzinfo) _check_time_fields(hour, minute, second, microsecond) self = date.__new__(cls, year, month, day) @@ -1602,7 +1560,7 @@ if L[-1] == 0: del L[-1] if L[-1] == 0: - del L[-1] + del L[-1] s = ", ".join(map(str, L)) s = "%s(%s)" % ('datetime.' + self.__class__.__name__, s) if self._tzinfo is not None: @@ -1796,35 +1754,17 @@ return base + timedelta(minutes = otoff-myoff) def __hash__(self): - tzoff = self._utcoffset() - if tzoff is None: - return hash(self.__getstate()[0]) - days = _ymd2ord(self.year, self.month, self.day) - seconds = self.hour * 3600 + (self.minute - tzoff) * 60 + self.second - return hash(timedelta(days, seconds, self.microsecond)) + #return ((self.__year * 10000 + self.__month + 100 + self.__day) + (self.__hour * 10000 + self.__minute * 100 + self.__second)) * 1000000 + self.__microsecond + return hash(self.__getstate()) + #return 0 # Pickle support. 
__safe_for_unpickling__ = True # For Python 2.2 def __getstate(self): - yhi, ylo = divmod(self.__year, 256) - us2, us3 = divmod(self.__microsecond, 256) - us1, us2 = divmod(us2, 256) - basestate = ("%c" * 10) % (yhi, ylo, self.__month, self.__day, - self.__hour, self.__minute, self.__second, - us1, us2, us3) - if self._tzinfo is None: - return (basestate,) - else: - return (basestate, self._tzinfo) - - def __setstate(self, string, tzinfo): - (yhi, ylo, self.__month, self.__day, self.__hour, - self.__minute, self.__second, us1, us2, us3) = map(ord, string) - self.__year = yhi * 256 + ylo - self.__microsecond = (((us1 << 8) | us2) << 8) | us3 - self._tzinfo = tzinfo + return (self.__year, self.__month, self.__day, self.__hour, + self.__minute, self.__second, self.__microsecond, self._tzinfo) def __reduce__(self): return (self.__class__, self.__getstate())
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ pypy-dev mailing list pypy-dev@python.org http://mail.python.org/mailman/listinfo/pypy-dev