On Saturday, 13 August 2011 18:32:58 Antonio Cuni wrote:
> On 12/08/11 17:49, David Naylor wrote:
> > Would it not be a simple matter of changing the __(get|set)state method
> > to use a tuple or even an int(long)?
> 
> yes, I think it should be enough. I'm going on vacation soon and I won't
> have a look at it right now, so if anybody wants to work on it, he's very
> welcome (hint, hint :-)).

See attached for my naive attempt (note that I did not run any unit tests on
the code).  It provides between 4.5x and 13.4x improvement in hash speed.  If
method 1 is acceptable I could implement it properly.

If you look at the __hash__ method for datetime you will notice three return
statements.  The performance of each of those statements is as follows,
based on this benchmark:

@bench.bench
def hashdate():
     """Benchmark hash() on 10,000,000 freshly constructed datetime objects.

     Each hash is XOR-folded into a single integer, which is returned.
     """
     res = 0
     for i in range(10000000):
         # Derive year/month/day from i: year is i // 10000 + 1, month is
         # kept within 1..12 and day within 1..28 (valid for every month).
         now = datetime.datetime(i // 10000 + 1, (i % 10000) % 12 + 1, (i % 
100) % 28 + 1)
         res ^= hash(now)
     return res

hashdate()

Method 1 (direct integer compute):
hashdate: 0.70 seconds

Method 2 (hash of __getstate()):
hashdate: 2.39 seconds

Method 3 (unity):
hashdate: 0.68 seconds

Method 4 (original):
hashdate: 10.93 seconds (python: 12.60 seconds)

And back to my original "benchmark" with the change of `key = i`:

# python iforkey.py
ifdict: [2.8676719665527344, 2.872897148132324, 2.8396730422973633]
keydict: [2.3266799449920654, 2.3431849479675293, 2.3421859741210938]
defaultdict: [3.706634044647217, 3.6940698623657227, 3.7520179748535156]

# pypy iforkey.py (original)
ifdict: [29.201794147491455, 29.047310829162598, 29.34461998939514]
keydict: [14.939809083938599, 15.250468015670776, 15.542209148406982]
defaultdict: [15.11891484260559, 15.064191102981567, 14.94817304611206]

# pypy iforkey.py (method 1)
ifdict: [7.455403804779053, 7.376722097396851, 7.447360038757324]
keydict: [3.9056499004364014, 3.833178997039795, 3.8482401371002197]
defaultdict: [3.9568910598754883, 3.8757669925689697, 3.88435697555542]

# pypy iforkey.py (method 2)
ifdict: [11.993246078491211, 11.865861892700195, 11.916783094406128]
keydict: [6.141685962677002, 6.092236042022705, 6.082683086395264]
defaultdict: [6.376708030700684, 6.337490081787109, 6.361854791641235]

So, it appears pypy is failing to speed up this contrived example...
--- datetime.py	2011-08-11 20:39:53.000000000 +0200
+++ /home/DragonSA/datetime.py	2011-08-13 19:48:49.000000000 +0200
@@ -13,7 +13,7 @@
 Sources for time zone and DST data: http://www.twinsun.com/tz/tz-link.htm
 
 This was originally copied from the sandbox of the CPython CVS repository.
-Thanks to Tim Peters for suggesting using it. 
+Thanks to Tim Peters for suggesting using it.
 """
 
 import time as _time
@@ -742,11 +742,6 @@
 
         year, month, day (required, base 1)
         """
-        if isinstance(year, str):
-            # Pickle support
-            self = object.__new__(cls)
-            self.__setstate(year)
-            return self
         _check_date_fields(year, month, day)
         self = object.__new__(cls)
         self.__year = year
@@ -986,14 +981,7 @@
     __safe_for_unpickling__ = True      # For Python 2.2
 
     def __getstate(self):
-        yhi, ylo = divmod(self.__year, 256)
-        return ("%c%c%c%c" % (yhi, ylo, self.__month, self.__day), )
-
-    def __setstate(self, string):
-        if len(string) != 4 or not (1 <= ord(string[2]) <= 12):
-            raise TypeError("not enough arguments")
-        yhi, ylo, self.__month, self.__day = map(ord, string)
-        self.__year = yhi * 256 + ylo
+        return (self.__year, self.__month, self.__day)
 
     def __reduce__(self):
         return (self.__class__, self.__getstate())
@@ -1112,10 +1100,6 @@
         tzinfo (default to None)
         """
         self = object.__new__(cls)
-        if isinstance(hour, str):
-            # Pickle support
-            self.__setstate(hour, minute or None)
-            return self
         _check_tzinfo_arg(tzinfo)
         _check_time_fields(hour, minute, second, microsecond)
         self.__hour = hour
@@ -1201,13 +1185,7 @@
 
     def __hash__(self):
         """Hash."""
-        tzoff = self._utcoffset()
-        if not tzoff: # zero or None
-            return hash(self.__getstate()[0])
-        h, m = divmod(self.hour * 60 + self.minute - tzoff, 60)
-        if 0 <= h < 24:
-            return hash(time(h, m, self.second, self.microsecond))
-        return hash((h, m, self.second, self.microsecond))
+        return hash(self.__getstate())
 
     # Conversion to string
 
@@ -1351,22 +1329,7 @@
     __safe_for_unpickling__ = True      # For Python 2.2
 
     def __getstate(self):
-        us2, us3 = divmod(self.__microsecond, 256)
-        us1, us2 = divmod(us2, 256)
-        basestate = ("%c" * 6) % (self.__hour, self.__minute, self.__second,
-                                  us1, us2, us3)
-        if self._tzinfo is None:
-            return (basestate,)
-        else:
-            return (basestate, self._tzinfo)
-
-    def __setstate(self, string, tzinfo):
-        if len(string) != 6 or ord(string[0]) >= 24:
-            raise TypeError("an integer is required")
-        self.__hour, self.__minute, self.__second, us1, us2, us3 = \
-                                                            map(ord, string)
-        self.__microsecond = (((us1 << 8) | us2) << 8) | us3
-        self._tzinfo = tzinfo
+        return (self.__hour, self.__minute, self.__second, self.__microsecond, self._tzinfo)
 
     def __reduce__(self):
         return (time, self.__getstate())
@@ -1384,11 +1347,6 @@
 
     def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                 microsecond=0, tzinfo=None):
-        if isinstance(year, str):
-            # Pickle support
-            self = date.__new__(cls, year[:4])
-            self.__setstate(year, month)
-            return self
         _check_tzinfo_arg(tzinfo)
         _check_time_fields(hour, minute, second, microsecond)
         self = date.__new__(cls, year, month, day)
@@ -1602,7 +1560,7 @@
         if L[-1] == 0:
             del L[-1]
         if L[-1] == 0:
-            del L[-1]            
+            del L[-1]
         s = ", ".join(map(str, L))
         s = "%s(%s)" % ('datetime.' + self.__class__.__name__, s)
         if self._tzinfo is not None:
@@ -1796,35 +1754,17 @@
         return base + timedelta(minutes = otoff-myoff)
 
     def __hash__(self):
-        tzoff = self._utcoffset()
-        if tzoff is None:
-            return hash(self.__getstate()[0])
-        days = _ymd2ord(self.year, self.month, self.day)
-        seconds = self.hour * 3600 + (self.minute - tzoff) * 60 + self.second
-        return hash(timedelta(days, seconds, self.microsecond))
+        #return ((self.__year * 10000 + self.__month + 100 + self.__day) + (self.__hour * 10000 + self.__minute * 100 + self.__second)) * 1000000 + self.__microsecond
+        return hash(self.__getstate())
+        #return 0
 
     # Pickle support.
 
     __safe_for_unpickling__ = True      # For Python 2.2
 
     def __getstate(self):
-        yhi, ylo = divmod(self.__year, 256)
-        us2, us3 = divmod(self.__microsecond, 256)
-        us1, us2 = divmod(us2, 256)
-        basestate = ("%c" * 10) % (yhi, ylo, self.__month, self.__day,
-                                   self.__hour, self.__minute, self.__second,
-                                   us1, us2, us3)
-        if self._tzinfo is None:
-            return (basestate,)
-        else:
-            return (basestate, self._tzinfo)
-
-    def __setstate(self, string, tzinfo):
-        (yhi, ylo, self.__month, self.__day, self.__hour,
-         self.__minute, self.__second, us1, us2, us3) = map(ord, string)
-        self.__year = yhi * 256 + ylo
-        self.__microsecond = (((us1 << 8) | us2) << 8) | us3
-        self._tzinfo = tzinfo
+        return (self.__year, self.__month, self.__day, self.__hour,
+                self.__minute, self.__second, self.__microsecond, self._tzinfo)
 
     def __reduce__(self):
         return (self.__class__, self.__getstate())

Attachment: signature.asc
Description: This is a digitally signed message part.

_______________________________________________
pypy-dev mailing list
pypy-dev@python.org
http://mail.python.org/mailman/listinfo/pypy-dev

Reply via email to