Like other things posted here without notices to the contrary, this
code is in the public domain.

You can find partial hash collisions by sorting the hashes and then
walking through the sorted lists looking for successive entries with
long common prefixes.  I was going to write a program to do that, and
had gotten as far as doing the hashjoin bit, but then I realized the
problem was profoundly irrelevant to my life.

#!/usr/bin/python
# Slow birthday-attack program
# Does only about 1000 hashes per second
# Can practically find partial hash collisions up to 32 bits
# without much trouble on my 128MB-RAM laptop.
# But its data access pattern is pretty random, so it degrades
# spectacularly once it starts hitting swap.
from __future__ import generators
import sha, sys, bsddb

def strings(maxlen):
    if maxlen == 0:
        yield ''
    else:
        for item in strings(maxlen - 1):
            yield item
        for num in range(256):
            char = chr(num)
            for item in strings(maxlen-1):
                yield char + item

def smartcollide(a, b, minlen):
    count = 0
    ahasher, bhasher = sha.new(a), sha.new(b)
    ahits = {}
    # this makes it 5-7x slower
    # but may degrade more slowly once it exceeds memory size
    # haven't yet seen any evidence it helps.
    #ahits = bsddb.hashopen('collider.db', 'n')
    while 1:
        for eachstring in strings(minlen * 4):
            aht = ahasher.copy(); aht.update(eachstring)
            ahits[aht.hexdigest()[:minlen]] = eachstring
            bht = bhasher.copy(); bht.update(eachstring)
            count += 2
            if ahits.has_key(bht.hexdigest()[:minlen]):
                return a + ahits[bht.hexdigest()[:minlen]], b + eachstring, count

def main(maxlen, str1, str2, collider):
    for minlen in range(maxlen):
        a, b, count = collider(str1, str2, minlen)
        ahash, bhash = sha.new(a).hexdigest(), sha.new(b).hexdigest()
        print (a, ahash, b, bhash, count)

if __name__ == '__main__':
    main(11, sys.argv[1], sys.argv[2], smartcollide)



Reply via email to