test/shaping/Makefile.am      |    2
test/shaping/hb-diff-ngrams   |    5 +
test/shaping/hb-diff-stat     |    5 +
test/shaping/hb_test_tools.py |  174 +++++++++++++++++++++++++++++++++++++++--
4 files changed, 177 insertions(+), 9 deletions(-)
New commits: commit 2214a03900d32710573a1b05c7665195b3129761 Author: Behdad Esfahbod <beh...@behdad.org> Date: Wed May 9 09:54:54 2012 +0200 Add hb-diff-ngrams diff --git a/test/shaping/Makefile.am b/test/shaping/Makefile.am index 81c9991..4fb762c 100644 --- a/test/shaping/Makefile.am +++ b/test/shaping/Makefile.am @@ -13,6 +13,7 @@ EXTRA_DIST += \ hb-diff \ hb-diff-colorize \ hb-diff-filter-failures \ + hb-diff-ngrams \ hb-diff-stat \ hb-manifest-read \ hb-manifest-update \ diff --git a/test/shaping/hb-diff-ngrams b/test/shaping/hb-diff-ngrams new file mode 100755 index 0000000..a496447 --- /dev/null +++ b/test/shaping/hb-diff-ngrams @@ -0,0 +1,5 @@ +#!/usr/bin/python + +from hb_test_tools import * + +UtilMains.process_multiple_files (DiffSinks.print_ngrams) diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py index a38f067..3ff75b8 100644 --- a/test/shaping/hb_test_tools.py +++ b/test/shaping/hb_test_tools.py @@ -155,12 +155,60 @@ class DiffFilters: if not DiffHelpers.test_passed (lines): for l in lines: yield l +class Stat: + + def __init__ (self): + self.count = 0 + self.freq = 0 + + def add (self, test): + self.count += 1 + self.freq += test.freq + +class Stats: + + def __init__ (self): + self.passed = Stat () + self.failed = Stat () + self.total = Stat () + + def add (self, test): + self.total.add (test) + if test.passed: + self.passed.add (test) + else: + self.failed.add (test) + + def mean (self): + return float (self.passed.count) / self.total.count + + def variance (self): + return (float (self.passed.count) / self.total.count) * \ + (float (self.failed.count) / self.total.count) + + def stddev (self): + return self.variance () ** .5 + + def zscore (self, population): + """Calculate the standard score. + Population is the Stats for population. + Self is Stats for sample. + Returns larger absolute value if sample is highly unlikely to be random. + Anything outside of -3..+3 is very unlikely to be random. 
+ See: http://en.wikipedia.org/wiki/Standard_score""" + + return (self.mean () - population.mean ()) / population.stddev () + + + + class DiffSinks: @staticmethod def print_stat (f): passed = 0 failed = 0 + # XXX port to Stats, but that would really slow us down here for key, lines in DiffHelpers.separate_test_cases (f): if DiffHelpers.test_passed (lines): passed += 1 @@ -172,21 +220,34 @@ class DiffSinks: @staticmethod def print_ngrams (f, ns=(1,2,3)): gens = tuple (Ngram.generator (n) for n in ns) + allstats = Stats () + allgrams = {} for key, lines in DiffHelpers.separate_test_cases (f): test = Test (lines) - unicodes = test.unicodes - del test + allstats.add (test) for gen in gens: - print "Printing %d-grams:" % gen.n - for ngram in gen (unicodes): - print ngram + for ngram in gen (test.unicodes): + if ngram not in allgrams: + allgrams[ngram] = Stats () + allgrams[ngram].add (test) + + importantgrams = {} + for ngram, stats in allgrams.iteritems (): + if stats.failed.count >= 30: # for statistical reasons + importantgrams[ngram] = stats + allgrams = importantgrams + del importantgrams + + for ngram, stats in allgrams.iteritems (): + print "zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)) class Test: def __init__ (self, lines): + self.freq = 1 self.passed = True self.identifier = None self.text = None commit 178e6dce01ad28c8708bad62ce0fb79c46e836dc Author: Behdad Esfahbod <beh...@behdad.org> Date: Wed May 9 08:57:29 2012 +0200 Add N-gram generator diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py index d3c0939..a38f067 100644 --- a/test/shaping/hb_test_tools.py +++ b/test/shaping/hb_test_tools.py @@ -169,6 +169,53 @@ class DiffSinks: total = passed + failed print "%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. 
* failed / total) + @staticmethod + def print_ngrams (f, ns=(1,2,3)): + gens = tuple (Ngram.generator (n) for n in ns) + for key, lines in DiffHelpers.separate_test_cases (f): + test = Test (lines) + unicodes = test.unicodes + del test + + for gen in gens: + print "Printing %d-grams:" % gen.n + for ngram in gen (unicodes): + print ngram + + + +class Test: + + def __init__ (self, lines): + self.passed = True + self.identifier = None + self.text = None + self.unicodes = None + self.glyphs = None + for l in lines: + symbol = l[0] + if symbol != ' ': + self.passed = False + i = 1 + if ':' in l: + i = l.index (':') + if not self.identifier: + self.identifier = l[1:i] + i = i + 2 # Skip colon and space + j = -1 + if l[j] == '\n': + j -= 1 + brackets = l[i] + l[j] + l = l[i+1:-2] + if brackets == '()': + self.text = l + elif brackets == '<>': + self.unicodes = Unicode.parse (l) + elif brackets == '[]': + # XXX we don't handle failed tests here + self.glyphs = l + + class DiffHelpers: @staticmethod @@ -205,6 +252,23 @@ class FilterHelpers: return printer +class Ngram: + + @staticmethod + def generator (n): + + def gen (f): + l = [] + for x in f: + l.append (x) + if len (l) == n: + yield tuple (l) + l[:1] = [] + + gen.n = n + return gen + + class UtilMains: @staticmethod @@ -276,10 +340,14 @@ class Unicode: return '<' + u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8') + '>' @staticmethod - def encode (s): + def parse (s): s = re.sub (r"[<+>,\\uU\n ]", " ", s) s = re.sub (r"0[xX]", " ", s) - return u''.join (unichr (int (x, 16)) for x in s.split (' ') if len (x)).encode ('utf-8') + return [int (x, 16) for x in s.split (' ') if len (x)] + + @staticmethod + def encode (s): + return u''.join (unichr (x) for x in Unicode.parse (s)).encode ('utf-8') shorthands = { "ZERO WIDTH NON-JOINER": "ZWNJ", commit 98669ceb77657d60435f2cb2e3fc18272c0a2c6a Author: Behdad Esfahbod <beh...@behdad.org> Date: Wed May 9 08:16:15 2012 +0200 Use groupby() diff --git 
a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py index 70a9ce1..d3c0939 100644 --- a/test/shaping/hb_test_tools.py +++ b/test/shaping/hb_test_tools.py @@ -150,7 +150,8 @@ class DiffFilters: @staticmethod def filter_failures (f): - for lines in DiffHelpers.separate_test_cases (f): + for key, lines in DiffHelpers.separate_test_cases (f): + lines = list (lines) if not DiffHelpers.test_passed (lines): for l in lines: yield l @@ -160,7 +161,7 @@ class DiffSinks: def print_stat (f): passed = 0 failed = 0 - for lines in DiffHelpers.separate_test_cases (f): + for key, lines in DiffHelpers.separate_test_cases (f): if DiffHelpers.test_passed (lines): passed += 1 else: @@ -176,22 +177,11 @@ class DiffHelpers: have a colon character, groups them by identifier, yielding lists of all lines with the same identifier.''' - acc = [] - iden = None - for l in f: - if ':' not in l: - if acc: yield acc - acc = [] - iden = None - yield [l] - continue - l_iden = l[1:l.index (':')] - if acc and iden != l_iden: - yield acc - acc = [] - iden = l_iden - acc.append (l) - if acc: yield acc + def identifier (l): + if ':' in l[1:]: + return l[1:l.index (':')] + return l + return groupby (f, key=identifier) @staticmethod def test_passed (lines): commit c438a14b62433db488b5c90854a4a3934adf3305 Author: Behdad Esfahbod <beh...@behdad.org> Date: Wed May 9 07:45:17 2012 +0200 Add hb-diff-stat diff --git a/test/shaping/Makefile.am b/test/shaping/Makefile.am index f216c5d..81c9991 100644 --- a/test/shaping/Makefile.am +++ b/test/shaping/Makefile.am @@ -13,6 +13,7 @@ EXTRA_DIST += \ hb-diff \ hb-diff-colorize \ hb-diff-filter-failures \ + hb-diff-stat \ hb-manifest-read \ hb-manifest-update \ hb-unicode-decode \ diff --git a/test/shaping/hb-diff-stat b/test/shaping/hb-diff-stat new file mode 100755 index 0000000..81626e1 --- /dev/null +++ b/test/shaping/hb-diff-stat @@ -0,0 +1,5 @@ +#!/usr/bin/python + +from hb_test_tools import * + +UtilMains.process_multiple_files (DiffSinks.print_stat) 
diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py index 17181ac..70a9ce1 100644 --- a/test/shaping/hb_test_tools.py +++ b/test/shaping/hb_test_tools.py @@ -151,9 +151,23 @@ class DiffFilters: @staticmethod def filter_failures (f): for lines in DiffHelpers.separate_test_cases (f): - if any (l[0] != ' ' for l in lines): + if not DiffHelpers.test_passed (lines): for l in lines: yield l +class DiffSinks: + + @staticmethod + def print_stat (f): + passed = 0 + failed = 0 + for lines in DiffHelpers.separate_test_cases (f): + if DiffHelpers.test_passed (lines): + passed += 1 + else: + failed += 1 + total = passed + failed + print "%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total) + class DiffHelpers: @staticmethod @@ -175,10 +189,14 @@ class DiffHelpers: if acc and iden != l_iden: yield acc acc = [] - iden = l_iden + iden = l_iden acc.append (l) if acc: yield acc + @staticmethod + def test_passed (lines): + return all (l[0] == ' ' for l in lines) + class FilterHelpers: commit 1058d031e2046eb80331b0950eaff75c2bf608dc Author: Behdad Esfahbod <beh...@behdad.org> Date: Wed May 9 07:30:07 2012 +0200 Make hb-diff-filter-failtures retain all test info for failed tests diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py index 03a7710..17181ac 100644 --- a/test/shaping/hb_test_tools.py +++ b/test/shaping/hb_test_tools.py @@ -149,16 +149,35 @@ class ZipDiffer: class DiffFilters: @staticmethod - def filter_failures (f, symbols=diff_symbols): - for l in f: - if l[0] in symbols: - # TODO retain all lines of the failure - yield l + def filter_failures (f): + for lines in DiffHelpers.separate_test_cases (f): + if any (l[0] != ' ' for l in lines): + for l in lines: yield l +class DiffHelpers: -class ShapeFilters: + @staticmethod + def separate_test_cases (f): + '''Reads lines from f, and if the lines have identifiers, ie. 
+ have a colon character, groups them by identifier, + yielding lists of all lines with the same identifier.''' - pass + acc = [] + iden = None + for l in f: + if ':' not in l: + if acc: yield acc + acc = [] + iden = None + yield [l] + continue + l_iden = l[1:l.index (':')] + if acc and iden != l_iden: + yield acc + acc = [] + iden = l_iden + acc.append (l) + if acc: yield acc class FilterHelpers: _______________________________________________ HarfBuzz mailing list HarfBuzz@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/harfbuzz