scipy: http://www.scipy.org/
t-test: http://en.wikipedia.org/wiki/Student's_t-test
The scipy and numpy modules are needed. An unpaired t-test is used to
compare the two samples, and the user can check the p-value to tell
whether a regression bug exists. Verified against this page:
http://graphpad.com/quickcalcs/ttest1.cfm

If the difference between the two samples is statistically significant
(p <= 0.05), a '+' or '-' is added before the p-value:
  '+': avg_sample1 < avg_sample2
  '-': avg_sample1 > avg_sample2

The analysis module doesn't care what is tested or how the columns and
lines relate to each other; I currently describe the relationship of the
columns in perf.conf. It is used to update column results after the
averages have been computed, e.g.:

| [netperf]
| avg_update = 4,2,3|14,5,12|15,6,13

  "normalize"   = $throughput / $cpu
  "#tpkt/#exit" = $tx-pkts / $io_exit
  "#rpkt/#irq"  = $rx-pkts / $irq_inj

Then columns 4 (normalize), 14 (#tpkt/#exit) and 15 (#rpkt/#irq) will be
re-computed (toy sketches of the significance flag and of the avg_update
rule follow below the '---' line).

Signed-off-by: Amos Kong <ak...@redhat.com>
---
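Two illustrative sketches of the logic described above, in the same
Python 2 style as the tools. These are notes only, not part of the
patch: the helper names significance_flag() and apply_avg_update() are
invented here, and the sample numbers are made up. First, the flag that
getTtestPvalue() puts in front of each p-value:

    from scipy import stats

    def significance_flag(sample1, sample2):
        # unpaired (independent two-sample) t-test, as in the patch
        (t, p) = stats.ttest_ind(sample1, sample2)
        avg1 = sum(sample1) / float(len(sample1))
        avg2 = sum(sample2) / float(len(sample2))
        flag = " "               # difference not statistically significant
        if p <= 0.05:            # significant at the 95% level
            if avg1 < avg2:
                flag = "+"
            else:
                flag = "-"
        return flag + "%.3f" % (1 - p)

    # e.g. five throughput runs before and after a change; the drop is
    # significant, so the value is printed with a '-' prefix
    print significance_flag([920.1, 918.4, 921.0, 919.7, 920.3],
                            [905.2, 906.8, 904.9, 907.1, 905.5])

Second, the avg_update rule: each "dest,num,den" triple recomputes one
column from two others once the per-column averages exist (regression.py
applies the same rules at the end of Sample._process_lines()):

    def apply_avg_update(cols, avg_update):
        # "dest,num,den" means cols[dest] = cols[num] / cols[den]
        for rule in avg_update.split('|'):
            dest, num, den = [int(x) for x in rule.split(',')]
            cols[dest] = "%.2f" % (float(cols[num]) / float(cols[den]))
        return cols

    row = [str(i * 10) for i in range(16)]    # toy 16-column result row
    print apply_avg_update(row, "4,2,3|14,5,12|15,6,13")
    # columns 4, 14 and 15 become "0.67", "0.42" and "0.46"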
 client/tools/analyzer.py   | 166 ----------------------------
 client/tools/perf.conf     |   6 +
 client/tools/regression.py | 259 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 256 insertions(+), 175 deletions(-)
 delete mode 100644 client/tools/analyzer.py

diff --git a/client/tools/analyzer.py b/client/tools/analyzer.py
deleted file mode 100644
index 28df97e..0000000
--- a/client/tools/analyzer.py
+++ /dev/null
@@ -1,166 +0,0 @@
-import sys, re, string, time, commands, os, random
-
-def tee(content, filename):
-    """ Write content to standard output and file """
-    fd = open(filename, "a")
-    fd.write(content + "\n")
-    fd.close()
-    print content
-
-class samples():
-    def __init__(self, files):
-        self.files_dict = []
-        for i in range(len(files)):
-            fd = open(files[i], "r")
-            self.files_dict.append(fd.readlines())
-            fd.close()
-
-    def getAvg(self):
-        return self._process(self.files_dict, self._get_list_avg)
-
-    def getAvgPercent(self, avgs_dict):
-        return self._process(avgs_dict, self._get_augment_rate)
-
-    def getSD(self):
-        return self._process(self.files_dict, self._get_list_sd)
-
-    def getSDPercent(self, sds_dict):
-        return self._process(sds_dict, self._get_percent)
-
-    def _get_percent(self, data):
-        """ num2 / num1 * 100 """
-        result = "0.0"
-        if len(data) == 2 and float(data[0]) != 0:
-            result = "%.1f" % (float(data[1]) / float(data[0]) * 100)
-        return result
-
-    def _get_augment_rate(self, data):
-        """ (num2 - num1) / num1 * 100 """
-        result = "+0.0"
-        if len(data) == 2 and float(data[0]) != 0:
-            result = "%+.1f" % (((float(data[1]) - float(data[0]))
-                                 / float(data[0])) * 100)
-        return result
-
-    def _get_list_sd(self, data):
-        """
-        sumX = x1 + x2 + ... + xn
-        avgX = sumX / n
-        sumSquareX = x1^2 + ... + xn^2
-        SD = sqrt([sumSquareX - (n * (avgX ^ 2))] / (n - 1))
-        """
-        sum = sqsum = 0
-        n = len(data)
-        for i in data:
-            sum += float(i)
-            sqsum += float(i) ** 2
-        avg = sum / n
-        if avg == 0 or n == 1:
-            return "0.0"
-        return "%.1f" % (((sqsum - (n * avg**2)) / (n - 1))**0.5)
-
-    def _get_list_avg(self, data):
-        """ Compute the average of list members """
-        sum = 0
-        for i in data:
-            sum += float(i)
-        if "." in data[0]:
-            return "%.2f" % (sum / len(data))
-        return "%d" % (sum / len(data))
-
-    def _process_lines(self, files_dict, row, func):
-        """ Process lines of different sample files with assigned method """
-        lines = []
-        ret_lines = []
-
-        for i in range(len(files_dict)):
-            lines.append(files_dict[i][row].split("|"))
-        for col in range(len(lines[0])):
-            data_list = []
-            for i in range(len(lines)):
-                data_list.append(lines[i][col].strip())
-            ret_lines.append(func(data_list))
-        return "|".join(ret_lines)
-
-    def _process(self, files_dict, func):
-        """ Process dicts of sample files with assigned method """
-        ret_lines = []
-        for i in range(len(files_dict[0])):
-            if re.findall("[a-zA-Z]", files_dict[0][i]):
-                ret_lines.append(files_dict[0][i].strip())
-            else:
-                line = self._process_lines(files_dict, i, func)
-                ret_lines.append(line)
-        return ret_lines
-
-
-def display(lists, rate, f, summary="Augment Rate", prefix="% ", ignore_col=1):
-    """
-    Display lists data to standard format
-
-    param lists: row data lists
-    param rate: augment rate list
-    param f: result output file
-    param summary: compare result summary
-    param prefix: output prefix in rate lines
-    param ignore_col: do not display some columns
-    """
-    def format(list, str, ignore_col=0):
-        """ Format the string width of list member """
-        str = str.split("|")
-        for l in range(len(list)):
-            line = list[l].split("|")
-            for col in range(len(line)):
-                line[col] = line[col].rjust(len(str[col]), ' ')
-                if not re.findall("[a-zA-Z]", line[col]) and col < ignore_col:
-                    line[col] = " " * len(str[col])
-            list[l] = "|".join(line)
-        return list
-
-    for l in range(len(lists[0])):
-        if not re.findall("[a-zA-Z]", lists[0][l]):
-            break
-    tee("\n== %s " % summary + "=" * (len(lists[0][l-1]) - len(summary) + 3), f)
-    for n in range(len(lists)):
-        lists[n] = format(lists[n], lists[n][l-1])
-    rate = format(rate, rate[l-1], ignore_col)
-    for i in range(len(lists[0])):
-        for n in range(len(lists)):
-            is_diff = False
-            for j in range(len(lists)):
-                if lists[0][i] != lists[j][i]:
-                    is_diff = True
-            if is_diff or n == 0:
-                tee(' ' * len(prefix) + lists[n][i], f)
-        if lists[0][i] != rate[i] and not re.findall("[a-zA-Z]", rate[i]):
-            tee(prefix + rate[i], f)
-
-
-def analyze(sample_list1, sample_list2, log_file="./result.txt"):
-    """ Compute averages of two lists of files, compare and display results """
-
-    commands.getoutput("rm -f %s" % log_file)
-    tee(time.ctime(time.time()), log_file)
-    s1 = samples(sample_list1.split())
-    avg1 = s1.getAvg()
-    sd1 = s1.getSD()
-    s2 = samples(sample_list2.split())
-    avg2 = s2.getAvg()
-    sd2 = s2.getSD()
-    sd1 = s1.getSDPercent([avg1, sd1])
-    sd2 = s1.getSDPercent([avg2, sd2])
-    display([avg1], sd1, log_file, summary="Avg1 SD Augment Rate",
-            prefix="%SD ")
-    display([avg2], sd2, log_file, summary="Avg2 SD Augment Rate",
-            prefix="%SD ")
-    avgs_rate = s1.getAvgPercent([avg1, avg2])
-    display([avg1, avg2], avgs_rate, log_file, summary="AvgS Augment Rate",
-            prefix="% ")
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 3:
-        print 'Usage: python %s "$results list1" "$results list2" $log_file'\
-              % sys.argv[0]
-        sys.exit(1)
-    analyze(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/client/tools/perf.conf b/client/tools/perf.conf
index 61b01d8..3e49b8b 100644
--- a/client/tools/perf.conf
+++ b/client/tools/perf.conf
@@ -2,9 +2,13 @@
 [ntttcp]
 result_file_pattern = .*.RHS
+ignore_col = 1
+avg_update =
 
 [netperf]
-result_file_pattern = netperf-result.RHS
+result_file_pattern = .*.RHS
+ignore_col = 2
+avg_update = 4,2,3|14,5,12|15,6,13
 
 [iozone]
 result_file_pattern =
 
diff --git a/client/tools/regression.py b/client/tools/regression.py
index c550f6f..5ee779e 100644
--- a/client/tools/regression.py
+++ b/client/tools/regression.py
@@ -1,9 +1,244 @@
-import ConfigParser, sys, commands, os
-import analyzer
+"""
+Program that parses standard format results
+and checks for regression bugs.
 
-def compare(testname, olddir, curdir, config_file='perf.conf', output_dir="./result.txt"):
+@copyright: Red Hat 2011-2012
+@author: Amos Kong <ak...@redhat.com>
+"""
+import sys, re, string, commands, os, random, warnings, ConfigParser
+
+
+class Sample():
+    """ Collect test results from the same environment into a sample """
+    def __init__(self, files):
+        self.files_dict = []
+        for i in range(len(files)):
+            fd = open(files[i], "r")
+            f = []
+            for l in fd.readlines():
+                f.append(l.strip())
+            self.files_dict.append(f)
+            fd.close()
+
+    def getAvg(self, avg_update=None):
+        return self._process_files(self.files_dict, self._get_list_avg,
+                                   avg_update=avg_update)
+
+    def getAvgPercent(self, avgs_dict):
+        return self._process_files(avgs_dict, self._get_augment_rate)
+
+    def getSD(self):
+        return self._process_files(self.files_dict, self._get_list_sd)
+
+    def getSDRate(self, sds_dict):
+        return self._process_files(sds_dict, self._get_rate)
+
+    def getTtestPvalue(self, fs_dict1, fs_dict2):
+        """
+        The scipy lib is used to compute the p-value of the t-test
+        scipy: http://www.scipy.org/
+        t-test: http://en.wikipedia.org/wiki/Student's_t-test
+        """
+        try:
+            from scipy import stats
+            import numpy as np
+        except ImportError:
+            print "No python scipy/numpy library installed!"
+            return None
+
+        ret = []
+        s1 = self._process_files(fs_dict1, self._get_list_self, merge=False)
+        s2 = self._process_files(fs_dict2, self._get_list_self, merge=False)
+        # s*[line][col] contains the items (line*col) of all sample files
+
+        for line in range(len(s1)):
+            tmp = []
+            if type(s1[line]) != list:
+                tmp = s1[line]
+            else:
+                for col in range(len(s1[line])):
+                    avg1 = self._get_list_avg(s1[line][col])
+                    avg2 = self._get_list_avg(s2[line][col])
+                    sample1 = np.array(s1[line][col])
+                    sample2 = np.array(s2[line][col])
+                    warnings.simplefilter("ignore", RuntimeWarning)
+                    (t, p) = stats.ttest_ind(sample1, sample2)
+                    flag = " "
+                    if p <= 0.05:
+                        flag = "+"
+                        if avg1 > avg2:
+                            flag = "-"
+                    tmp.append(flag + "%.3f" % (1 - p))
+                tmp = "|".join(tmp)
+            ret.append(tmp)
+        return ret
+
+    def _get_rate(self, data):
+        """ num2 / num1 * 100 """
+        result = "0.0"
+        if len(data) == 2 and float(data[0]) != 0:
+            result = float(data[1]) / float(data[0]) * 100
+            if result < 1:
+                result = "%.2f%%" % result
+            else:
+                result = "%.0f%%" % result
+        return result
+
+    def _get_augment_rate(self, data):
+        """ (num2 - num1) / num1 * 100 """
+        result = "+0.0"
+        if len(data) == 2 and float(data[0]) != 0:
+            result = "%+.3f%%" % ((float(data[1]) - float(data[0]))
+                                  / float(data[0]) * 100)
+        return result
+
+    def _get_list_sd(self, data):
+        """
+        sumX = x1 + x2 + ... + xn
+        avgX = sumX / n
+        sumSquareX = x1^2 + ... + xn^2
+        SD = sqrt([sumSquareX - (n * (avgX ^ 2))] / (n - 1))
+        """
+        sum = sqsum = 0
+        n = len(data)
+        for i in data:
+            sum += float(i)
+            sqsum += float(i) ** 2
+        avg = sum / n
+        if avg == 0 or n == 1 or sqsum - (n * avg**2) <= 0:
+            return "0.0"
+        return "%.3f" % (((sqsum - (n * avg**2)) / (n - 1))**0.5)
+
+    def _get_list_avg(self, data):
+        """ Compute the average of list entries """
+        sum = 0
+        for i in data:
+            sum += float(i)
+        if "." in str(data[0]):
+            return "%.2f" % (sum / len(data))
+        return "%d" % (sum / len(data))
+
+    def _get_list_self(self, data):
+        """ Return the data unchanged (used to collect raw sample lists) """
+        return data
+
+    def _process_lines(self, files_dict, row, func, avg_update, merge):
+        """ Process the same lines of different samples with a unified function """
+        lines = []
+        ret = []
+
+        for i in range(len(files_dict)):
+            lines.append(files_dict[i][row].split("|"))
+        for col in range(len(lines[0])):
+            data_list = []
+            for i in range(len(lines)):
+                tmp = lines[i][col].strip()
+                if "." in tmp:
+                    data_list.append(float(tmp))
+                else:
+                    data_list.append(int(tmp))
+            ret.append(func(data_list))
+
+        if avg_update:
+            for i in avg_update.split('|'):
+                l = i.split(',')
+                ret[int(l[0])] = "%.2f" % (float(ret[int(l[1])]) /
+                                           float(ret[int(l[2])]))
+        if merge:
+            return "|".join(ret)
+        return ret
+
+    def _process_files(self, files_dict, func, avg_update=None, merge=True):
+        """
+        Process dicts of sample files with the assigned function,
+        func takes one list argument.
+        """
+        ret_lines = []
+        for i in range(len(files_dict[0])):
+            if re.findall("[a-zA-Z]", files_dict[0][i]):
+                ret_lines.append(files_dict[0][i].strip())
+            else:
+                line = self._process_lines(files_dict, i, func, avg_update,
+                                           merge)
+                ret_lines.append(line)
+        return ret_lines
+
+
+def display(lists, rates, f, ignore_col, sum="Augment Rate", prefix0=None,
+            prefix1=None, prefix2=None):
+    """
+    Display list data in standard format
+
+    param lists: row data lists
+    param rates: augment rates lists
+    param f: result output file
+    param ignore_col: do not display some columns
+    param sum: compare result summary
+    param prefix0: output prefix in head lines
+    param prefix1: output prefix in Avg/SD lines
+    param prefix2: output prefix in Diff Avg/P-value lines
+    """
+
+    for l in range(len(lists[0])):
+        if not re.findall("[a-zA-Z]", lists[0][l]):
+            break
+    tee("\n== %s " % sum + "==", f)
+
+    for i in range(len(lists[0])):
+        for n in range(len(lists)):
+            is_diff = False
+            for j in range(len(lists)):
+                if lists[0][i] != lists[j][i]:
+                    is_diff = True
+                if len(lists) == 1 and not re.findall("[a-zA-Z]", lists[j][i]):
+                    is_diff = True
+            if is_diff:
+                tee(prefix1[n] + lists[n][i], f)
+            if not is_diff and n == 0:
+                if '|' in lists[n][i]:
+                    tee(prefix0 + lists[n][i], f)
+                else:
+                    tee(lists[n][i], f)
+        for n in range(len(rates)):
+            if lists[0][i] != rates[n][i] and not re.findall("[a-zA-Z]",
+                                                             rates[n][i]):
+                tee(prefix2[n] + rates[n][i], f)
+
+
+def analyze(test, sample_list1, sample_list2, configfile):
+    """ Compute averages/p-values of two samples, print results nicely """
+    config = ConfigParser.ConfigParser()
+    config.read(configfile)
+    ignore_col = int(config.get(test, "ignore_col"))
+    avg_update = config.get(test, "avg_update")
+
+    commands.getoutput("rm -f %s.txt" % test)
+    s1 = Sample(sample_list1.split())
+    avg1 = s1.getAvg(avg_update=avg_update)
+    sd1 = s1.getSD()
+
+    s2 = Sample(sample_list2.split())
+    avg2 = s2.getAvg(avg_update=avg_update)
+    sd2 = s2.getSD()
+
+    sd1 = s1.getSDRate([avg1, sd1])
+    sd2 = s1.getSDRate([avg2, sd2])
+    avgs_rate = s1.getAvgPercent([avg1, avg2])
+
+    pvalues = s1.getTtestPvalue(s1.files_dict, s2.files_dict)
+    rlist = [avgs_rate]
+    if pvalues:
+        # p-value list isn't empty
+        rlist.append(pvalues)
+    display([avg1, sd1, avg2, sd2], rlist, test + ".txt", ignore_col,
+            sum="Regression Testing", prefix0="#||",
+            prefix1=["1|Avg|", " |%SD|", "2|Avg|", " |%SD|"],
+            prefix2=["-|%Diff between Avg|", "-|Significance|"])
+
+
+def compare(testname, olddir, curdir, configfile='perf.conf'):
+    """ Find result files from directories """
     config = ConfigParser.ConfigParser()
-    config.read(config_file)
+    config.read(configfile)
     result_file_pattern = config.get(testname, "result_file_pattern")
 
     def search_files(dir):
@@ -14,11 +249,19 @@ def compare(testname, olddir, curdir, config_file='perf.conf', output_dir="./res
     oldlist = search_files(olddir)
     newlist = search_files(curdir)
     if oldlist != "" or newlist != "":
-        analyzer.analyze(oldlist, newlist, output_dir)
+        analyze(testname, oldlist, newlist, configfile)
+
+
+def tee(content, file):
+    """ Write content to standard output and file """
+    fd = open(file, "a")
+    fd.write(content + "\n")
+    fd.close()
+    print content
 
 
 if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print 'Usage: python %s $testname $dir1 $dir2 $configfile' % sys.argv[0]
+    if len(sys.argv) != 4:
+        print 'Usage: python %s $testname $dir1 $dir2' % sys.argv[0]
         sys.exit(1)
-    compare(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+    compare(sys.argv[1], sys.argv[2], sys.argv[3])
_______________________________________________
Autotest mailing list
Autotest@test.kernel.org
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest