Hi, A while ago I wrote a program to merge xls files. Now I refactored it because before, it was one big chunk of spaghetti code and I wanted to add some functionality. The code below works, but I have the feeling that it could still be simplified. Most functions have many arguments - isn't that called 'tight coupling'? Could somebody give me a few general pointers as to how to improve this program without loosing functionality? The first function is the main() function. Shouldn't a programmer strive for information hiding in such a function? Ideally, it should almost read like regular english, right? Or is that too textbook-ish? ;-)
Thanks in advance! Albert-Jan """ Merge all xls files in a given directory into one multisheet xls file. The sheets get the orginal file name, without the extension. File names should not exceed 29 characters """ import glob, os.path, time import xlrd, xlwt def merge_xls(in_dir="d:/temp/", out_file="d:/temp/merged_output.xls"): """ Main function: merge xls sheets """ xls_files = glob.glob(in_dir + "*.xls") xls_files.sort() merged_book = xlwt.Workbook() osheet_names = [os.path.basename(xls_file)[:-4] for xls_file in xls_files] for xls_no, xls_file in enumerate(xls_files): print "---> Processing file %s" % (xls_file) book = xlrd.open_workbook(xls_file) isheet_names = xlrd.Book.sheet_names(book) check_xls(book, merged_book, isheet_names, osheet_names, xls_no, xls_files) stamped_outfile = out_file[:-4] + "_" + time.strftime("%Y-%m-%d") + ".xls" merged_book.save(stamped_outfile) print_msg(xls_files, osheet_names, stamped_outfile) def check_xls(book, merged_book, isheet_names, osheet_names, xls_no, xls_files): """ Check existence and file names of input xls files """ if xls_files and len(osheet_names[xls_no]) <= 29: write_sheets(book, merged_book, isheet_names, osheet_names, xls_no, xls_files) elif xls_files: print "WARNING *** File name too long: <%s.xls> (maximum is 31 chars) " % (osheet_names[xls_no]) print "WARNING *** File <%s.xls> was skipped." % (osheet_names[xls_no]) else: print "NOTE *** No xls files in %s. Nothing to do" % (in_dir) def write_sheets(book, merged_book, isheet_names, osheet_names, xls_no, xls_files): """ Write sheets, and add sheet numbering in case of multisheet xls input """ osheet_name = osheet_names[xls_no] xls_file = xls_files[xls_no] if book.nsheets == 1: ws = merged_book.add_sheet(osheet_name) isheet_name = isheet_names[0] sheet = book.sheet_by_index(0) write_cells(sheet, book, ws, isheet_name, osheet_name, xls_file) elif book.nsheets in range(1, 100): for sheetx in range(book.nsheets): isheet_name = isheet_names[sheetx] ws = merged_book.add_sheet(osheet_name+str(sheetx+1).zfill(2)) sheet = book.sheet_by_index(sheetx) write_cells(sheet, book, ws, isheet_name, osheet_name, xls_file) else: raise Exception ("ERROR *** File %s has %s sheets (maximum is 99)" % (xls_file, book.nsheets)) def write_cells(sheet, book, ws, isheet_name, osheet_name, xls_file, format_cell=True): """ Write cells, and apply formatting if needed """ MISSINGVALUES = ("#LEEG!", "#NULL!") rx = 0 # initialize to zero in case of empty input xls file. style = format_cells(ws) for rx in range(sheet.nrows): for cx in range(sheet.ncols): cell_value = sheet.cell_value(rx, cx) if format_cell and rx == 0: format_cells(ws) ws.write(rx, cx, cell_value, style) elif cell_value in MISSINGVALUES or xlrd.XL_CELL_EMPTY: ws.write(rx, cx, " ") else: ws.write(rx, cx, cell_value) footer = "source tab: " + isheet_name + " || source file: " + os.path.basename(xls_file) if format_cell: ws.write(rx+2, 0, footer.upper(), style) # print bold source tab & file name below the table else: ws.write(rx+2, 0, footer.upper()) def format_cells(ws, font='Arial', boldrow=True, panes_frozen=True): """ Add horizontal split pane and bold font at first row """ ws.panes_frozen = panes_frozen ws.horz_split_pos = 1 font0 = xlwt.Font() font0.name = font font0.struck_out = False font0.bold = boldrow style0 = xlwt.XFStyle() style0.font = font0 return style0 def print_msg(xls_files, osheet_names, stamped_outfile): """ Print status messages """ print "\n---> Merged xls file written to %s using the following source files: " % (stamped_outfile) MAXSHEETNAMELEN = 29 for n_sheet, osheet_name in enumerate(osheet_names): if len(osheet_name) <= MAXSHEETNAMELEN: print "\t", str(n_sheet+1).zfill(3), "%s.xls" % (osheet_name) excl_sheets = [os.path.basename(xls_file)[:-4] for xls_file in xls_files if \ len(os.path.basename(xls_file)[:-4]) > MAXSHEETNAMELEN] if excl_sheets: print "\n--> The following files were skipped because the file name exceeds 29 characters: " for n_sheet, excl_sheet in enumerate(excl_sheets): print "\t", str(n_sheet+1).zfill(3), excl_sheet if __name__ == "__main__": merge_xls() _______________________________________________ Tutor maillist - Tutor@python.org http://mail.python.org/mailman/listinfo/tutor