I am processing two CSV files against another. My first implementation used Python lists of lists and list.append to generate a new list, looping over all the data, including the non-relevant rows (I can't determine the location of a specific data element in a list of lists without scanning it). So I re-implemented the exact same code using numpy arrays (2-D arrays), with numpy.where to avoid looping over an entire dataset needlessly, but the numpy.array-based code turns out to be about 7.6 times slower.
relevant list of list code: > starttime = time.clock() > #NI_data_list room_eqp_list > NI_data_list_new = [] > for NI_row in NI_data_list: > treelevel = NI_row[0] > elevation = NI_row[1] > locater = NI_row[2] > area = NI_row[3] > NIroom = NI_row[4] > #Write appropriate equipment models and drawing into new list > if NIroom != '': > #Write appropriate equipment models and drawing into new list > for row in room_eqp_list: > eqp_room = row[0] > if len(eqp_room) == 5: > eqp_drawing = row[1] > if NIroom == eqp_room: > newrow = > [int(treelevel)+1,elevation,locater,area,NIroom,eqp_drawing] > NI_data_list_new.append(newrow) > #Write appropriate piping info into the new list > for prow in unique_piping_list: > pipe_room = prow[0] > if len(pipe_room) == 5: > pipe_drawing = prow[1] > if pipe_room == NIroom: > piperow = > [int(treelevel)+1,elevation,locater,area,NIroom,pipe_drawing] > NI_data_list_new.append(piperow) > #Write appropriate equipment models and drawing into new list > if (locater != '' and NIroom == ''): > #Write appropriate equipment models and drawing into new list > for row in room_eqp_list: > eqp_locater = row[0] > if len(eqp_locater) == 4: > eqp_drawing = row[1] > if locater == eqp_locater: > newrow = > [int(treelevel)+1,elevation,eqp_locater,area,'',eqp_drawing] > NI_data_list_new.append(newrow) > #Write appropriate piping info into the new list > for prow in unique_piping_list: > pipe_locater = prow[0] > if len(pipe_locater) == 4: > pipe_drawing = prow[1] > if pipe_locater == locater: > piperow = > [int(treelevel)+1,elevation,pipe_locater,area,'',pipe_drawing] > NI_data_list_new.append(piperow) > #Rewrite NI_data to new list > if NIroom == '': > NI_data_list_new.append(NI_row) > > print (time.clock()-starttime) > relevant numpy.array code: > NI_data_write_url = reports_dir + 'NI_data_room2.csv' > NI_data_list_file = open(NI_data_write_url, 'wb') > NI_data_list_writer = csv.writer(NI_data_list_file, delimiter=',', > quotechar='"') > starttime = 
time.clock() > #NI_data_list room_eqp_list > NI_data_list_new = numpy.array([['TreeDepth', 'Elevation', > 'BuildingLocater', 'Area', 'Room', 'Item']]) > for NI_row in NI_data_list: > treelevel = NI_row[0] > elevation = NI_row[1] > locater = NI_row[2] > area = NI_row[3] > NIroom = NI_row[4] > #Write appropriate equipment models and drawing into new array > if NIroom != '': > #Write appropriate equipment models and drawing into new array > (rowtest, columntest) = numpy.where(room_eqp_list==NIroom) > for row_iter in rowtest: > eqp_room = room_eqp_list[row_iter,0] > if len(eqp_room) == 5: > eqp_drawing = room_eqp_list[row_iter,1] > if NIroom == eqp_room: > newrow = > numpy.array([[int(treelevel)+1,elevation,locater,area,NIroom,eqp_drawing]]) > NI_data_list_new = numpy.append(NI_data_list_new, > newrow, 0) > > #Write appropriate piping info into the new array > (rowtest, columntest) = > numpy.where(unique_room_piping_list==NIroom) > for row_iter in rowtest: #unique_room_piping_list > pipe_room = unique_room_piping_list[row_iter,0] > if len(pipe_room) == 5: > pipe_drawing = unique_room_piping_list[row_iter,1] > if pipe_room == NIroom: > piperow = > numpy.array([[int(treelevel)+1,elevation,locater,area,NIroom,pipe_drawing]]) > NI_data_list_new = numpy.append(NI_data_list_new, > piperow, 0) > #Write appropriate equipment models and drawing into new array > if (locater != '' and NIroom == ''): > #Write appropriate equipment models and drawing into new array > (rowtest, columntest) = numpy.where(room_eqp_list==locater) > for row_iter in rowtest: > eqp_locater = room_eqp_list[row_iter,0] > if len(eqp_locater) == 4: > eqp_drawing = room_eqp_list[row_iter,1] > if locater == eqp_locater: > newrow = > numpy.array([[int(treelevel)+1,elevation,eqp_locater,area,'',eqp_drawing]]) > NI_data_list_new = numpy.append(NI_data_list_new, > newrow, 0) > #Write appropriate piping info into the new array > (rowtest, columntest) = numpy.where(unique_room_eqp_list==locater) > for row_iter in 
rowtest: > pipe_locater = unique_room_piping_list[row_iter,0] > if len(pipe_locater) == 4: > pipe_drawing = unique_room_piping_list[row_iter,1] > if pipe_locater == locater: > piperow = > numpy.array([[int(treelevel)+1,elevation,pipe_locater,area,'',pipe_drawing]]) > NI_data_list_new = numpy.append(NI_data_list_new, > piperow, 0) > #Rewrite NI_data to new list > if NIroom == '': > NI_data_list_new = numpy.append(NI_data_list_new,[NI_row],0) > > print (time.clock()-starttime) > some relevant output > >>> print NI_data_list_new > [['TreeDepth' 'Elevation' 'BuildingLocater' 'Area' 'Room' 'Item'] > ['0' '' '1000' '' '' ''] > ['1' '' '1000' '' '' 'docname Rev 0'] > ..., > ['5' '6' '1164' '4' '' 'eqp11 RB, R. surname, 24-NOV-08'] > ['4' '6' '1164' '4' '' 'anotherdoc Rev A'] > ['0' '' '' '' '' '']] > Is numpy.append so slow? or is the culprit numpy.where? Dewald Pieterse "A democracy is nothing more than mob rule, where fifty-one percent of the people take away the rights of the other forty-nine." ~ Thomas Jefferson
_______________________________________________ NumPy-Discussion mailing list [email protected] http://mail.scipy.org/mailman/listinfo/numpy-discussion
