I know this is a general python list and I am asking about pandas, but this question is probably not a great fit for stackoverflow. I have a list of files (~80 files, ~30,000 rows each) I need to process; with my current code it takes minutes for each file. Any suggestions for a faster way? I am trying to stick with pandas for educational purposes. Any suggestions would be great. If you are curious, you can find the data file I am using here: http://www.nber.org/nhamcs/data/nhamcsopd2010.csv
# Flag, for every row of one NHAMCS file, which drugs of interest appear in
# any of the MED1..MED5 columns. One boolean column is added per drug name.
#
# `pd` (pandas) and `f` (the name of the file being processed) are expected
# to be provided by the surrounding code, which is not part of this snippet.

# Drug name -> code, used for files whose two-digit year is 06/08/09/10.
drugs_current = {
    'CITALOPRAM': 4332,
    'ESCITALOPRAM': 4812,
    'FLUOXETINE': 236,
    'FLUVOXAMINE': 3804,
    'PAROXETINE': 3157,
    'SERTRALINE': 880,
    'METHYLPHENIDATE': 900,
    'DEXMETHYLPHENIDATE': 4777,
    'AMPHETAMINE-DEXTROAMPHETAMINE': 4035,
    'DEXTROAMPHETAMINE': 804,
    'LISDEXAMFETAMINE': 6663,
    'METHAMPHETAMINE': 805,
    'ATOMOXETINE': 4827,
    'CLONIDINE': 44,
    'GUANFACINE': 717,
}

# Drug name -> code, used for files whose two-digit year is 93..05.
drugs_98_05 = {
    'SERTRALINE': 56635,
    'CITALOPRAM': 59829,
    'FLUOXETINE': 80006,
    'PAROXETINE_HCL': 57150,
    'FLUVOXAMINE': 57064,
    'ESCITALOPRAM': 70466,
    'DEXMETHYLPHENIDATE': 70427,
    'METHYLPHENIDATE': 70374,
    'METHAMPHETAMINE': 53485,
    'AMPHETAMINE1': 70257,
    'AMPHETAMINE2': 70258,
    'AMPHETAMINE3': 50265,
    'DEXTROAMPHETAMINE1': 70259,
    'DEXTROAMPHETAMINE2': 70260,
    'DEXTROAMPHETAMINE3': 51665,
    'COMBINATION_PRODUCT': 51380,
    'FIXED_COMBINATION': 51381,
    'ATOMOXETINE': 70687,
    'CLONIDINE1': 51275,
    'CLONIDINE2': 70357,
    'GUANFACINE': 52498,
}

keep_col = ['PATCODE', 'PATWT', 'VDAY', 'VMONTH', 'VYEAR',
            'MED1', 'MED2', 'MED3', 'MED4', 'MED5']
med_cols = ['MED1', 'MED2', 'MED3', 'MED4', 'MED5']

# Two-digit year suffixes (taken from the file name) for each code table.
# The original tuple had '91' between '00' and '02'; that looks like a typo
# for '01', which otherwise matched neither branch.
years_98_05 = ('93', '94', '95', '96', '97', '98', '99', '00',
               '01', '02', '03', '04', '05')
# NOTE(review): '07' is absent here, as in the original -- confirm whether
# that is intentional before adding it.
years_current = ('06', '08', '09', '10')

# Parse only the columns that are kept, instead of loading every column and
# deleting the unwanted ones one at a time. A callable is used (rather than a
# list) so that a kept column missing from a given file is simply skipped,
# matching the tolerance of the original delete loop.
df = pd.read_csv(
    'nhamcsopd2010.csv',
    index_col='PATCODE',
    usecols=lambda name: name in keep_col,
    low_memory=False,
)

# Pick the code table from the two characters before the '.csv' extension.
year = f[-6:-4]
if f.endswith('csv') and year in years_98_05:
    drugs = drugs_98_05
elif f.endswith('csv') and year in years_current:
    drugs = drugs_current
else:
    # Fail loudly instead of hitting a NameError (or silently reusing the
    # table chosen for a previous file when this runs inside a loop).
    raise ValueError('cannot determine drug code table for file %r' % (f,))

# Slice the medication columns once; the slice does not change while the
# flag columns are being added.
meds = df[med_cols]
for name, code in drugs.items():
    # axis must be passed by keyword: pandas 2.x rejects the positional form.
    df[name] = meds.isin([code]).any(axis=1)

# Vincent Davis 720-301-3003
-- https://mail.python.org/mailman/listinfo/python-list