Hi, when I run the following code, I get this error:
ValueError: For numerical factors, num_columns must be an int

================
import numpy as np
import pandas as pd
from patsy import dmatrices
from sklearn.linear_model import LogisticRegression

X = [0.5,0.75,1.0,1.25,1.5,1.75,1.75,2.0,2.25,2.5,2.75,3.0,3.25,
     3.5,4.0,4.25,4.5,4.75,5.0,5.5]
y = [0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1]

zipped = list(zip(X,y))
df = pd.DataFrame(zipped,columns = ['study_hrs','p_or_f'])

y, X = dmatrices('p_or_f ~ study_hrs', df, return_type="dataframe")
=======================

I have checked that 'df' is of this type:

=============
type(df)
Out[25]: pandas.core.frame.DataFrame
=============

I cannot figure out where the problem is. Can you help me?
Thanks.

Error message:
..........
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
C:\Users\rj\pyprj\stackoverflow_logisticregression0.py in <module>()
     17 df = pd.DataFrame(zipped,columns = ['study_hrs','p_or_f'])
     18
---> 19 y, X = dmatrices('p_or_f ~ study_hrs', df, return_type="dataframe")
     20
     21 y = np.ravel(y)

C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\highlevel.pyc in dmatrices(formula_like, data, eval_env, NA_action, return_type)
    295     eval_env = EvalEnvironment.capture(eval_env, reference=1)
    296     (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 297                                       NA_action, return_type)
    298     if lhs.shape[1] == 0:
    299         raise PatsyError("model is missing required outcome variables")

C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\highlevel.pyc in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
    150         return iter([data])
    151     design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 152                                       NA_action)
    153     if design_infos is not None:
    154         return build_design_matrices(design_infos, data,

C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\highlevel.pyc in _try_incr_builders(formula_like, data_iter_maker, eval_env,
NA_action)
     55                                       data_iter_maker,
     56                                       eval_env,
---> 57                                       NA_action)
     58     else:
     59         return None

C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\build.pyc in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
    704                                              factor_states[factor],
    705                                              num_columns=num_column_counts[factor],
--> 706                                              categories=None)
    707             else:
    708                 assert factor in cat_levels_contrasts

C:\Users\rj\AppData\Local\Enthought\Canopy\User\lib\site-packages\patsy\design_info.pyc in __init__(self, factor, type, state, num_columns, categories)
     86         if self.type == "numerical":
     87             if not isinstance(num_columns, int):
---> 88                 raise ValueError("For numerical factors, num_columns "
     89                                  "must be an int")
     90             if categories is not None:

ValueError: For numerical factors, num_columns must be an int
--
https://mail.python.org/mailman/listinfo/python-list