# -*- coding: utf-8 -*-
"""
Created on Wed Sep 17 11:14:21 2014

@author: Ken
"""
import numpy as np

from sklearn.tree._tree import DepthFirstTreeBuilder, BestFirstTreeBuilder
from sklearn import datasets
from sklearn.tree import _tree
from sklearn.tree._tree import Tree

from sklearn.utils import check_random_state


# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Training inputs are restricted to the first two feature columns;
# the targets are taken unchanged from the dataset.
x, y = iris.data[:, :2], iris.target


# Name -> class lookup tables for the pieces exposed by sklearn.tree._tree.

# Impurity criteria for classification.
CRITERIA_CLF = {
    "gini": _tree.Gini,
    "entropy": _tree.Entropy,
}

# Impurity criteria for regression.
CRITERIA_REG = {
    "mse": _tree.MSE,
    "friedman_mse": _tree.FriedmanMSE,
}

# Split-search strategies.
SPLITTERS = {
    "best": _tree.BestSplitter,
    "presort-best": _tree.PresortBestSplitter,
    "random": _tree.RandomSplitter,
}

# Names of the criterion and splitter to use; they are looked up in
# CRITERIA_CLF / SPLITTERS further down.
criterion, splitter = "gini", "best"

# Stopping rules handed to the splitter and the tree builder.
min_samples_split, min_samples_leaf = 2, 1
# min_weight_leaf is intentionally left undefined: it is not passed to the
# builder in this script.

# Largest signed 32-bit integer, used for both limits.
# NOTE(review): the best-first builder may size its initial node buffer from
# max_leaf_nodes; confirm a value this large does not trigger a huge
# allocation in the installed sklearn version.
max_depth = max_leaf_nodes = (2 ** 31) - 1

# Turn the integer seed 0 into the random-state object given to the splitter.
random_state = check_random_state(0)

# Give the targets an explicit output axis: (n_samples,) -> (n_samples, 1).
y = np.reshape(y, (-1, 1))

n_samples, n_features_ = x.shape
# Every feature is a split candidate.
max_features_ = n_features_

n_outputs_ = y.shape[1]

# The script builds a *classification* criterion (see CRITERIA_CLF), so
# n_classes_ must hold the number of distinct labels for each output.
# The previous value, [1] * n_outputs_, is the regression setting and sized
# the criterion for a single class even though iris has three.
# Labels are also re-encoded as indices 0..n_classes-1 into classes_[k],
# mirroring what sklearn's own tree estimators do before building.
y = np.copy(y)  # do not write the encoded labels back into iris.target
classes_ = []
n_classes_ = []
for k in range(n_outputs_):
    classes_k, y[:, k] = np.unique(y[:, k], return_inverse=True)
    classes_.append(classes_k)
    n_classes_.append(classes_k.shape[0])
n_classes_ = np.array(n_classes_, dtype=np.intp)

# Swap the criterion name for an instance of the matching classification
# criterion class.
criterion = CRITERIA_CLF[criterion](n_outputs_, n_classes_)

# Likewise replace the splitter name with a splitter instance wired to the
# criterion above.
splitter = SPLITTERS[splitter](
    criterion, max_features_, min_samples_leaf, random_state)

# The pip-installed sklearn release this script targets has no
# min_weight_leaf argument, so it is not passed to the builder.
builder = BestFirstTreeBuilder(
    splitter, min_samples_split, min_samples_leaf, max_depth, max_leaf_nodes)

# Create the tree object and let the builder populate it from (x, y).
tree_ = Tree(n_features_, n_classes_, n_outputs_)
builder.build(tree_, x, y)
