Hi Andy,

Please find attached a Jupyter notebook showing exactly where the problem
appears.

Best,
Sam

On Thu, Aug 17, 2017 at 4:03 PM, Andreas Mueller <t3k...@gmail.com> wrote:

> Hi Sam.
>
> Can you say which test fails exactly and where (i.e. give traceback)?
> The estimator checks are currently quite strict with respect to raising
> helpful error messages.
> That doesn't mean your estimator is broken (necessarily).
> With a precomputed gram matrix, I expect the shape of X in predict to be
> (n_samples_test, n_samples_train), right?
> Does your estimator have a _pairwise attribute? (It should, to work with
> cross-validation; I'm not sure whether it's used in the estimator checks
> right now, but it should be.)
>
> Your feedback will help make check_estimator more robust. I don't
> think it's tested with anything that requires "precomputed" kernels.
>
> Thanks
>
> Andy
>
>
> On 08/17/2017 05:22 AM, Sam Barnett wrote:
>
> I am rolling a classifier based on SVC that computes a custom Gram matrix
> and runs this through the SVC classifier with kernel='precomputed'. While
> this works fine with the fit method, I face a dilemma with the predict
> method, shown here:
>
>
>     def predict(self, X):
>         """Run the predict method of the previously-instantiated SVM
>         classifier, returning the predicted classes for test set X."""
>
>         # Check that fit has been called
>         check_is_fitted(self, ['X_', 'y_'])
>
>         # Input validation
>         X = check_array(X)
>
>         cut_off = self.cut_ord_pair[0]
>         order = self.cut_ord_pair[1]
>
>         X_gram = seq_kernel_free(
>             X, self.X_,
>             pri_kernel=kernselect(self.kernel, self.coef0, self.gamma,
>                                   self.degree, self.scale),
>             cut_off=cut_off, order=order)
>
>         X_gram = np.nan_to_num(X_gram)
>
>         return self.ord_svc_.predict(X_gram)
>
> This will run on any dataset just fine. However, it fails the
> check_estimator test. Specifically, when trying to raise an error for
> malformed input on predict (in check_classifiers_train), the test reports
> that a ValueError is not raised. Yet if I change the order of X and
> self.X_ in seq_kernel_free (which then computes the [n_samples_train,
> n_samples_test] Gram matrix), it passes the check_estimator test but
> fails to run the predict method.
>
> How do I resolve both issues simultaneously?
>
>
>
# kernelsqizer.py

import numpy as np

def shift(A, j, m):
    """Shift array A by m places (m assumed positive) along axis j,
    zero-filling the vacated entries."""
    newA = np.roll(A, m, axis=j)

    # Zero out the entries that np.roll wrapped around.
    for index, x in np.ndenumerate(newA):
        if index[j] < m:
            newA[index] = 0
    return newA


def getDiffMatrix(x, y, primary_kernel):
    """Evaluate primary_kernel on all pairs of points of the time series
    x and y, then take first-order differences along both axes via the
    banded matrices `left` and `right`."""
    shape = [x.shape[0] - 1, y.shape[0] - 1]
    left = np.zeros([shape[0], shape[0] + 1])
    right = np.zeros([shape[1] + 1, shape[1]])
    l = max(shape) + 1
    ones = np.diag(np.ones([l]))
    ones_up = np.diag(np.ones([l - 1]), 1)
    ones_down = np.diag(np.ones([l - 1]), -1)
    left += (ones - ones_up)[tuple(slice(0, n) for n in left.shape)]
    right += (ones - ones_down)[tuple(slice(0, n) for n in right.shape)]
    diff = np.zeros([shape[0] + 1, shape[1] + 1])
    for i in range(shape[0] + 1):
        for j in range(shape[1] + 1):
            diff[i, j] = primary_kernel(x[i], y[j])
    return np.dot(left, np.dot(diff, right))


def kernel(K, M, D):
    """First-order sequentialised kernel with cut-off M, built from the
    incremental kernel matrix K. This is the D = 1 special case of
    kernelHO below; D is accepted for signature compatibility but is
    unused."""
    (lens, lent) = K.shape
    A = np.zeros((M, lens, lent))
    A[0] = K
    for m in range(1, M):
        # Shifted double cumsum: the sum over strictly earlier index pairs.
        Q = np.cumsum(np.cumsum(A[m - 1], axis=0), axis=1)
        Qshifted = shift(Q, 0, 1)
        Qshifted = shift(Qshifted, 1, 1)
        A[m] = K * (Qshifted + 1)

    return 1 + np.sum(A[M - 1])



def kernelHO(K, M, D):
    """Higher-order sequentialised kernel with cut-off M and order D,
    built from the incremental kernel matrix K."""
    (lens, lent) = K.shape
    B = np.zeros((M, D, D, lens, lent))
    B[0, 0, 0] = K
    for m in range(1, M):
        D_ = min(D, m)
        # Sum over both order indices of the previous level, then take the
        # shifted double cumsum (the sum over strictly earlier index pairs).
        K1 = np.sum(B[m - 1], axis=0)
        K2 = np.sum(K1, axis=0)
        K3 = np.cumsum(np.cumsum(K2, axis=0), axis=1)
        K3shifted = shift(K3, 0, 1)
        K3shifted2 = shift(K3shifted, 1, 1)
        B[m, 0, 0] = np.multiply(K, (K3shifted2 + 1))
        for d in range(1, D_):
            K1 = np.sum(B[m - 1, d - 1], axis=0)
            K2 = np.cumsum(K1, axis=1)
            K2shifted = shift(K2, 1, 1)
            B[m, d, 0] = np.divide(1, (d + 1)) * K * K2shifted

            K2_ = np.sum(B[m - 1], axis=0)
            K4_ = np.cumsum(K2_[d - 1], axis=0)
            K4shifted_ = shift(K4_, 0, 1)
            B[m, 0, d] = np.divide(1, (d + 1)) * K * K4shifted_

            for d_ in range(1, D_):
                B[m, d, d_] += np.divide(1, ((d + 1) * (d_ + 1))) * K * B[m - 1, d - 1, d_ - 1]

    return 1 + np.sum(B[M - 1])


def seq_kernel_free(A, B, pri_kernel=np.dot, cut_off=2, order=1, normalise=True):
    """Computes the cross-kernel matrix between datasets A and B, whose
    rows represent time series. The result has shape
    (A.shape[0], B.shape[0])."""

    gram_matrix = np.zeros((A.shape[0], B.shape[0]))

    for row1ind in range(A.shape[0]):
        for row2ind in range(B.shape[0]):
            K_temp = getDiffMatrix(A[row1ind], B[row2ind], pri_kernel)
            gram_matrix[row1ind,row2ind] = kernelHO(K_temp, cut_off, order)

    if normalise:
        # Normalise by self-kernels: k(x, y) / sqrt(k(x, x) * k(y, y)).
        normfacx = np.zeros((A.shape[0], 1))
        normfacy = np.zeros((1, B.shape[0]))
        for row1ind in range(A.shape[0]):
            kxx = kernelHO(getDiffMatrix(A[row1ind], A[row1ind], pri_kernel), cut_off, order)
            normfacx[row1ind, 0] = kxx ** (-0.5)
        for row2ind in range(B.shape[0]):
            kyy = kernelHO(getDiffMatrix(B[row2ind], B[row2ind], pri_kernel), cut_off, order)
            normfacy[0, row2ind] = kyy ** (-0.5)
        normprod = np.dot(normfacx, normfacy)
        gram_matrix = np.multiply(gram_matrix, normprod)

    return gram_matrix


def seq_kernel_gram(A, pri_kernel=np.dot, cut_off=2, order=1, normalise=True):
    """Computes the Gram matrix of dataset A, whose rows represent time
    series. Only the upper triangle is computed directly; symmetry fills
    in the lower triangle at the end."""

    gram_matrix = np.zeros((A.shape[0], A.shape[0]))

    for row1ind in range(A.shape[0]):
        for row2ind in range(row1ind, A.shape[0]):
            K_temp = getDiffMatrix(A[row1ind], A[row2ind], pri_kernel)
            gram_matrix[row1ind,row2ind] = kernelHO(K_temp, cut_off, order)

    if normalise:
        # Normalise using the diagonal of self-kernel values.
        normvec = np.power(np.diag(gram_matrix).reshape((A.shape[0], 1)), -0.5)
        normfac = np.dot(normvec, normvec.T)
        gram_matrix = np.multiply(gram_matrix, normfac)

    # Fill in the lower triangle; Gram matrices are symmetric.
    gram_matrix += gram_matrix.T - np.diag(gram_matrix.diagonal())

    return gram_matrix
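
For orientation, here is a quick smoke test of the routines above and of the matrix shapes that fit and predict rely on. This snippet is not part of the attachment; the import assumes the file above is saved as kernelsqizer.py (the name used by the estimator's own imports):

    import numpy as np
    from kernelsqizer import getDiffMatrix, kernelHO, seq_kernel_free, seq_kernel_gram

    # Two toy series: 4 time steps, 1 feature each.
    x = np.array([[0.0], [1.0], [2.0], [4.0]])
    y = np.array([[0.0], [0.5], [1.5], [3.0]])

    K = getDiffMatrix(x, y, np.dot)  # shape (3, 3): differenced pairwise kernel
    print(kernelHO(K, 2, 1))         # scalar sequentialised kernel value

    # The shapes used by fit and predict:
    A = np.random.rand(5, 4)            # 5 "training" series of length 4
    B = np.random.rand(3, 4)            # 3 "test" series of length 4
    print(seq_kernel_gram(A).shape)     # (5, 5): (n_samples_train, n_samples_train)
    print(seq_kernel_free(B, A).shape)  # (3, 5): (n_samples_test, n_samples_train)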

Attachment: SeqSVC Check Estimator Test.ipynb

# The SeqSVC estimator.

import numpy as np
from functools import partial

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from sklearn.svm import SVC
from sklearn.preprocessing import KernelCenterer

from kernelsqizer import seq_kernel_free, seq_kernel_gram
from timeseriestools import timeLagMult, timeIndexMult

# LIST OF STANDARD PRESET KERNELS.
def kPolynom(x, y, coef0=0, gamma=1, degree=1):
    return (coef0 + gamma * np.inner(x, y)) ** degree

def kGauss(x, y, scale=1, gamma=1):
    return scale * np.exp(-gamma * np.sum(np.square(x - y)))

def kLinear(x, y, scale=1):
    return scale * np.inner(x, y)

def kSigmoid(x, y, gamma=1, coef0=0):
    return np.tanh(gamma * np.inner(x, y) + coef0)

def kernselect(kername, coef0, gamma, degree, scale):
    """If kername is one of the listed strings, return the corresponding
    preset kernel with the specified parameters. Otherwise, return
    kername itself (assumed to be a callable; the now-redundant
    parameters must still be supplied)."""

    switcher = {
        'linear': partial(kLinear, scale=scale),
        'rbf': partial(kGauss, scale=scale, gamma=gamma),
        'sigmoid': partial(kSigmoid, gamma=gamma, coef0=coef0),
        'poly': partial(kPolynom, coef0=coef0, gamma=gamma, degree=degree),
    }
    return switcher.get(kername, kername)
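
# Example (illustrative values):
#
#     >>> k = kernselect('rbf', coef0=0.0, gamma=0.5, degree=3, scale=1.0)
#     >>> k(np.zeros(3), np.zeros(3))
#     1.0
#
# A callable passed as kername is returned unchanged.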


class TimeSeriesPreprocessor(object):
    """This class can be used to reshape (and recover) higher-dimensional
    time series as 2D arrays for use in the scikit-learn interface. This
    class itself is NOT a scikit-learn estimator, and so cannot be
    placed in a pipeline.

    Parameters
    ----------
    numfeatures: int
        The number of recordings in time for a single path realisation.
    """
    def __init__(self, numfeatures):
        self.numfeatures = numfeatures

    def flattenData(self, data):
        """Flatten an (n_samples, n_steps, n_dims) array into 2D."""
        highShape = data.shape
        return data.reshape((highShape[0], np.prod(highShape[1:])))

    def recoverHighData(self, data):
        """Invert flattenData, recovering the 3D shape."""
        flatShape = data.shape
        last_dim = int(flatShape[1] / self.numfeatures)
        newShape = (flatShape[0], self.numfeatures, last_dim)
        return data.reshape(newShape)
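
# Round-trip example (illustrative shapes): for 5 series with 10 time
# steps in 3 dimensions,
#
#     >>> prep = TimeSeriesPreprocessor(numfeatures=10)
#     >>> flat = prep.flattenData(np.zeros((5, 10, 3)))
#     >>> flat.shape
#     (5, 30)
#     >>> prep.recoverHighData(flat).shape
#     (5, 10, 3)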


class SeqSVC(BaseEstimator, ClassifierMixin):
    """C-Support Vector Classification with Sequentialisation.

    The implementation is based on scikit-learn's svm.SVC.
    The fit time complexity is more than quadratic in the number of samples,
    which makes it hard to scale to datasets with more than a couple of
    10000 samples.

    The multiclass support is handled according to a one-vs-one scheme.

    For details on the precise mathematical formulation of the provided
    kernel functions and how `gamma`, `coef0` and `degree` affect each
    other, see the narrative documentation:
    :ref:`svm_kernels`.

    Read more in the :ref:`User Guide <svm_classification>`.

    Parameters
    ----------
    C : float, optional (default=1.0)
        Penalty parameter C of the error term.

    kernel : string, optional (default='rbf')
         Specifies the base kernel type to be used in the sequentialising
         algorithm.
         It must be one of 'linear', 'poly', 'rbf', 'sigmoid', or a callable.
         If none is given, 'rbf' will be used. If a callable is given its
         sequentialisation is used to pre-compute the kernel matrix from data
         matrices; that matrix should be an array of shape
         ``(n_samples, n_samples)``.

    degree : int, optional (default=3)
        Degree of the polynomial kernel function ('poly').
        Ignored by all other kernels.

    gamma : float, optional (default=1.0)
        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.

    coef0 : float, optional (default=0.0)
        Independent term in kernel function.
        It is only significant in 'poly' and 'sigmoid'.

    probability : boolean, optional (default=False)
        Whether to enable probability estimates. This must be enabled prior
        to calling `fit`, and will slow down that method.

    shrinking : boolean, optional (default=True)
        Whether to use the shrinking heuristic.

    tol : float, optional (default=1e-3)
        Tolerance for stopping criterion.

    cache_size : float, optional
        Specify the size of the kernel cache (in MB).

    class_weight : {dict, 'balanced'}, optional
        Set the parameter C of class i to class_weight[i]*C for
        SVC. If not given, all classes are supposed to have
        weight one.
        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``

    verbose : bool, default: False
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in libsvm that, if enabled, may not work
        properly in a multithreaded context.

    max_iter : int, optional (default=-1)
        Hard limit on iterations within solver, or -1 for no limit.

    decision_function_shape : 'ovo', 'ovr' or None, default=None
        Whether to return a one-vs-rest ('ovr') decision function of shape
        (n_samples, n_classes) as all other classifiers, or the original
        one-vs-one ('ovo') decision function of libsvm which has shape
        (n_samples, n_classes * (n_classes - 1) / 2).
        The default of None will currently behave as 'ovo' for backward
        compatibility and raise a deprecation warning, but will change to
        'ovr' in 0.19.
        .. versionadded:: 0.17
           *decision_function_shape='ovr'* is recommended.
        .. versionchanged:: 0.17
           Deprecated *decision_function_shape='ovo' and None*.

    random_state : int seed, RandomState instance, or None (default)
        The seed of the pseudo random number generator to use when
        shuffling the data for probability estimation.

    scale : float, optional (default = 1)
        Kernel coefficient for Gaussian and linear primary kernels.

    cut_ord_pair : tuple (of ints), optional (default = (2, 1))
        A tuple (M, D) representing the cut-off and order for the
        sequentialisation of the kernel. This is introduced as a pair in
        order to prevent GridSearchCV from instantiating the classifier
        with an order value strictly greater than the cut-off value.

    n_iter_ : int
        A redundant attribute (set to 1 in `fit`) that serves as a hacky
        fix in order to pass the check_estimator test.

    preprocess : string or int, optional (default = 0)
        Creates synthetic higher-dimensional time series out of a dataset of
        one-dimensional time series. If the input is an int > 0, then the
        classifier uses a 'lagged' version of the dataset. If the input is
        the str 'index', then the classifier adds the time index to each
        feature. For more information, see timeseriestools.


    Examples - ADD WHEN FINISHED
    --------


    See also
    --------
    SVC
        C-Support Vector Classification.
    """

    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma=1.0,
                 coef0=0.0, shrinking=True, probability=False, tol=0.001,
                 cache_size=200, class_weight=None, verbose=False, max_iter=-1,
                 decision_function_shape=None, random_state=None,
                 scale=1.0, cut_ord_pair=(2, 1), preprocess=0):

        self.C = C
        self.shrinking = shrinking
        self.probability = probability
        self.tol = tol
        self.cache_size = cache_size
        self.class_weight = class_weight
        self.verbose = verbose
        self.max_iter = max_iter
        self.decision_function_shape = decision_function_shape
        self.random_state = random_state
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.scale = scale
        self.cut_ord_pair = cut_ord_pair
        self.preprocess = preprocess

    def fit(self, X, y=None):
        """Instantiate a standard SVM classifier with sequentialised
        kernel and fit it to the data-target pair X, y."""

        self.n_iter_ = 1  # Hacky fix so check_estimator sees an n_iter_ attribute.

        # Check that X and y have correct shape
        X, y = check_X_y(X, y)

        cut_off = self.cut_ord_pair[0]
        order = self.cut_ord_pair[1]

        # Store the classes seen during fit
        self.classes_ = unique_labels(y)

        self.X_ = np.array(X)
        self.y_ = np.array(y)
        self.input_shape_ = X.shape

        ts_preprocessor = TimeSeriesPreprocessor(X.shape[1])
        if isinstance(self.preprocess, int) and self.preprocess > 0:
            X_high = timeLagMult(X, self.preprocess)
            X = ts_preprocessor.flattenData(X_high)
        elif self.preprocess == 'index':
            X_high = timeIndexMult(X)
            X = ts_preprocessor.flattenData(X_high)

        X_gram = seq_kernel_gram(
            X,
            pri_kernel=kernselect(self.kernel, self.coef0, self.gamma,
                                  self.degree, self.scale),
            cut_off=cut_off, order=order)

        X_gram = np.nan_to_num(X_gram)

        self.ord_svc_ = SVC(
            C=self.C, kernel='precomputed',
            degree=self.degree, gamma=self.gamma,
            coef0=self.coef0, shrinking=self.shrinking,
            probability=self.probability, tol=self.tol,
            cache_size=self.cache_size, class_weight=self.class_weight,
            verbose=self.verbose, max_iter=self.max_iter,
            decision_function_shape=self.decision_function_shape,
            random_state=self.random_state)

        self.ord_svc_.fit(X_gram, y)

        return self

    def predict(self, X):
        """Run the predict method of the previously-instantiated SVM
        classifier, returning the predicted classes for test set X."""

        # Check that fit has been called
        check_is_fitted(self, ['X_', 'y_'])

        # Input validation
        X = check_array(X)

        ts_preprocessor = TimeSeriesPreprocessor(X.shape[1])
        if isinstance(self.preprocess, int) and self.preprocess > 0:
            X_high = timeLagMult(X, self.preprocess)
            X = ts_preprocessor.flattenData(X_high)
        elif self.preprocess == 'index':
            X_high = timeIndexMult(X)
            X = ts_preprocessor.flattenData(X_high)

        cut_off = self.cut_ord_pair[0]
        order = self.cut_ord_pair[1]

        X_gram = seq_kernel_free(
            X, self.X_,
            pri_kernel=kernselect(self.kernel, self.coef0, self.gamma,
                                  self.degree, self.scale),
            cut_off=cut_off, order=order)

        X_gram = np.nan_to_num(X_gram)

        return self.ord_svc_.predict(X_gram)
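
One way to resolve the question raised in the thread, sketched here rather than taken from the attachment: keep the (n_samples_test, n_samples_train) argument order in seq_kernel_free, and raise the ValueError that check_classifiers_train looks for by validating the feature count against the shape stored during fit:

    def predict(self, X):
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)

        # Malformed input: the feature count must match what fit() saw.
        if X.shape[1] != self.input_shape_[1]:
            raise ValueError('X has %d features, but SeqSVC was fitted '
                             'with %d features.'
                             % (X.shape[1], self.input_shape_[1]))

        # ... then proceed exactly as above: preprocess, build X_gram with
        # seq_kernel_free(X, self.X_, ...), and return
        # self.ord_svc_.predict(X_gram).

Since SeqSVC consumes raw time series rather than a precomputed kernel matrix, the _pairwise attribute mentioned above would presumably remain False here; it matters for estimators whose X is itself a kernel matrix, so that cross-validation slices it along both axes.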

# timeseriestools.py

import numpy as np

def timeLag(vec, n):
    """Turns a one-dimensional array vec into a two-dimensional array s.t.,
    if vec = [x_0, ..., x_{N-1}], the output is equal to
    [[x_{-n}, ..., x_0], ..., [x_{N-1-n}, ..., x_{N-1}]].
    If k-n < 0, then set x_{k-n} = x_0."""

    # Initialise the lagged vector with zeros.
    vec_lag = np.zeros( (len(vec), n+1) )

    vec_lag[:, n] = vec
    for ind in range(1, n+1):
        vec_lag[:, n-ind] = np.roll(vec, ind)
        vec_lag[0:ind, n-ind] = vec_lag[0,n]

    return vec_lag

def timeIndex(vec):
    """Turns a one-dimensional array vec into a two-dimensional array s.t.,
    if vec = [x_0, x_1, x_2, ...], the output is equal to
    [[x_0, 0], [x_1, 1], [x_2, 2], ...]."""

    vec_index = np.zeros( (len(vec), 2))
    vec_index[:, 0] = vec
    vec_index[:, 1] = np.arange(len(vec))

    return vec_index

def timeLagMult(arr, n):
    """Performs timeLag on an array containing multiple vectors."""

    arr_lag = np.zeros( (arr.shape[0], arr.shape[1], n+1))
    for row_ind in range(arr.shape[0]):
        arr_lag[row_ind, :, :] = timeLag(arr[row_ind, :], n)

    return arr_lag

def timeIndexMult(arr):
    """Performs timeIndex on an array containing multiple vectors."""

    arr_index = np.zeros( (arr.shape[0], arr.shape[1], 2))
    for row_ind in range(arr.shape[0]):
        arr_index[row_ind, :, :] = timeIndex(arr[row_ind, :])

    return arr_index
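
For reference, a small worked example of the two transforms (not part of the attachment; the values follow from the padding rule x_{k-n} = x_0):

    vec = np.array([1.0, 2.0, 3.0, 4.0])

    timeLag(vec, 2)
    # array([[1., 1., 1.],
    #        [1., 1., 2.],
    #        [1., 2., 3.],
    #        [2., 3., 4.]])

    timeIndex(vec)
    # array([[1., 0.],
    #        [2., 1.],
    #        [3., 2.],
    #        [4., 3.]])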