# -*- coding: utf-8 -*-
"""
Created on Fri Jul 28 16:59:56 2017

@author: c61372

An attempt at providing an Principal Component Analysis 
class in OpenTURNS.
"""
from openturns import Matrix, Sample
from math import sqrt

class PrincipalComponentAnalysisAlgorithm():
    def __init__(self, ncomponents):
        self._ncomponents = ncomponents
        self._VT = None # Transpose of the matrix V (from SVD)
        self._explainedVarRatio = None # Explained variance ratio
        self._size = None # Sample size
        self._dim = None # Sample dimension
        self._mu = None # Sample mean

    def run(self, data):
        # data : a Sample, the data to fit
        # 1. Centre les donnees
        self._mu = data.computeMean()
        self._size = data.getSize()
        # TODO : vectoriser la boucle
        for i in range(self._size):
            data[i,:] = data[i,:]-self._mu
        # 2. Calcule la SVD
        Y = data/sqrt(self._size-1)
        M = Matrix(Y)
        S, U, VT = M.computeSVD(True)
        self._VT = VT
        # 3. Calcule les variances
        self._dim = data.getDimension()
        for j in range(self._dim):
            S[j] = S[j]**2
        # 4. Calcule la part de variance expliquée
        explainedVarRatio = 0.
        totalvar = sum(S)
        for j in range(self._ncomponents):
            explainedVarRatio = explainedVarRatio + S[j]/totalvar
        self._explainedVarRatio = explainedVarRatio
        return None

    def transform(self,data):
        # Transform from the regular space to the Principal Component space
        V = self._VT.transpose()
        R = V[:,0:self._ncomponents]
        Y = Matrix(data) * R
        Ysample = self.matrix2Sample(Y)
        return Ysample

    def getExplainedVarianceRatio(self):
        return self._explainedVarRatio

    def inversetransform(self,data):
        # Transform from the Principal Component space into regular space
        size = data.getSize()
        data = Matrix(data)
        VTProject = self._VT[0:self._ncomponents,:]
        X = data * VTProject
        # Ajoute la moyenne
        X = self.matrix2Sample(X)
        # TODO : vectoriser la boucle
        for i in range(size):
            X[i,:] = X[i,:]+self._mu
        return X

    def matrix2Sample(self,mymatrix):
        # Convert a Matrix into a Sample
        # TODO : vectorize with mysample = Sample(mymatrix)
        # This is not possible for the moment.
        # http://trac.openturns.org/ticket/901
        size = mymatrix.getNbRows()
        dim = mymatrix.getNbColumns()
        mysample = Sample(size,dim)
        for j in range(dim):
            for i in range(size):
                mysample[i,j] = mymatrix[i,j]
        return mysample

if (__name__=="__main__"):
    """
    Source : "Analyse en composantes principales", 
    Jean-François Delmas et Saad Salam
    Weight of several parts of 23 cows
    X1: weight (alive)
    X2: skeleton weight
    X3: first grade meat weight
    X4: total meat weight
    X5: fat weight
    X6: bones weight
    """
    data = [\
    [ 395,     224,     35.1,     79.1,     6.0,     14.9 ], \
    [ 410,     232,     31.9,     73.4,     8.7,     16.4 ], \
    [ 405,     233,     30.7,     76.5,     7.0,     16.5 ], \
    [ 405,     240,     30.4,     75.3,     8.7,     16.0 ], \
    [ 390,     217,     31.9,     76.5,     7.8,     15.7 ], \
    [ 415,     243,     32.1,     77.4,     7.1,     18.5 ], \
    [ 390,     229,     32.1,     78.4,     4.6,     17.0 ], \
    [ 405,     240,     31.1,     76.5,     8.2,     15.3 ], \
    [ 420,     234,     32.4,     76.0,     7.2,     16.8 ], \
    [ 390,     223,     33.8,     77.0,     6.2,     16.8 ], \
    [ 415,     247,     30.7,     75.5,     8.4,     16.1 ], \
    [ 400,     234,     31.7,     77.6,     5.7,     18.7 ], \
    [ 400,     224,     28.2,     73.5,     11.0,     15.5 ], \
    [ 395,     229,     29.4,     74.5,     9.3,     16.1 ], \
    [ 395,     219,     29.7,     72.8,     8.7,     18.5 ], \
    [ 395,     224,     28.5,     73.7,     8.7,     17.3 ], \
    [ 400,     223,     28.5,     73.1,     9.1,     17.7 ], \
    [ 400,     224,     27.8,     73.2,     12.2,     14.6 ], \
    [ 400,     221,     26.5,     72.3,     13.2,     14.5 ], \
    [ 410,     233,     25.9,     72.3,     11.1,     16.6 ], \
    [ 402,     234,     27.1,     72.1,     10.4,     17.5 ], \
    [ 400,     223,     26.8,     70.3,     13.5,     16.2 ], \
    [ 400,     213,     25.8,     70.4,     12.1,     17.5 ]];
    
    from numpy import array
    data = array(data)
    data = Sample(data)

    ncomponents = 2
    mypca = PrincipalComponentAnalysisAlgorithm(ncomponents)
    mypca.run(data)
    Y = mypca.transform(data)
    explainedvar = mypca.getExplainedVarianceRatio()

    from pylab import figure, plot, xlabel, ylabel, title
    figure()
    title("Explained variance=%.2f %%" % (explainedvar*100))
    plot(Y[:,0],Y[:,1],"b.")
    xlabel("PC1")
    ylabel("PC2")
    
    # Transformation inverse
    Y = [[0, 0.], \
         [-5,-5], \
         [5,5]]
    Y = Sample(Y)
    X = mypca.inversetransform(Y)