Hello,
after reading a lot through the tutorial this is the code that i came up
with regarding the implementation of a gaussian process regression
optimisation (File appended):
initCovarianceAndGradients(): initialisation of matrices and
calculations which are needed by both marginal likelihood calculation
and gradient calculation:
Within this function i calculate some things globally which are strongly
reused by the value() and gradient() functions. What i do not really
understand is the passing of the double[] argument to the value()
function and the value() function of the gradient() method. Are those
methods called by the optimizer with the updated parameters? If this is
the case i have to recalculate the global calculations with each call to
the value() and gradient() methods.
Thanks for clarification
Am 14.05.2012 12:53, schrieb Gilles Sadowski:
Hello.
thanks for the reply. But i wonder what is the input for value and gradient.
in DifferentiableMultivariateRealFunction this needs to be a double array
but what needs to be provided there? The parameters for the function to
optimize?
Thank you very much again
Andreas
Do please have a look to the examples, as your question (and my
answer) is too vague if not supported by proper code. I guess the
answer to your question is 'yes', the double[] array is indeed the set
of parameters, but again, do check the examples, I would not like to
be misguiding you. Besides the user guide which should provide you
with the answer, have a look to this implementation [1], line 153. In
this implementation, x[i] and y[i] are the data points, yhat[i] are
the model predictions, and a[] are the parameters. You should be able
to find your way with this example.
I've also just added another bit of code show-casing the usage of the
"non-linear least-squares" optimizers (svn revision 1338144).
Best regards,
Gilles
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
--
Dipl.-Ing. (FH) Andreas Niekler
Mitarbeiter und Promovend
Bereich Multimedia-Produktionssysteme und -technologien
Hochschule für Technik, Wirtschaft und Kultur Leipzig
Fachbereich Medien
Besucher
Gustav-Freytag-Straße 40
04277 Leipzig
Telefon: +49 0341 30 76 2378
Email: [email protected]
http://www.fbm.htwk-leipzig.de
/**
*
*/
package de.uni_leipzig.asv.toolchain.outputApplication.TimeSeriesAnalysis.cov;
import org.apache.commons.math.FunctionEvaluationException;
import org.apache.commons.math.analysis.DifferentiableMultivariateRealFunction;
import org.apache.commons.math.analysis.MultivariateRealFunction;
import org.apache.commons.math.analysis.MultivariateVectorialFunction;
import org.apache.commons.math.linear.CholeskyDecomposition;
import org.apache.commons.math.linear.CholeskyDecompositionImpl;
import org.apache.commons.math.linear.DecompositionSolver;
import org.apache.commons.math.linear.LUDecompositionImpl;
import org.apache.commons.math.linear.MatrixUtils;
import org.apache.commons.math.linear.NonSquareMatrixException;
import org.apache.commons.math.linear.NotPositiveDefiniteMatrixException;
import org.apache.commons.math.linear.NotSymmetricMatrixException;
import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.RealMatrixImpl;
import org.apache.commons.math.stat.StatUtils;
/**
* @author Niekler
*
*/
public class GaussianProcessRegressionMarginalLikelihood implements
DifferentiableMultivariateRealFunction {
RealMatrix K;
RealMatrix invK;
RealMatrix y;
CholeskyDecomposition L;
double L_diag;
RealMatrix alpha;
RealMatrix alpha2;
private double[][] cov;
private double [][][] paramCov;
private double[] targets;
private double[] X;
private double[] parameter;
public GaussianProcessRegressionMarginalLikelihood(double[] targets,
double[] X, double[] parameter) {
this.targets = targets;
this.X = X;
this.parameter = parameter;
this.initCovarianceAndGradients();
this.cov = new double[targets.length][targets.length];
this.paramCov = new double[parameter.length][][];
for(int i = 0; i < parameter.length; i++)
{
paramCov[i] = new
double[targets.length][targets.length];
}
}
private void initCovarianceAndGradients()
{
double mean = StatUtils.mean(this.targets);
double[] y_array = new double[this.targets.length];
for(int i = 0; i < this.targets.length;i++)
{
y_array[i] = (targets[i]- mean) * 100;
//y_array[i] = targets[i]- mean;
}
for(int i = 0; i < this.X.length;i++)
{
for(int j = 0; j < this.X.length;j++)
{
double covar =
(Double)squaredExponential.cov(X[i],
X[j],parameter[0],parameter[1],parameter[2]);
this.cov[i][j] = covar;
double dSigmaF =
(Double)squaredExponential.dSigmaF(X[i],
X[j],parameter[0],parameter[1],parameter[2]);
this.paramCov[0][i][j] = covar;
double dSigmaN =
(Double)squaredExponential.dSigmaN(X[i],
X[j],parameter[0],parameter[1],parameter[2]);
this.paramCov[1][i][j] = covar;
double dL = (Double)squaredExponential.dL(X[i],
X[j],parameter[0],parameter[1],parameter[2]);
this.paramCov[2][i][j] = covar;
}
}
y = MatrixUtils.createColumnRealMatrix(y_array);
K = MatrixUtils.createRealMatrix(cov);
//identity matrix for I
RealMatrix k_eye =
MatrixUtils.createRealIdentityMatrix(cov.length);
//k_eye = k_eye.scalarMultiply(Math.pow(Float.MIN_VALUE * 1000,
2));
k_eye = k_eye.scalarMultiply(Math.pow(parameter[2], 2));
//RealMatrix choleskyInput =
K.add(k_eye.scalarMultiply(Math.pow(parameter[2], 2)));
RealMatrix choleskyInput = K.add(k_eye);
CholeskyDecomposition L = null;
try {
L = new
CholeskyDecompositionImpl(choleskyInput);
} catch (NonSquareMatrixException e) {
e.printStackTrace();
} catch (NotSymmetricMatrixException e) {
e.printStackTrace();
} catch (NotPositiveDefiniteMatrixException e) {
e.printStackTrace();
}
DecompositionSolver solverLTransponse = new
LUDecompositionImpl(L.getLT()).getSolver();
DecompositionSolver solverL = new
LUDecompositionImpl(L.getL()).getSolver();
//inverse of Ltranspose for left devision
RealMatrix L_transpose_1 = solverLTransponse.getInverse();
//inverse of Ltranspose for left devision
RealMatrix L_1 = solverL.getInverse();
//L\y
RealMatrix L_y = L_1.multiply(y);
//alpha = L'\(L\y)
RealMatrix alpha = L_transpose_1.multiply(L_y);
double L_diag = 0.0;
for(int i = 0; i < L.getL().getColumnDimension();i++)
{
L_diag += Math.log(L.getL().getEntry(i, i));
}
DecompositionSolver solverK = new
LUDecompositionImpl(K).getSolver();
this.invK = solverK.getInverse();
alpha2 = invK.multiply(y);
}
/* log p(y|X,theta) = -(1/2) * y^T*Ky^(-1) * y - 1/2 * log * |Ky| -
(n/2) * log(2*pi)
*
*/
@Override
public double value(double[] parameters) throws
FunctionEvaluationException,
IllegalArgumentException {
double logpyX = - y.transpose().multiply(alpha).getData()[0][0]
* 0.5
- L_diag
- X.length * Math.log(2 *
Math.PI) * 0.5;
return logpyX;
}
@Override
public MultivariateVectorialFunction gradient() {
return new MultivariateVectorialFunction() {
public double[] value() {
RealMatrix innerMatrix =
alpha2.multiply(alpha2.transpose()).subtract(invK);
double[] result = new double[paramCov.length];
for(int i = 0; i < paramCov.length; i++)
{
result[i] = parameter[i] *
innerMatrix.multiply(MatrixUtils.createRealMatrix(paramCov[i])).getTrace() *
0.5;
}
return result;
}
};
}
@Override
public MultivariateRealFunction partialDerivative(int k) {
return new MultivariateRealFunction() {
public double value(double[] point) {
return gradient()[k];
}
};
}
}
/**
*
*/
package de.uni_leipzig.asv.toolchain.outputApplication.TimeSeriesAnalysis.cov;
/**
* @author Niekler
*
*/
public class squaredExponential {
public static double cov(double X_1, double X_2, double sigmaF,
double sigmaN, double lengthScale) {
double covar = Math.pow(sigmaF, 2.0D)
* Math.exp(-Math.pow(X_1 - X_2, 2.0D)
/ (2 * Math.pow(lengthScale,
2.0D)));
if (X_1 == X_2)
covar += Math.pow(sigmaN, 2.0D);
return covar;
}
public static double dSigmaF(double X_1, double X_2, double sigmaF,
double sigmaN, double lengthScale) {
double d_sigmaf = 2
* sigmaF
* Math.exp(-Math.pow(X_1 - X_2, 2.0D)
/ (2 * Math.pow(lengthScale,
2.0D)));
return d_sigmaf;
}
public static double dSigmaN(double X_1, double X_2, double sigmaF,
double sigmaN, double lengthScale) {
double d_sigman = 0.0;
if (X_1 == X_2)
d_sigman = Math.pow(sigmaN, 2.0D);
return d_sigman;
}
public static double dL(double X_1, double X_2, double sigmaF,
double sigmaN, double lengthScale) {
double d_l = cov(X_1, X_2, sigmaF, sigmaN, lengthScale) * -1.0
* Math.pow(lengthScale, -3) * Math.pow(X_1 -
X_2, 2);
return d_l;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]