gpr.pl

1/* 2Gaussian process (GP), see https://en.wikipedia.org/wiki/Gaussian_process 3and http://arxiv.org/pdf/1505.02965v2.pdf and 4Christopher M. Bishop, 5Pattern Recognition and Machine Learning, Springer, 2006, section 6.4 6 7A Gaussian Process defines a probability distribution over functions. 8This distribution has the property that, given N values, 9their image through a function sampled from the GP follows a multivariate 10normal with mean 0 and covariance matrix K. 11A GP is defined by a kernel function k that determines K in this way 12K_nm=k(x_n,x_m) 13GPs can be used for regression: the random functions 14predict the y value corresponding to a x value given a set X and Y of 15observed values. It can be proven that y is Gaussian distributed with mean 16m(y)=k*C^-1*Y 17where k is the row vector with elements k(X_i, x) and C has elements 18C_ij=k(x_i,x_j)+sigma*delta_ij, with sigma a user defined parameter (variance 19over the observed values) and delta_ij is the Kronecker function (delta_ij=1 20if i=j and 0 otherwise). 21When performing GP regression, you choose the kernel and you want to estimate 22the parameters of the kernel. You can define a prior distribution over the 23parameters. In this program, you can sample kernels and thus functions and 24predictions and you can computed the expected value of the predictions for 25a squared exponential kernel with l uniformly 26distributed in {1,2,3} and sigma uniformly distributed in [-2,2]. 27 28*/

?- draw_fun(sq_exp_p,C). % draw 5 functions from a GP with a squared exponential kernel with a prior % over the parameters sigma and l ?- draw_fun(sq_exp_const_lin(1.0,4.0,0.0,5.00),C). % draw 5 functions from a GP with a squared exponential, constant, linear % kernel (see Bishop page 308 Figure 6.5, bottom right ??- draw_fun(sq_exp,C). % draw 5 functions from a GP with a squared exponential kernel with % parameters sigma=1 and l=1 ?- draw_fun(ou,C). % draw 5 functions from a GP with a Ornstein-Uhlenbeck kernel ?- draw_fun(lin,C). % draw 5 functions from a GP with a linear kernel ?- draw_fun([1,2,3,4,5,6],min,C). % draw 5 functions from a GP with a min kernel ?- draw_fun_pred(sq_exp_p,C). % Given the three points % XT=[2.5,6.5,8.5] % YT=[1,-0.8,0.6] % draws 5 functions predicting points with X=[0,...,10] with a % squared exponential kernel. The graphs shows as dots the given points. ?- draw_fun_pred_exp(sq_exp_p,C). % Given the three points % XT=[2.5,6.5,8.5] % YT=[1,-0.8,0.6] % draws the expected prediction for points with X=[0,...,10] with a % squared exponential kernel. The graphs shows as dots the given points. */

60:- use_module(library(mcintyre)). 61:- use_module(library(matrix)). 62:- use_module(library(clpfd)). 63 64:- if(current_predicate(use_rendering/1)). 65:- use_rendering(c3). 66:- endif. 67:- mc. 68:- begin_lpad.

75gp(X,Kernel,Y):- 76 compute_cov(X,Kernel,0,C), 77 gp(C,Y). 78 79gp(Cov,Y):gaussian(Y,Mean,Cov):- 80 length(Cov,N), 81 list0(N,Mean). 82 83compute_cov(X,Kernel,Var,C):- 84 length(X,N), 85 cov(X,N,Kernel,Var,CT,CND), 86 transpose(CND,CNDT), 87 matrix_sum(CT,CNDT,C). 88 89cov([],_,_,_,[],[]). 90 91cov([XH|XT],N,Ker,Var,[KH|KY],[KHND|KYND]):- 92 length(XT,LX), 93 N1 is N-LX-1, 94 list0(N1,KH0), 95 cov_row(XT,XH,Ker,KH1), 96 call(Ker,XH,XH,KXH0), 97 KXH is KXH0+Var, 98 append([KH0,[KXH],KH1],KH), 99 append([KH0,[0],KH1],KHND), 100 cov(XT,N,Ker,Var,KY,KYND). 101 102cov_row([],_,_,[]). 103 104cov_row([H|T],XH,Ker,[KH|KT]):- 105 call(Ker,H,XH,KH), 106 cov_row(T,XH,Ker,KT).

112gp_predict(XP,Kernel,Var,XT,YT,YP):- 113 compute_cov(XT,Kernel,Var,C), 114 matrix_inversion(C,C_1), 115 transpose([YT],YST), 116 matrix_multiply(C_1,YST,C_1T), 117 gp_predict_single(XP,Kernel,XT,C_1T,YP). 118 119gp_predict_single([],_,_,_,[]). 120 121gp_predict_single([XH|XT],Kernel,X,C_1T,[YH|YT]):- 122 compute_k(X,XH,Kernel,K), 123 matrix_multiply([K],C_1T,[[YH]]), 124 gp_predict_single(XT,Kernel,X,C_1T,YT). 125 126compute_k([],_,_,[]). 127 128compute_k([XH|XT],X,Ker,[HK|TK]):- 129 call(Ker,XH,X,HK), 130 compute_k(XT,X,Ker,TK). 131 132 133% list of kernels 134 135% squared exponential kernel with a prior on its parameters: l is uniformly 136% distributed in {1,2,3} and sigma is uniformly distributed in [-2,2]. 137sq_exp_p(X,XP,K):- 138 sigma(Sigma), 139 l(L), 140 K is Sigma^2*exp(-((X-XP)^2)/2/(L^2)). 141 142 143l(L):uniform(L,[1,2,3]). 144 145sigma(Sigma):uniform_dens(Sigma,-2,2). 146 147% squared exponential kernel with fixed parameters: sigma=1, l=1. 148sq_exp(X,XP,K):- 149 K is exp(-((X-XP)^2)/2). 150 151% squared exponential with linear and constant compoonent, 152% from Bishop, page 307 eq 6.63. 153sq_exp_const_lin(Theta0,Theta1,Theta2,Theta3,X,XP,K):- 154 K is Theta0*exp(-((X-XP)^2)*Theta1/2)+Theta2+Theta3*X*XP. 155 156% min kernel 157min(X,XP,K):- 158 K is min(X,XP). 159 160% linear kernel with an additional term for the diagonal to ensure positive 161% definedness. 162lin(X,X,K):-!, 163 K is (X*X)+1. 164 165lin(X,XP,K):- 166 K is (X*XP). 167 168% Ornstein-Uhlenbeck kernel 169ou(X,XP,K):- 170 K is exp(-abs(X-XP)). 171 172:- end_lpad.

184draw_fun(X,Kernel,C):- 185 mc_sample_arg_first(gp(X,Kernel,Y),5,Y,L), 186 numlist(1,5,LD), 187 maplist(name_s,L,LD,L1), 188 C = c3{data:_{x:x, columns:[[x|X]|L1] ,type:spline}, 189 axis:_{ x:_{ tick:_{fit:false}}}}.

196draw_fun_pred(Kernel,C):- 197 numlist(0,10,X), 198 XT=[2.5,6.5,8.5], 199 YT=[1,-0.8,0.6], 200 mc_lw_sample_arg(gp_predict(X,Kernel,0.3,XT,YT,Y),gp(XT,Kernel,YT),5,Y,L), 201 keysort(L,LS), 202 reverse(LS,[Y1-_,Y2-_,Y3-_|_]), 203 C = c3{data:_{xs:_{y:xt,f1:x,f2:x,f3:x}, 204 columns:[[y|YT],[xt|XT],[x|X],[f1|Y1],[f2|Y2],[f3|Y3]], 205 types:_{f1: spline,f2:spline,f3:spline,y:scatter}}, 206 axis:_{ x:_{ tick:_{fit:false}}}}.

213draw_fun_pred_exp(Kernel,C):- 214 numlist(0,10,X), 215 XT=[2.5,6.5,8.5], 216 YT=[1,-0.8,0.6], 217 compute_e(X,Kernel,XT,YT,Y), 218 C = c3{data:_{xs:_{y:xt,f:x}, 219 columns:[[y|YT],[xt|XT],[x|X],[f|Y]], 220 types:_{f: spline,y:scatter}}, 221 axis:_{ x:_{ tick:_{fit:false}}}}. 222 223compute_e([],_,_,_,[]). 224compute_e([X|T],Kernel,XT,YT,[YE|TYE]):- 225 mc_lw_expectation(gp_predict([X],Kernel,0.3,XT,YT,[Y]),gp(XT,Kernel,YT),5,Y,YE), 226 compute_e(T,Kernel,XT,YT,TYE). 227 228name_s(V-_,N,[ND|V]):- 229 atomic_concat(f,N,ND)