dirichlet_process.pl

1/* 2Dirichlet process (DP), see https://en.wikipedia.org/wiki/Dirichlet_process 3Samples are drawn from a base distribution. New samples have a nonzero 4probability of being equal to already sampled values. The process depends 5on a parameter alpha (concentration parameter): with alpha->0, a single 6value is sampled, with alpha->infinite the distribution is equal to the base 7distribution. 8In this example the base distribution is a Gaussian with mean 0 and variance 91, as in https://en.wikipedia.org/wiki/Dirichlet_process#/media/File:Dirichlet_process_draws.svg 10To model the process, this example uses a stick breaking process: to sample 11a value, a sample beta_1 is taken from Beta(1,alpha) and a coin with heads 12probability beta_1 is flipped. If the coin lands heads, a sample from the base 13distribution is taken and returned. Otherwise, a sample beta_2 is taken again 14from Beta(1,alpha) and a coin is flipped. This procedure is repeated until 15a heads is obtained, the index of i beta_i is the index of the value to be 16returned. 17The example queries show both the distribution of indexes and values of the DP. 18Moreover, they show the distribution of unique indexes as in 19http://www.robots.ox.ac.uk/~fwood/anglican/examples/viewer/?worksheet=nonparametrics/dp-mixture-model 20*/

?- hist(200,100,G). % show the distribution of indexes with concentration parameter 10. ?- hist_val(200,100,G). % show the distribution of values with concentration parameter 10. Should look % like row 2 of https://en.wikipedia.org/wiki/Dirichlet_process#/media/File:Dirichlet_process_draws.svg ?- hist_repeated_indexes(100,40,G). % show the distribution of unique indexes in 100 samples with concentration parameter 10.

32 :- use_module(library(mcintyre)). 33 34:- if(current_predicate(use_rendering/1)). 35:- use_rendering(c3). 36:- endif. 37:- mc. 38:- begin_lpad. 39 40% dp_n_values(N0,N,Alpha,L) 41% returns in L a list of N-N0 samples from the DP with concentration parameter 42% Alpha 43dp_n_values(N,N,_Alpha,[]):-!. 44 45dp_n_values(N0,N,Alpha,[[V]-1|Vs]):- 46 N0<N, 47 dp_value(N0,Alpha,V), 48 N1 is N0+1, 49 dp_n_values(N1,N,Alpha,Vs). 50 51% dp_value(NV,Alpha,V) 52% returns in V the NVth sample from the DP with concentration parameter 53% Alpha 54dp_value(NV,Alpha,V):- 55 dp_stick_index(NV,Alpha,I), 56 dp_pick_value(I,V). 57 58% dp_pick_value(I,V) 59% returns in V the value of index I of the base distribution 60% (in this case N(0,1)) 61dp_pick_value(_,V):gaussian(V,0,1). 62 63% dp_stick_index(NV,Alpha,I) 64% returns in I the index of the NVth sample from the DP 65dp_stick_index(NV,Alpha,I):- 66 dp_stick_index(1,NV,Alpha,I). 67 68dp_stick_index(N,NV,Alpha,V):- 69 stick_proportion(N,Alpha,P), 70 choose_prop(N,NV,Alpha,P,V). 71 72% choose_prop(N,NV,Alpha,P,V) 73% returns in V the index of the end of the stick breaking process starting 74% from index N for the NVth value to be sampled from the DP 75choose_prop(N,NV,_Alpha,P,N):- 76 pick_portion(N,NV,P). 77 78choose_prop(N,NV,Alpha,P,V):- 79 neg_pick_portion(N,NV,P), 80 N1 is N+1, 81 dp_stick_index(N1,NV,Alpha,V). 82 83% sample of the beta_i parameters 84stick_proportion(_,Alpha,P):beta(P,1,Alpha). 85 86% flip of the coin for the portion of the stick of size P 87pick_portion(N,NV,P):P;neg_pick_portion(N,NV,P):1-P. 88 89:- end_lpad. 90 91hist(Samples,NBins,Chart):- 92 mc_sample_arg(dp_stick_index(1,10.0,V),Samples,V,L), 93 histogram(L,Chart,[nbins(NBins)]). 94 95hist_repeated_indexes(Samples,NBins,Chart):- 96 repeat_sample(0,Samples,L), 97 histogram(L,Chart,[nbins(NBins)]). 98 99repeat_sample(S,S,[]):-!. 100 101repeat_sample(S0,S,[[N]-1|LS]):- 102 mc_sample_arg_first(dp_stick_index(1,1,10.0,V),10,V,L), 103 length(L,N), 104 S1 is S0+1, 105 repeat_sample(S1,S,LS). 106 107hist_val(Samples,NBins,Chart):- 108 mc_sample_arg_first(dp_n_values(0,Samples,10.0,V),1,V,L), 109 L=[Vs-_], 110 histogram(Vs,Chart,[nbins(NBins)])