1/*
    2Dirichlet process (DP), see https://en.wikipedia.org/wiki/Dirichlet_process
    3Samples are drawn from a base distribution. New samples have a nonzero
    4probability of being equal to already sampled values. The process depends
    5on a parameter alpha (concentration parameter): with alpha->0, a single 
    6value is sampled, with alpha->infinite the distribution is equal to the base
    7distribution.
    8In this example the base distribution is a Gaussian with mean 0 and variance
    91, as in https://en.wikipedia.org/wiki/Dirichlet_process#/media/File:Dirichlet_process_draws.svg
   10To model the process, this example uses a stick breaking process: to sample
   11a value, a sample beta_1 is taken from Beta(1,alpha) and a coin with heads
   12probability beta_1 is flipped. If the coin lands heads, a sample from the base
   13distribution is taken and returned. Otherwise, a sample beta_2 is taken again
   14from Beta(1,alpha) and a coin is flipped. This procedure is repeated until
   15a heads is obtained, the index of i beta_i is the index of the value to be 
   16returned.
   17The example queries show both the distribution of indexes and values of the DP.
   18Moreover, they show the distribution of unique indexes as in 
   19http://www.robots.ox.ac.uk/~fwood/anglican/examples/viewer/?worksheet=nonparametrics/dp-mixture-model
   20*/

?- hist(200,100,G). % show the distribution of indexes with concentration parameter 10. ?- hist_val(200,100,G). % show the distribution of values with concentration parameter 10. Should look % like row 2 of https://en.wikipedia.org/wiki/Dirichlet_process#/media/File:Dirichlet_process_draws.svg ?- hist_repeated_indexes(100,40,G). % show the distribution of unique indexes in 100 samples with concentration parameter 10.

*/

   32 :- use_module(library(mcintyre)).   33
   34:- if(current_predicate(use_rendering/1)).   35:- use_rendering(c3).   36:- endif.   37:- mc.   38:- begin_lpad.   39
   40% dp_n_values(N0,N,Alpha,L)
   41% returns in L a list of N-N0 samples from the DP with concentration parameter
   42% Alpha
   43dp_n_values(N,N,_Alpha,[]):-!.
   44
   45dp_n_values(N0,N,Alpha,[[V]-1|Vs]):-
   46  N0<N,
   47  dp_value(N0,Alpha,V),
   48  N1 is N0+1,
   49  dp_n_values(N1,N,Alpha,Vs).
   50  
   51% dp_value(NV,Alpha,V)
   52% returns in V the NVth sample from the DP with concentration parameter
   53% Alpha
   54dp_value(NV,Alpha,V):-
   55  dp_stick_index(NV,Alpha,I),
   56  dp_pick_value(I,V).
   57
   58% dp_pick_value(I,V)
   59% returns in V the value of index I of the base distribution 
   60% (in this case N(0,1))
   61dp_pick_value(_,V):gaussian(V,0,1).
   62
   63% dp_stick_index(NV,Alpha,I)
   64% returns in I the index of the NVth sample from the DP
   65dp_stick_index(NV,Alpha,I):-
   66  dp_stick_index(1,NV,Alpha,I).
   67
   68dp_stick_index(N,NV,Alpha,V):-
   69  stick_proportion(N,Alpha,P),
   70  choose_prop(N,NV,Alpha,P,V).
   71  
   72% choose_prop(N,NV,Alpha,P,V)
   73% returns in V the index of the end of the stick breaking process starting
   74% from index N for the NVth value to be sampled from the DP
   75choose_prop(N,NV,_Alpha,P,N):-
   76  pick_portion(N,NV,P).
   77
   78choose_prop(N,NV,Alpha,P,V):-
   79  neg_pick_portion(N,NV,P),
   80  N1 is N+1,
   81  dp_stick_index(N1,NV,Alpha,V).
   82 
   83% sample of the beta_i parameters
   84stick_proportion(_,Alpha,P):beta(P,1,Alpha).
   85
   86% flip of the coin for the portion of the stick of size P
   87pick_portion(N,NV,P):P;neg_pick_portion(N,NV,P):1-P.
   88
   89:- end_lpad.   90
   91hist(Samples,NBins,Chart):-
   92  mc_sample_arg(dp_stick_index(1,10.0,V),Samples,V,L),
   93  histogram(L,Chart,[nbins(NBins)]).
   94
   95hist_repeated_indexes(Samples,NBins,Chart):-
   96  repeat_sample(0,Samples,L),
   97  histogram(L,Chart,[nbins(NBins)]).
   98
   99repeat_sample(S,S,[]):-!.
  100
  101repeat_sample(S0,S,[[N]-1|LS]):-
  102  mc_sample_arg_first(dp_stick_index(1,1,10.0,V),10,V,L),
  103  length(L,N),
  104  S1 is S0+1,
  105  repeat_sample(S1,S,LS).
  106
  107hist_val(Samples,NBins,Chart):-
  108  mc_sample_arg_first(dp_n_values(0,Samples,10.0,V),1,V,L),
  109  L=[Vs-_],
  110  histogram(Vs,Chart,[nbins(NBins)])