1/* 
    2Hidden Markov model for part-of-speech tagging.
    3The states represent parts-of-speech, and the symbols emitted by the states are words. The assumption is that a word depends probabilistically on just its own part-of-speech (i.e. its tag) which in turn depends on the part-of-speech of the preceding word (or on the start state in case there is no preceding word).
    4From
    5http://www.ling.gu.se/~lager/Spaghetti/spaghetti.html
    6Original program by Torbjorn Lager, adapted to MCINTYRE by Fabrizio Riguzzi
    7*/
    8:- use_module(library(mcintyre)).    9
   10:- if(current_predicate(use_rendering/1)).   11:- use_rendering(c3).   12:- use_rendering(graphviz).   13:- endif.   14
   15:- mc.   16
   17:- begin_lpad.   18
   19% hmm(O): O is the output sequence 
   20% hmm(S,O): O is the output sequence and S is the sequence of states
   21% hmm(Q,S0,S,O):  from state Q and previous state S0, generates output O and
   22% sequence of states S
   23
   24hmm(O):-hmm(_,O).
   25% O is an output sequence if there is a state seuqnece S such that hmm(S,O) 
   26% holds
   27
   28hmm(S,O):-trans(start,Q0,[]),hmm(Q0,[],S0,O),reverse(S0,S).
   29% O is an output sequence and S a state sequence if the chain stats at state
   30% start and ends generating state sequence S and output sequence O
   31
   32hmm(Q,S0,S,[L|O]):-
   33	trans(Q,Q1,S0),
   34	out(L,Q,S0),
   35	hmm(Q1,[Q|S0],S,O).
   36% an HMM in state Q goes in state Q1, emits the word L 
   37% and continues the chain
   38
   39hmm(_,S,S,[]).
   40% an HMM in sny state terminates the sequence without emitting any symbol
   41
   42
   43trans(start,det,_):0.30;
   44trans(start,aux,_):0.20;
   45trans(start,v,_):0.10;
   46trans(start,n,_):0.10;
   47trans(start,pron,_):0.30.
   48
   49trans(det,det,_):0.20;
   50trans(det,aux,_):0.01;
   51trans(det,v,_):0.01;
   52trans(det,n,_):0.77;
   53trans(det,pron,_):0.01.
   54
   55trans(aux,det,_):0.18;
   56trans(aux,aux,_):0.10;
   57trans(aux,v,_):0.50;
   58trans(aux,n,_):0.01;
   59trans(aux,pron,_):0.21.
   60
   61trans(v,det,_):0.36;
   62trans(v,aux,_):0.01;
   63trans(v,v,_):0.01;
   64trans(v,n,_):0.26;
   65trans(v,pron,_):0.36.
   66
   67trans(n,det,_):0.01;
   68trans(n,aux,_):0.25;
   69trans(n,v,_):0.39;
   70trans(n,n,_):0.34;
   71trans(n,pron,_):0.01.
   72
   73trans(pron,det,_):0.01;
   74trans(pron,aux,_):0.45;
   75trans(pron,v,_):0.52;
   76trans(pron,n,_):0.01;
   77trans(pron,pron,_):0.01.
   78
   79/*
   80out(a,det,_):0.300.
   81out(can,aux,_):0.010.
   82out(can,v,_):0.005.
   83out(can,n,_):0.001.
   84out(he,pron,_):0.070.
   85*/
   86out(a,det,_).
   87out(can,aux,_).
   88out(can,v,_).
   89out(can,n,_).
   90out(he,pron,_).
   91
   92
   93:- end_lpad.   94
   95state_diagram(digraph(G)):-
   96    setof(A,(B,S,Body)^
   97          clause(trans(A,B,S),Body),Nodes),
   98    maplist(nodelab,Nodes,NodesLab),
   99    findall(edge(A -> B,[label=P]),
  100      (clause(trans(A,B,_),
  101        sample_head(_,_,_,Probs,N)),
  102        nth0(N,Probs,_:P)),
  103      Edges),
  104    append(NodesLab,Edges,G).
  105
  106nodelab(N,node(N,[label=Lab])):-
  107    findall(W,clause(out(W,N,_),_),L),
  108    atomic_list_concat([N,'\nOut:\n'|L],Lab).

?- mc_sample_arg(hmm(S,[he,can,can,a,can]),20,S,O). % sample the state sequence corresonding to the phrase "he can can a can" % the most frequent state sequence is an approximate POS tagging for the % sentence. It corresponds to the Viterbi path of the HMM. % expected result: the most frequent tagging should be [pron, aux, v, det, n] ?- mc_sample_arg(hmm(S,[he,can,can,a,can]),20,S,O),argbar(O,C).

?- state_diagram(G). % show the state diagram */