hmmpos.pl

1/* 2Hidden Markov model for part-of-speech tagging. 3The states represent parts-of-speech, and the symbols emitted by the states are words. The assumption is that a word depends probabilistically on just its own part-of-speech (i.e. its tag) which in turn depends on the part-of-speech of the preceding word (or on the start state in case there is no preceding word). 4From 5http://www.ling.gu.se/~lager/Spaghetti/spaghetti.html 6Original program by Torbjorn Lager, adapted to MCINTYRE by Fabrizio Riguzzi 7*/ 8:- use_module(library(mcintyre)). 9 10:- if(current_predicate(use_rendering/1)). 11:- use_rendering(c3). 12:- use_rendering(graphviz). 13:- endif. 14 15:- mc. 16 17:- begin_lpad. 18 19% hmm(O): O is the output sequence 20% hmm(S,O): O is the output sequence and S is the sequence of states 21% hmm(Q,S0,S,O): from state Q and previous state S0, generates output O and 22% sequence of states S 23 24hmm(O):-hmm(_,O). 25% O is an output sequence if there is a state seuqnece S such that hmm(S,O) 26% holds 27 28hmm(S,O):-trans(start,Q0,[]),hmm(Q0,[],S0,O),reverse(S0,S). 29% O is an output sequence and S a state sequence if the chain stats at state 30% start and ends generating state sequence S and output sequence O 31 32hmm(Q,S0,S,[L|O]):- 33 trans(Q,Q1,S0), 34 out(L,Q,S0), 35 hmm(Q1,[Q|S0],S,O). 36% an HMM in state Q goes in state Q1, emits the word L 37% and continues the chain 38 39hmm(_,S,S,[]). 40% an HMM in sny state terminates the sequence without emitting any symbol 41 42 43trans(start,det,_):0.30; 44trans(start,aux,_):0.20; 45trans(start,v,_):0.10; 46trans(start,n,_):0.10; 47trans(start,pron,_):0.30. 48 49trans(det,det,_):0.20; 50trans(det,aux,_):0.01; 51trans(det,v,_):0.01; 52trans(det,n,_):0.77; 53trans(det,pron,_):0.01. 54 55trans(aux,det,_):0.18; 56trans(aux,aux,_):0.10; 57trans(aux,v,_):0.50; 58trans(aux,n,_):0.01; 59trans(aux,pron,_):0.21. 60 61trans(v,det,_):0.36; 62trans(v,aux,_):0.01; 63trans(v,v,_):0.01; 64trans(v,n,_):0.26; 65trans(v,pron,_):0.36. 66 67trans(n,det,_):0.01; 68trans(n,aux,_):0.25; 69trans(n,v,_):0.39; 70trans(n,n,_):0.34; 71trans(n,pron,_):0.01. 72 73trans(pron,det,_):0.01; 74trans(pron,aux,_):0.45; 75trans(pron,v,_):0.52; 76trans(pron,n,_):0.01; 77trans(pron,pron,_):0.01. 78 79/* 80out(a,det,_):0.300. 81out(can,aux,_):0.010. 82out(can,v,_):0.005. 83out(can,n,_):0.001. 84out(he,pron,_):0.070. 85*/ 86out(a,det,_). 87out(can,aux,_). 88out(can,v,_). 89out(can,n,_). 90out(he,pron,_). 91 92 93:- end_lpad. 94 95state_diagram(digraph(G)):- 96 setof(A,(B,S,Body)^ 97 clause(trans(A,B,S),Body),Nodes), 98 maplist(nodelab,Nodes,NodesLab), 99 findall(edge(A -> B,[label=P]), 100 (clause(trans(A,B,_), 101 sample_head(_,_,_,Probs,N)), 102 nth0(N,Probs,_:P)), 103 Edges), 104 append(NodesLab,Edges,G). 105 106nodelab(N,node(N,[label=Lab])):- 107 findall(W,clause(out(W,N,_),_),L), 108 atomic_list_concat([N,'\nOut:\n'|L],Lab).

?- mc_sample_arg(hmm(S,[he,can,can,a,can]),20,S,O). % sample the state sequence corresonding to the phrase "he can can a can" % the most frequent state sequence is an approximate POS tagging for the % sentence. It corresponds to the Viterbi path of the HMM. % expected result: the most frequent tagging should be [pron, aux, v, det, n] ?- mc_sample_arg(hmm(S,[he,can,can,a,can]),20,S,O),argbar(O,C).

?- state_diagram(G). % show the state diagram */