:- module(transform,[preprocess/6, topsem/2, % +Der, -Sem topatt/2, % +Der, -Attributes topstr/2, % +Der, -String topcat/2]). % +Der, -Category :- use_module(library(lists),[append/3]). :- use_module(semlib(options),[option/2]). :- use_module(semlib(errors),[error/2,warning/2]). :- use_module(boxer(morpha),[morpha/2]). :- use_module(boxer(slashes)). /* ------------------------------------------------------------------------- Pre-Processing of CCG derivation to ensure correct format ------------------------------------------------------------------------- */ preprocess(SID,X,Y,Tags,Start,End):- setTokID(SID,Start,TokID), trans(X,TokID,Y,End,Tags), !. preprocess(SID,_,_,_,_,_):- error('unable to preprocess derivation ~p',[SID]), !, fail. /* ------------------------------------------------------------------------- Funny (C&C wrongly analysed cases of N coordination) ------------------------------------------------------------------------- */ trans(fa(n,X,funny(n,Conj,fa(n,Y,Z))),N1,X2,N3,Tags):- !, trans(fa(n,ba(n/n,X,conj((n/n)\(n/n),n/n,Conj,Y)),Z),N1,X2,N3,Tags). trans(funny(_,_,X1),N1,X2,N3,Tags):- !, warning('the funny combinatory rule causes skipping token ~p',[N1]), N2 is N1 + 1, %% assuming we skip one word (i.e. 'and') trans(X1,N2,X2,N3,Tags). /* ------------------------------------------------------------------------- Punctuation typechange rules ------------------------------------------------------------------------- */ trans(rtc(C,X1,Pu1),N1,ba(C,nil,Att,Str,X2,X3),N3,Tags1-Tags3):- Pu1 =.. [t,_|Cs], !, trans(X1,N1,X2,N2,Tags1-Tags2), topcat(X2,Cat), topatt(X2,Att), Pu2 =.. [t,C\Cat|Cs], trans(Pu2,N2,X3,N3,Tags2-Tags3), strings(X2,X3,Str). trans(ltc(C,Pu1,X1),N1,fa(C,nil,Att,Str,X2,X3),N3,Tags1-Tags3):- trans(Pu1,N1,Pu2,N2,Tags1-Tags2), Pu2 = t(_,Tok2,Sem2,Att2,I), !, trans(X1,N2,X3,N3,Tags2-Tags3), topcat(X3,Cat), topatt(X3,Att), X2 = t(C/Cat,Tok2,Sem2,Att2,I), strings(X3,X2,Str). /* ------------------------------------------------------------------------- Punctuation rules ------------------------------------------------------------------------- */ trans(rp(Cat,X1,Y0),N1,X2,N3,Tags):- Y0 =.. [t,_|L], !, Y1 =.. [t,Cat\Cat|L], trans(ba(Cat,X1,Y1),N1,X2,N3,Tags). trans(lp(Cat,X0,Y1),N1,X2,N3,Tags):- X0 =.. [t,_|L], !, X1 =.. [t,Cat/Cat|L], trans(fa(Cat,X1,Y1),N1,X2,N3,Tags). /* ------------------------------------------------------------------------- Application ------------------------------------------------------------------------- */ trans(fa(_,X1,Y1), N1, fa(C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), topcat(X2,C1/C2), trans(Y1,N2,Y2,N3,Tags2-Tags3), topcat(Y2,C2), strings(X2,Y2,Str), headAtt(X2,Y2,Att). trans(ba(_,X1,Y1), N1, ba(C2,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), topcat(X2,C1), trans(Y1,N2,Y2,N3,Tags2-Tags3), topcat(Y2,C2\C1), strings(X2,Y2,Str), headAtt(Y2,X2,Att). /* ------------------------------------------------------------------------- Composition ------------------------------------------------------------------------- */ trans(fc(_,X1,Y1), N1, fc(C1/C3,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), topcat(X2,C1/C2), topcat(Y2,C2/C3), strings(X2,Y2,Str), headAtt(X2,Y2,Att). trans(bc(_,X1,Y1), N1, bc(C3\C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), topcat(X2,C2\C1), topcat(Y2,C3\C2), strings(X2,Y2,Str), headAtt(Y2,X2,Att). /* ------------------------------------------------------------------------- Generalised Composition ------------------------------------------------------------------------- */ trans(gfc(C,N,X1,Y1), N1, gfc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(X2,Y2,Att). trans(gbc(C,N,X1,Y1), N1, gbc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(Y2,X2,Att). /* ------------------------------------------------------------------------- Crossed Composition ------------------------------------------------------------------------- */ trans(bxc(_,X1,Y1), N1, bxc(C3/C1,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), topcat(X2,C2/C1), topcat(Y2,C3\C2), strings(X2,Y2,Str), headAtt(X2,Y2,Att). trans(fxc(C,X1,Y1), N1, fxc(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(Y2,X2,Att). /* ------------------------------------------------------------------------- Generalised Crossed Composition ------------------------------------------------------------------------- */ trans(gfxc(C,N,X1,Y1), N1, gfxc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(X2,Y2,Att). trans(gbxc(C,N,X1,Y1), N1, gbxc(C,N,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(Y2,X2,Att). /* ------------------------------------------------------------------------- Conjuction (Coordination) ------------------------------------------------------------------------- */ %trans(conj(np:nb\np:nb,np:nb,X1,Y1), N1, conj(np\np,np,nil,Att,X2,Y2), N3, Tags1-Tags3):- % X1 =.. [t,conj|Cs], !, % X3 =.. [t,conj:np|Cs], % trans(X3,N1,X2,N2,Tags1-Tags2), % trans(Y1,N2,Y2,N3,Tags2-Tags3), % topatt(Y2,Att). trans(conj(np\np,np,X1,Y1), N1, conj(np\np,np,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- X1 =.. [t,comma |Cs], !, % replace apposition comma X3 =.. [t,conj:app|Cs], % by category conj:app trans(X3,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), topatt(Y2,Att). trans(conj(Cat\Cat,Cat,X1,Y1), N1, conj(NewCat\NewCat,NewCat,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, adjustFeatures(Cat,NewCat), trans(X1,N1,X2,N2,Tags1-Tags2), X2 =.. [_,conj:NewCat|_], trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), topatt(Y2,Att). trans(conj(_C1,_C2,Y1,Z1), N1, conj(C\C,C,nil,Att,Str,Y2,Z2), N3, Tags1-Tags3):- !, trans(Y1,N1,Y2,N2,Tags1-Tags2), trans(Z1,N2,Z2,N3,Tags2-Tags3), strings(Y2,Z2,Str), topcat(Z2,C), topatt(Z2,Att). trans(coord(_C,X1,Y1,Z1), N1, coord(C,nil,Att,Str,X2,Y2,Z2), N4, Tags1-Tags4):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), trans(Z1,N3,Z2,N4,Tags3-Tags4), topstr(X2,S1), topstr(Y2,S2), append(S1,S2,S3), topstr(Z2,S4), append(S3,S4,Str), topcat(Z2,C), topatt(Z2,Att). /* ------------------------------------------------------------------------- Unary Rules: Type Changing ------------------------------------------------------------------------- */ trans(lx(C,D,X),N1,T,N2,Tags):- !, trans(tc(C,D,X),N1,T,N2,Tags). trans(tc(C1,_,X1), N1, tc(C3,C2,nil,Att,Str,X2), N2, Tags):- !, adjustFeatures(C1,C3), trans(X1,N1,X2,N2,Tags), topcat(X2,C2), topstr(X2,Str), topatt(X2,Att). /* ------------------------------------------------------------------------- Unary Rules: Type Raising ------------------------------------------------------------------------- */ trans(tr(C1/(C1\C2),X1), N1, ftr(C1/(C1\C2),C2,nil,Att,Str,X2), N2, Tags):- !, trans(X1,N1,X2,N2,Tags), topcat(X2,C2), topstr(X2,Str), topatt(X2,Att). trans(tr(C1\(C1/C2),X1), N1, btr(C1\(C1/C2),C2,nil,Att,Str,X2), N2, Tags):- !, trans(X1,N1,X2,N2,Tags), topcat(X2,C2), topstr(X2,Str), topatt(X2,Att). /* ------------------------------------------------------------------------- Substitution ------------------------------------------------------------------------- */ trans(fs(C,X1,Y1), N1, fs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(X2,Y2,Att). trans(bs(C,X1,Y1), N1, bs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(Y2,X2,Att). trans(fxs(C,X1,Y1), N1, fxs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(X2,Y2,Att). trans(bxs(C,X1,Y1), N1, bxs(C,nil,Att,Str,X2,Y2), N3, Tags1-Tags3):- !, trans(X1,N1,X2,N2,Tags1-Tags2), trans(Y1,N2,Y2,N3,Tags2-Tags3), strings(X2,Y2,Str), headAtt(Y2,X2,Att). /* ------------------------------------------------------------------------- Token (repair rules -- systematically wrong output of C&C parser) ------------------------------------------------------------------------- */ trans(t(A,B,people,C,D,E),N,Tok,M,Tags):- option('--x',true), !, % SemEval-2014 trans(t(A,B,person,C,D,E),N,Tok,M,Tags). trans(t(A,B,C,'NNP',D,E),N,Tok,M,Tags):- option('--x',true), !, % SemEval-2014 trans(t(A,B,C,'NN',D,E),N,Tok,M,Tags). trans(t(Cat,'\'t',_,'VB',S,Ne),N,Tok,M,Tags):- !, trans(t(Cat,'\'t','RB',[lemma:not,sense:S,namex:Ne]),N,Tok,M,Tags). trans(t(Cat,'\'t',_,'VB',Att),N,Tok,M,Tags):- !, trans(t(Cat,'\'t','RB',Att),N,Tok,M,Tags). trans(t(A,B,please,'VB',D,E),N,Tok,M,Tags):- !, trans(t(A,B,please,'RB',D,E),N,Tok,M,Tags). /* ------------------------------------------------------------------------- Token (wrapper rules -- dealing with old output of C&C parser) ------------------------------------------------------------------------- */ trans(t(Cat,Token,Pos),N,Tok,M,Tags):- !, trans(t(Cat,Token,Pos,[]),N,Tok,M,Tags). trans(t(Cat,Token,Lem,Pos,S1,Ne),N,Tok,M,Tags):- !, context(S1,S2), trans(t(Cat,Token,Pos,[lemma:Lem,namex:Ne|S2]),N,Tok,M,Tags). /* ------------------------------------------------------------------------- Token ------------------------------------------------------------------------- */ trans(t(Cat1,Tok,Pos,Tags),N,t(Cat2,Tok,nil,RevTags,N),M,T1-T2):- adjustFeatures(Cat1,Cat2), tags(T2,N,[tok:Tok,pos:Pos|Tags],T1), morpha([pos:Pos|Tags],RevTags), M is N + 1. % old input version (t/6 terms) %trans(t(Cat1,Tok,Lem,Pos,S1,Ne),N,t(Cat2,Tok,nil,[pos:Pos,lemma:Lem,namex:Ne|S2],N),M,T1-T2):- % context(S1,S2), % adjustFeatures(Cat1,Cat2), % tags(T2,N,[tok:Tok,pos:Pos,lemma:Lem,namex:Ne|S2],T1), % M is N + 1. /* ========================================================================= String Formation ========================================================================= */ strings(D1,D2,W):- topstr(D1,W1), topstr(D2,W2), append(W1,W2,W). /* ========================================================================= External Context Information (for now only Word Sense Disambiguation) ========================================================================= */ context(Number,Sense):- number(Number), !, Sense = [sense:Number]. context(_,Sense):- Sense = []. /* ========================================================================= Determine Feature on N ========================================================================= */ featureN('NNPS', nam):- !. featureN('NNP', nam):- !. featureN('CD', num):- !. featureN(_, nom). /* ========================================================================= Adjust features (mostly bugs in C&C parser) ========================================================================= */ adjustFeatures(conj/conj, conj:X/conj:X):- !. adjustFeatures(conj, conj:_):- !. adjustFeatures(comma, conj:_):- !. adjustFeatures(semi, conj:_):- !. adjustFeatures(s, s:_):- !. %%% bug in C&C parser adjustFeatures(s\np, s:dcl\np):- !. %%% bug in C&C parser adjustFeatures(s/s:X, s:X/s:X). %%% bug in C&C parser adjustFeatures(s/s:X, s:_/s:X):- !. %%% bug in C&C parser adjustFeatures( (((s:Y\np)\(s:Y\np))\((s\np)\(s\np)))/((s\np)\(s\np)),Cat):- !, Cat = (((s:Y\np)\(s:Y\np))\((s:X\np)\(s:X\np)))/((s:Z\np)\(s:Z\np)). adjustFeatures(np:_, np):- !. adjustFeatures(n:_, n):- !. /* ========================================================================= Adjust features ========================================================================= */ adjustFeatures(F1/A1,F2/A2):- !, adjustFeatures(F1,F2), adjustFeatures(A1,A2). adjustFeatures(F1\A1,F2\A2):- !, adjustFeatures(F1,F2), adjustFeatures(A1,A2). adjustFeatures(Cat,Cat). /* ========================================================================= Adding Info ========================================================================= */ tags(T,ID,Tags,[ID:Tags|T]). /* ========================================================================= Get top categorie, semantics or attributes from a derivation ========================================================================= */ topcat(Der,Cat):- top(Der,Cat,_,_,_), !. topsem(Der,Sem):- top(Der,_,Sem,_,_), !. topatt(Der,Att):- top(Der,_,_,Att,_), !. topstr(Der,Str):- top(Der,_,_,_,Str), !. /* ========================================================================= Top categorie, semantics or attributes from a derivation ========================================================================= */ top(fa(C,S,A,W,_,_),C,S,A,W). top(ba(C,S,A,W,_,_),C,S,A,W). top(fc(C,S,A,W,_,_),C,S,A,W). top(bc(C,S,A,W,_,_),C,S,A,W). top(fxc(C,S,A,W,_,_),C,S,A,W). top(bxc(C,S,A,W,_,_),C,S,A,W). top(fs(C,S,A,W,_,_),C,S,A,W). top(bs(C,S,A,W,_,_),C,S,A,W). top(fxs(C,S,A,W,_,_),C,S,A,W). top(bxs(C,S,A,W,_,_),C,S,A,W). top(gfc(C,_,S,A,W,_,_),C,S,A,W). top(gbc(C,_,S,A,W,_,_),C,S,A,W). top(gfxc(C,_,S,A,W,_,_),C,S,A,W). top(gbxc(C,_,S,A,W,_,_),C,S,A,W). top(gfc(C,S,A,W,_,_),C,S,A,W). top(gbc(C,S,A,W,_,_),C,S,A,W). top(gfxc(C,S,A,W,_,_),C,S,A,W). top(gbxc(C,S,A,W,_,_),C,S,A,W). top(ftr(C,_,S,A,W,_),C,S,A,W). top(btr(C,_,S,A,W,_),C,S,A,W). top(tc(C,_,S,A,W,_),C,S,A,W). top(lx(C,S,A,W,_,_),C,S,A,W). top(t(C,W,S,A,_),C,S,A,[W]). top(conj(C,_,S,A,W,_,_),C,S,A,W). top(coord(C,S,A,W,_,_,_),C,S,A,W). /* ------------------------------------------------------------------------- Take attributes from head ------------------------------------------------------------------------- */ headAtt(D1,D2,Att):- topcat(D1,C1/C2), C1==C2, !, topatt(D2,Att). headAtt(D1,D2,Att):- topcat(D1,C1\C2), C1==C2, !, topatt(D2,Att). headAtt(D0,_ ,Att):- topatt(D0,Att), !. /* ------------------------------------------------------------------------- Set Token-ID ------------------------------------------------------------------------- */ setTokID(_,Start,Start):- option('--tokid',global), !. setTokID(SID,_,Start):- option('--tokid',local), !, Start is (SID*1000)+1.