2:- use_module(library(sgml)).    3:- use_module(library(lists),[member/2,append/3,reverse/2]).    4
    5test(F):- 
    6   load_xml_file(F,T), 
    7%  pretty_print(T,0),
    8   elements(T,['VNCLASS'],f(X,C)),   
    9   value(X,'ID',ID),
   10   check(ID,C).
   11
   12verbnet2prolog(F):- 
   13   load_xml_file(F,T), 
   14%  pretty_print(T,0),
   15   elements(T,['VNCLASS'],f(X,C)),   
   16   value(X,'ID',ID),
   17   members(C,F,[],ID).
   18
   19
   20/* ----------------------------------------------------------------------
   21   Pretty Printing XML
   22---------------------------------------------------------------------- */ 
   23
   24pretty_print([],_).
   25
   26pretty_print([element(A,B,C)|L],Tab):- !,
   27   tab(Tab), write(A), write(' '), write(B), nl,
   28   NewTab is Tab+3, 
   29   pretty_print(C,NewTab),
   30   pretty_print(L,Tab).
   31
   32pretty_print([E|L],Tab):-  
   33   tab(Tab), write(unknown:E),nl,
   34   pretty_print(L,Tab).
   35
   36
   37/* ----------------------------------------------------------------------
   38   Checking Syntax
   39---------------------------------------------------------------------- */ 
   40
   41check(ID,X):-
   42   elements(X,['FRAMES','FRAME'],f(_,Frame)),   
   43   elements(Frame,['DESCRIPTION'],f(De,_)),   
   44   value(De,primary,Primary),
   45   write(ID), write(': '), write(Primary),nl,
   46   fail.
   47
   48check(_,X):-
   49   elements(X,['SUBCLASSES','VNSUBCLASS'],f(Y,Sub)), 
   50   value(Y,'ID',ID),
   51   check(ID,Sub),
   52   fail.
   53
   54check(_,_).
   55
   56         
   57/* ----------------------------------------------------------------------
   58   Processing all members of a VerbNet class
   59---------------------------------------------------------------------- */ 
   60
   61members(X,File,SuperFrames,XID):- 
   62   findall(Name,(elements(X,['MEMBERS','MEMBER'],f(Member,_)),value(Member,name,Name)),Names),
   63   findall(Frame,(elements(X,['FRAMES','FRAME'],f(_,Frame))),SubFrames),
   64   append(SuperFrames,SubFrames,Frames),
   65   frameMember(Frames,Names,XID,File),
   66   findall(Sub,(elements(X,['SUBCLASSES','VNSUBCLASS'],f(Y,Sub)),
   67                value(Y,'ID',YID),
   68                members(Sub,File,Frames,YID)),_).
   69                 
   70
   71/* ----------------------------------------------------------------------
   72   Process a member/frame pair
   73---------------------------------------------------------------------- */ 
   74
   75frameMember([],_,_,_):- !.
   76
   77frameMember([F|L],Names,ID,File):-
   78   pairMemberFrame(Names,F,ID,File),
   79   frameMember(L,Names,ID,File).
   80
   81pairMemberFrame([],_,_,_).
   82
   83pairMemberFrame([Name|L],Frame,ID,File):-
   84   elements(Frame,['DESCRIPTION'],f(De,_)),   
   85   value(De,primary,Pr),
   86   elements(Frame,['SYNTAX'],f(_,Syntax)),  
   87   format('verbnet(~q, ~q, ',[Name,Pr]),
   88   subcatpat(Syntax,[],SubCatPat), 
   89   format('~q, ',[SubCatPat]), 
   90   subcat(Syntax,[],SubCat),
   91   atom_chars(ID,IDChars),
   92   formatID(IDChars,[_,_|FID]),
   93   format('~q,~q). %%% ~p (~p)~n',[SubCat,FID,ID,File]), !,
   94   pairMemberFrame(L,Frame,ID,File).
   95
   96
   97/* ----------------------------------------------------------------------
   98   Format VerbNet ID
   99---------------------------------------------------------------------- */ 
  100
  101formatID(Chars,[Pre,Sep1|L]):-
  102   Seps = ['-','.'], member(Sep1,Seps),
  103   append(PreChars,[Sep1|RestChars],Chars), 
  104   \+ ( member(Sep2,Seps), member(Sep2,PreChars) ), !,
  105   formatNumber(PreChars,Pre),
  106   formatID(RestChars,L).
  107
  108formatID(Chars,[ID]):-
  109   formatNumber(Chars,ID).
  110
  111formatNumber(Chars,Num):-
  112   Chars = [First|_], 
  113   member(First,['0','1','2','3','4','5','6','7','8','9']), !, 
  114   number_chars(Num,Chars).
  115
  116formatNumber(Chars,Atom):-
  117   atom_chars(Atom,Chars).
  118
  119/* ----------------------------------------------------------------------
  120   Printing the subcat frame
  121---------------------------------------------------------------------- */ 
  122
  123subcat([],Acc1,Acc2):- postproc(Acc1,[],Acc2).
  124subcat([E|L],Acc1,Acc3):- cat(E,Acc1,Acc2), subcat(L,Acc2,Acc3).
  125
  126subcatpat([],Acc1,Acc2):- postproc(Acc1,[],Acc2).
  127subcatpat([E|L],Acc1,Acc3):- catpat(E,Acc1,Acc2), subcatpat(L,Acc2,Acc3).
  128
  129
  130/* ----------------------------------------------------------------------
  131   Post Processing (reverse + rewriting)
  132---------------------------------------------------------------------- */ 
  133
  134postproc([],L,L).
  135postproc([np,pp|L1],Acc,L2):- !, postproc(L1,[pp|Acc],L2).
  136postproc([np:V,pp|L1],Acc,L2):- !, postproc(L1,[pp:V|Acc],L2).
  137postproc([s,pp|L1],Acc,L2):- !, postproc(L1,[s|Acc],L2).
  138postproc([s:V,pp|L1],Acc,L2):- !, postproc(L1,[s:V|Acc],L2).
  139postproc([X|L1],Acc,L2):- postproc(L1,[X|Acc],L2).
  140
  141/* ----------------------------------------------------------------------
  142   Syntactic Restrictions
  143---------------------------------------------------------------------- */ 
  144
  145restr(Restr,Type):- 
  146  Restr = [element('SYNRESTRS',[],L)],
  147  member(element('SYNRESTR',['Value'='+',type=Type],[]),L), !.
  148
  149s_restr(that_comp).
  150s_restr(for_comp).
  151s_restr(wh_comp).
  152
  153% s_restr(poss_ing). % not sentence!
  154s_restr(acc_ing).
  155s_restr(oc_ing).
  156s_restr(ac_ing).
  157s_restr(be_sc_ing).
  158s_restr(np_omit_ing).  % ???
  159s_restr(np_ppart).     % ??? 
  160s_restr(np_p_ing).     % ???
  161s_restr(np_ing).       % ???
  162
  163s_restr(how_extract).
  164s_restr(what_extract).
  165
  166s_restr(wh_inf).
  167s_restr(what_inf).
  168s_restr(wheth_inf).
  169s_restr(oc_bare_inf).
  170s_restr(oc_to_inf).
  171s_restr(ac_to_inf).
  172s_restr(sc_to_inf).
  173s_restr(np_to_inf).
  174s_restr(vc_to_inf).
  175s_restr(rs_to_inf). % very rare -- bug?
  176s_restr(to_inf_rs). % very rare -- bug?
  177
  178
  179/* ----------------------------------------------------------------------
  180   Printing a category
  181---------------------------------------------------------------------- */ 
  182
  183cat(element('NP', [value=Value], R),A,[s:Value|A]):- s_restr(S), restr(R,S), !.
  184cat(element('NP', [value=Value], _),A,[np:Value|A]):- !.
  185cat(element('PREP', [], _),A,[pp|A]):- !.
  186cat(element('PREP', [value=Value], _),A,[prep:Value|A]):- !.
  187cat(element('LEX', [value='[+be]'], _),A,[lex:be|A]):- !. 
  188cat(element('LEX', [value='it[+be]'], _),A,[lex:be,lex:it|A]):- !.
  189cat(element('LEX', [value=at], _),A,[prep:at|A]):- !.
  190cat(element('LEX', [value=of], _),A,[prep:of|A]):- !.
  191cat(element('LEX', [value=Value], _),A,[lex:Value|A]):- !.
  192cat(element('VERB',[],[]),A,[v|A]):- !.
  193cat(element('ADJ',[],[]),A,[adj|A]):- !.
  194cat(element('ADV',[],[]),A,[adv|A]):- !.
  195cat(U,A,[unk:U|A]):- !.
  196
  197catpat(element('NP',_,R),A,[s|A]):- s_restr(S), restr(R,S), !.
  198catpat(element('NP',_,_),A,[np|A]):- !.
  199catpat(element('PREP', [], _),A,[pp|A]):- !.
  200catpat(element('PREP', [value=_], _),A,[prep|A]):- !.
  201catpat(element('LEX',[value=at],_),A,[prep|A]):- !.
  202catpat(element('LEX',[value=of],_),A,[prep|A]):- !.
  203catpat(element('LEX',_,_),A,[lex|A]):- !.
  204catpat(element('VERB',_,_),A,[v|A]):- !.
  205catpat(element('ADJ',_,_),A,[adj|A]):- !.
  206catpat(element('ADV',_,_),A,[adv|A]):- !.
  207catpat(_,A,[unk|A]):- !.
  208
  209
  210/* ----------------------------------------------------------------------
  211   Processing elements of the XML tree
  212---------------------------------------------------------------------- */ 
  213
  214elements([element(X,F,L)|_],[X],f(F,L)).
  215elements([element(X,_,L)|_],[X|R],A):- elements(L,R,A).
  216elements([_|L],X,A):- elements(L,X,A).
  217
  218
  219/* ----------------------------------------------------------------------
  220   Accessing a value
  221---------------------------------------------------------------------- */ 
  222
  223value([Name=Value|_],Name,Value):- !.
  224value([_|L],Name,Value):- value(L,Name,Value).
  225
  226
  227/* ----------------------------------------------------------------------
  228   VerbNet Directory
  229---------------------------------------------------------------------- */ 
  230
  231verbnet_dir('ext/VerbNet/').
  232
  233
  234/* ----------------------------------------------------------------------
  235   Processing all XML files
  236---------------------------------------------------------------------- */ 
  237
  238process([]).
  239process([File|L]):-
  240%   test(File),
  241   verbnet2prolog(File),
  242   process(L).
  243
  244
  245/* ----------------------------------------------------------------------
  246   Start Predicate
  247---------------------------------------------------------------------- */ 
  248
  249run:- 
  250   verbnet_dir(Dir), 
  251   exists_directory(Dir),
  252   WildCard = '*.xml',
  253%  WildCard = 'put-9.1.xml',
  254   atom_concat(Dir,WildCard,Expand),
  255   expand_file_name(Expand,Files),
  256   write(':- dynamic verbnet/5.'), nl,
  257   process(Files), 
  258   halt.
  259
  260:- run.