% ebl_include_lex.pl :- ensure_loaded('$REGULUS/PrologLib/compatibility'). /* - Add declarations saying which lexicon entries are to be imported - Specify list of subdomains - No specification = [default] - Specify category + optional surface or sem - Surface form specified exactly - Sem matches some part of sem - Can also allow negative declarations - Format: dont_include_lex/1 or /2 - Negative take precedence over positive - Extract entries from "reflective DCG" format after doing rest of EBL training - Expand disjunctive surface forms non-deterministically - "Context" is "lexicon" - "Tags" (last arg of raw EBL training result) from ebl_include_lex decl Sample declarations: include_lex(v:[]). include_lex(n:[]). include_lex(v:[words=start]). include_lex(n:[words=(high, blood, pressure)]). include_lex(v:[sem=start_happening]). include_lex(n:[sem=high_blood_pressure]). include_lex(v:[words=start], [default]). include_lex(v:[sem=high_blood_pressure], [chest_pain, abdominal_pain]). dont_include_lex(n:[sem=high_blood_pressure]). dont_include_lex(n:[words=(high, blood, pressure)]). Reflective DCG rule with disjunctive lexical elt: dcg_clause(d(phrase(d,A),[agr=bv(0,B,B,C,C,1,1),article=bv(0,1,1),can_be_np=bv(0,0,1),def=bv(0,0,1),det_type=bv(0,0,0,1,1,1,1,1),prenumber=bv(0,0,1),syn_type=bv(0,0,0,0,0,0,1,1,1,1),wh=bv(0,0,1,1)],a,_,D,E), ('C'(D,a,E),A=lex(a);'C'(D,an,E),A=lex(an))). Raw EBL training result: rule((p(phrase(p,lex(on)),[def=_,obj_sem_n_type=bv(0,0,0,0,0,0,0,1,1,1,1,1,1),postposition=bv(0,0,1),sem_p_type=bv(0,1,1,1,1),sem_pp_type=bv(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)],[[prep,on]],_,A,B):-'C'(A,on,B)), (p(_,[def=_,obj_sem_n_type=bv(0,0,0,0,0,0,0,1,1,1,1,1,1),postposition=bv(0,0,1),sem_p_type=bv(0,1,1,1,1),sem_pp_type=bv(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)],_,_,_,_):-'C'(_,_,_)), [switch,on,the,light], [on], [default]). */ %--------------------------------------------------------------- :- module(ebl_include_lex, [create_ebl_included_lex_entries/4] ). %--------------------------------------------------------------- :- use_module('$REGULUS/Prolog/regulus_utilities'). :- use_module('$REGULUS/PrologLib/utilities'). :- use_module(library(lists)). :- use_module(library(ordsets)). :- use_module(library(terms)). %--------------------------------------------------------------- create_ebl_included_lex_entries(EBLIncludeLexFiles, StoredLexData, IgnoredSubdomains, LexEntries) :- internalise_include_files(EBLIncludeLexFiles, IgnoredSubdomains), create_lex_includes_from_conditional_lex_includes(StoredLexData, IgnoredSubdomains), create_ebl_included_lex_entries1(LexEntries), !. %--------------------------------------------------------------- :- dynamic ebl_include_lex_decl/2, ebl_dont_include_lex_decl/2, ebl_conditional_include_lex_decl/4. internalise_include_files(Files, IgnoredSubdomains) :- retractall(ebl_include_lex_decl(_, _)), retractall(ebl_dont_include_lex_decl(_, _)), retractall(ebl_conditional_include_lex_decl(_, _, _, _)), internalise_include_files1(Files, IgnoredSubdomains), !. internalise_include_files(Files, IgnoredSubdomains) :- format2error('~N*** Error: bad call: ~w~n', [internalise_include_files(Files, IgnoredSubdomains)]), fail. internalise_include_files1([], _IgnoredSubdomains) :- !. internalise_include_files1(File, IgnoredSubdomains) :- \+ is_list(File), !, internalise_include_files1([File], IgnoredSubdomains). internalise_include_files1([F | R], IgnoredSubdomains) :- internalise_include_file(F, IgnoredSubdomains), !, internalise_include_files1(R, IgnoredSubdomains). internalise_include_file(File, IgnoredSubdomains) :- prolog_file_to_list(File, List), internalise_include_list(List, IgnoredSubdomains), !. internalise_include_list([], _IgnoredSubdomains). internalise_include_list([F | R], IgnoredSubdomains) :- internalise_include_item(F, IgnoredSubdomains), !, internalise_include_list(R, IgnoredSubdomains). internalise_include_item((Head :- Body), _IgnoredSubdomains) :- parse_conditional_lex_include(Head, Body, Cat, Sem, Tags), assertz(ebl_conditional_include_lex_decl(Head, Cat, Sem, Tags)), !. internalise_include_item(include_lex(Spec), IgnoredSubdomains) :- %assertz(ebl_include_lex_decl(Spec, [default])), internalise_include_item(include_lex(Spec, [default]), IgnoredSubdomains), !. internalise_include_item(include_lex(Spec, Tags), IgnoredSubdomains) :- remove_ignored_subdomains_from_tags(Tags, IgnoredSubdomains, Tags1), ( Tags1 = [] -> true ; ebl_include_lex_decl(Spec, Tags1) -> true; assertz(ebl_include_lex_decl(Spec, Tags1)) ), !. internalise_include_item(dont_include_lex(Spec), IgnoredSubdomains) :- %assertz(ebl_dont_include_lex_decl(Spec, [default])), internalise_include_item(dont_include_lex(Spec, [default]), IgnoredSubdomains), !. internalise_include_item(dont_include_lex(Spec, Tags), IgnoredSubdomains) :- remove_ignored_subdomains_from_tags(Tags, IgnoredSubdomains, Tags1), ( Tags1 = [] -> true ; ebl_dont_include_lex_decl(Spec, Tags1) -> true ; assertz(ebl_dont_include_lex_decl(Spec, Tags1)) ), !. internalise_include_item(Item, IgnoredSubdomains) :- format2error('~N*** Error: bad call: ~w~n', [internalise_include_item(Item, IgnoredSubdomains)]), fail. %--------------------------------------------------------------- /* include_lex(v:[sem=[Type, Value]], Tags) :- rule_exists(v:[sem=[[Type, Value]]], Tags). include_lex(v:[sem=[Type, Value]], Tags) :- rule_exists(v:[sem=[[tense, Tense], [Type, Value]]], Tags). */ parse_conditional_lex_include(Head, Body, Cat, BodySem, BodyTags) :- Head = include_lex(Cat:[sem=_HeadSem], _HeadTags), Body = rule_exists(Cat:[sem=BodySem], BodyTags), all_vars_in_head_are_in_body(Head, Body), !. all_vars_in_head_are_in_body(Head, Body) :- term_variables(Head, HeadVars), term_variables(Body, BodyVars), all_vars_in_first_arg_are_in_second_arg(HeadVars, BodyVars). all_vars_in_first_arg_are_in_second_arg(HeadVars, BodyVars) :- \+ var_is_in_first_arg_but_not_in_second(_Var, HeadVars, BodyVars). var_is_in_first_arg_but_not_in_second(Var, HeadVars, BodyVars) :- member(Var, HeadVars), \+ id_member(Var, BodyVars). %--------------------------------------------------------------- create_lex_includes_from_conditional_lex_includes(StoredLexData, IgnoredSubdomains) :- get_all_stored_conditional_lex_includes(ConditionalLexIncludes), create_lex_includes_from_conditionals(ConditionalLexIncludes, StoredLexData, IgnoredSubdomains), !. create_lex_includes_from_conditional_lex_includes(StoredLexData, IgnoredSubdomains) :- format2error('~N*** Error: bad call: ~w~n', [create_lex_includes_from_conditional_lex_includes(StoredLexData, IgnoredSubdomains)]), fail. get_all_stored_conditional_lex_includes(ConditionalLexIncludes) :- findall([Rule, Cat, Sem, Tags], ebl_conditional_include_lex_decl(Rule, Cat, Sem, Tags), ConditionalLexIncludes). create_lex_includes_from_conditionals([], _StoredLexData, _IgnoredSubdomains). create_lex_includes_from_conditionals([F | R], StoredLexData, IgnoredSubdomains) :- create_lex_includes_from_conditional(F, StoredLexData, IgnoredSubdomains), !, create_lex_includes_from_conditionals(R, StoredLexData, IgnoredSubdomains). create_lex_includes_from_conditional(_Record, [], _IgnoredSubdomains). create_lex_includes_from_conditional(Record, [F | R], IgnoredSubdomains) :- create_lex_include_from_conditional_and_lex_item(Record, F, IgnoredSubdomains), !, create_lex_includes_from_conditional(Record, R, IgnoredSubdomains). create_lex_include_from_conditional_and_lex_item(CondRecord, LexRecord, IgnoredSubdomains) :- copy_term(CondRecord, CondRecord1), CondRecord1 = [Rule, Cat, Sem, Tags], LexRecord = stored_lex_data(Cat, Sem, Tags), internalise_include_item(Rule, IgnoredSubdomains), %format('~N~nSuccessful call: ~w~n', % create_lex_include_from_conditional_and_lex_item(CondRecord, LexRecord, IgnoredSubdomains)), !. create_lex_include_from_conditional_and_lex_item(_CondRecord, _LexRecord, _IgnoredSubdomains). %--------------------------------------------------------------- remove_ignored_subdomains_from_tags(Tags, _IgnoredSubdomains, Tags1) :- \+ is_list(Tags), Tags = Tags1, !. remove_ignored_subdomains_from_tags(Tags, IgnoredSubdomains, Tags1) :- is_list(Tags), list_to_ord_set(Tags, TagsOS), ord_subtract(TagsOS, IgnoredSubdomains, Tags1), !. remove_ignored_subdomains_from_tags(Tags, IgnoredSubdomains, Tags1) :- format2error('~N*** Error: bad call: ~w~n', [remove_ignored_subdomains_from_tags(Tags, IgnoredSubdomains, Tags1)]), fail. %--------------------------------------------------------------- create_ebl_included_lex_entries1(LexEntries) :- findall(LexEntry, ebl_included_lex_entry(LexEntry), LexEntries), !. ebl_included_lex_entry(LexEntry) :- expanded_reflective_dcg_lex_entry(Head, Body), words_from_body(Body, Words), sem_and_cat_from_head(Head, Sem, Cat), entry_matches_decl(Cat, Words, Sem, Tags), filter_tags_using_negative_entries(Tags, Cat, Words, Sem, Tags1), %format('~N~n~w~n', [filter_tags_using_negative_entries(Tags, Cat, Words, Sem, Tags1)]), Tags1 \== [], format_lex_entry_as_raw_ebl_rule(Head, Body, Tags, LexEntry). %--------------------------------------------------------------- filter_tags_using_negative_entries(TagsIn, Cat, Words, Sem, TagsOut) :- findall(BadTags, entry_matches_negative_decl(Cat, Words, Sem, BadTags), AllBadTags), append_list(AllBadTags, AllBadTagsList), findall(Tag, ( member(Tag, TagsIn), \+ member(Tag, AllBadTagsList) ), TagsOut), !. filter_tags_using_negative_entries(TagsIn, _Cat, _Words, _Sem, TagsIn) :- !. filter_tags_using_negative_entries(Tags, Cat, Words, Sem, Tags1) :- format2error('~N*** Error: bad call: ~w~n', [filter_tags_using_negative_entries(Tags, Cat, Words, Sem, Tags1)]), fail. %--------------------------------------------------------------- /* Reflective DCG rule with disjunctive lexical elt: dcg_clause(d(phrase(d,A),[agr=bv(0,B,B,C,C,1,1),article=bv(0,1,1),can_be_np=bv(0,0,1),def=bv(0,0,1),det_type=bv(0,0,0,1,1,1,1,1),prenumber=bv(0,0,1),syn_type=bv(0,0,0,0,0,0,1,1,1,1),wh=bv(0,0,1,1)],a,_,D,E), ('C'(D,a,E),A=lex(a);'C'(D,an,E),A=lex(an))). */ expanded_reflective_dcg_lex_entry(Head, Body) :- user:dcg_clause(Head0, Body0), ( normalise_prolog_dcg_clause_to_c_version((Head0 :- Body0), (Head1 :- Body1)) -> true ; (Head0 :- Body0) = (Head1 :- Body1) ), Head = Head1, expand_lexical_body(Body1, Body2), remove_trivial_conjuncts(Body2, Body). expand_lexical_body((P ; Q), Expanded) :- !, ( expand_lexical_body(P, Expanded) ; expand_lexical_body(Q, Expanded) ). expand_lexical_body((P, Q), (P1, Q1)) :- !, expand_lexical_body(P, P1), expand_lexical_body(Q, Q1). expand_lexical_body(X = Y, true) :- X = Y, !. expand_lexical_body('C'(In, Word, Out), 'C'(In, Word, Out)) :- !. remove_trivial_conjuncts((P, true), P1) :- !, remove_trivial_conjuncts(P, P1). remove_trivial_conjuncts((true, P), P1) :- !, remove_trivial_conjuncts(P, P1). remove_trivial_conjuncts((P, Q), Result) :- !, remove_trivial_conjuncts(P, P1), remove_trivial_conjuncts(Q, Q1), ( ( P == P1, Q == Q1 ) -> Result = (P1, Q1) ; otherwise -> remove_trivial_conjuncts((P1, Q1), Result) ). remove_trivial_conjuncts(P, P). %--------------------------------------------------------------- /* Reflective DCG rule with disjunctive lexical elt: dcg_clause(d(phrase(d,A),[agr=bv(0,B,B,C,C,1,1),article=bv(0,1,1),can_be_np=bv(0,0,1),def=bv(0,0,1),det_type=bv(0,0,0,1,1,1,1,1),prenumber=bv(0,0,1),syn_type=bv(0,0,0,0,0,0,1,1,1,1),wh=bv(0,0,1,1)],a,_,D,E), ('C'(D,a,E),A=lex(a);'C'(D,an,E),A=lex(an))). */ words_from_body((B1, B2), (W1, W2)) :- !, words_from_body(B1, W1), words_from_body(B2, W2). words_from_body('C'(_From, Word, _To), Word) :- !. sem_and_cat_from_head(Head, Sem, Cat) :- functor(Head, Cat, 6), arg(3, Head, Sem), !. sem_and_cat_from_head(Head, Sem, Cat) :- format2error('~N*** Error: bad call: ~w~n', [sem_and_cat_from_head(Head, Sem, Cat)]), fail. %--------------------------------------------------------------- entry_matches_decl(Cat, Words, Sem, Tags) :- ebl_include_lex_decl(Decl, Tags), \+ cat_fails_to_match_decl(Cat, Decl), \+ words_fail_to_match_decl(Words, Decl), \+ sem_fails_to_match_decl(Sem, Decl). %format('~N~nSuccessful call: (decl = ~q): ~q~n', % [ebl_include_lex_decl(Decl, Tags), entry_matches_decl(Cat, Words, Sem, Tags)]). entry_matches_negative_decl(Cat, Words, Sem, Tags) :- ebl_dont_include_lex_decl(Cat:Body, Tags), ( Body = [words=Words1] -> Words = Words1 ; Body = [sem=Sem1] -> term_contains_subterm(Sem, Sem1) ), !. cat_fails_to_match_decl(Cat, Decl) :- Decl = Cat1:_Rest, Cat \== Cat1, !. words_fail_to_match_decl(Words, Decl) :- Decl = _Cat:[words=Words1], Words \== Words1, !. sem_fails_to_match_decl(Sem, Decl) :- Decl = _Cat:[sem=Sem1], \+ term_contains_subterm(Sem, Sem1), !. %--------------------------------------------------------------- /* Raw EBL training result: rule((p(phrase(p,lex(on)),[def=_,obj_sem_n_type=bv(0,0,0,0,0,0,0,1,1,1,1,1,1),postposition=bv(0,0,1),sem_p_type=bv(0,1,1,1,1),sem_pp_type=bv(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)],[[prep,on]],_,A,B):-'C'(A,on,B)), (p(_,[def=_,obj_sem_n_type=bv(0,0,0,0,0,0,0,1,1,1,1,1,1),postposition=bv(0,0,1),sem_p_type=bv(0,1,1,1,1),sem_pp_type=bv(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)],_,_,_,_):-'C'(_,_,_)), [switch,on,the,light], [on], [default]). */ format_lex_entry_as_raw_ebl_rule(Head0, Body0, Tags, LexEntry) :- change_format_of_lexical_dcg_rule_if_sicstus4((Head0 :- Body0), (Head :- Body)), LexEntry = rule((Head :- Body), (Head :- Body), [lexicon], [lexicon], Tags), !. format_lex_entry_as_raw_ebl_rule(Head, Body, Tags, LexEntry) :- format2error('~N*** Error: bad call: ~w~n', [format_lex_entry_as_raw_ebl_rule(Head, Body, Tags, LexEntry)]), fail. /* This ugly hack is caused by the fact that Sicstus 3 and Sicstus 4 represent DCGs differently. If we're in Sicstus 4, we need to force the lexical rule into the Sicstus 4 DCG format. */ change_format_of_lexical_dcg_rule_if_sicstus4(Rule, Rule) :- \+ user:sicstus_version(4), !. change_format_of_lexical_dcg_rule_if_sicstus4((Head0 :- Body0), (Head :- true)) :- copy_term((Head0 :- Body0), (Head :- Body)), evaluate_dcg_body(Body), !. change_format_of_lexical_dcg_rule_if_sicstus4(X, Y) :- format2error('~N*** Error: bad call: ~w~n', [change_format_of_lexical_dcg_rule_if_sicstus4(X, Y)]), fail. evaluate_dcg_body('C'(In, Word, Out)) :- In = [Word | Out], !. evaluate_dcg_body((P, Q)) :- evaluate_dcg_body(P), evaluate_dcg_body(Q), !. evaluate_dcg_body(true) :- !. evaluate_dcg_body(Other) :- format2error('~N*** Error: bad call: ~w~n', [evaluate_dcg_body(Other)]), fail.