% regulus2dcg.pl %--------------------------------------------------------------- :- module(regulus_binarise, [binarise_grammar_alist/4] ). %--------------------------------------------------------------- :- use_module('$REGULUS/Prolog/regulus_utilities'). :- use_module('$REGULUS/Prolog/regulus_declarations'). :- use_module('$REGULUS/PrologLib/utilities'). :- use_module(library(lists)). :- use_module(library(terms)). :- use_module(library(ordsets)). :- dynamic cat_counter/1. binarise_grammar_alist(InGrammarUnits, OutGrammarUnits, NewCats, NewFeats) :- binarise_grammar_init, binarise_grammar_units(InGrammarUnits, OutGrammarUnits-[], NewCats-[], NewFeats0-[]), safe_remove_duplicates(NewFeats0, NewFeats). %--------------------------------------------------------------- binarise_grammar_init :- retractall(cat_counter(_)), assertz(cat_counter(0)). %--------------------------------------------------------------- binarise_grammar_units([], GrammarIn-GrammarIn, CatsIn-CatsIn, FeatsIn-FeatsIn). binarise_grammar_units([ignore(_F) | R], GrammarIn-GrammarOut, CatsIn-CatsOut, FeatsIn-FeatsOut) :- !, binarise_grammar_units(R, GrammarIn-GrammarOut, CatsIn-CatsOut, FeatsIn-FeatsOut). binarise_grammar_units([F | R], GrammarIn-GrammarOut, CatsIn-CatsOut, FeatsIn-FeatsOut) :- binarise_grammar_unit(F, GrammarIn-GrammarNext, CatsIn-CatsNext, FeatsIn-FeatsNext), !, binarise_grammar_units(R, GrammarNext-GrammarOut, CatsNext-CatsOut, FeatsNext-FeatsOut). binarise_grammar_unit(examples(RuleSummary, Examples), RulesIn-RulesOut, CatsIn-CatsIn, FeatsIn-FeatsIn) :- RulesIn = [examples(RuleSummary, Examples) | RulesOut], !. binarise_grammar_unit(frequency_labelled_rule(Freq, Rule), RulesIn-RulesOut, CatsIn-CatsOut, FeatsIn-FeatsOut) :- binarise_grammar_unit1(Rule, RulesIn-RulesOut, CatsIn-CatsOut, FeatsIn-FeatsOut, Freq), !. binarise_grammar_unit(GrammarUnit, Rules, Cats, Feats) :- format2error('~N*** Error: bad call: ~w~n', [binarise_grammar_unit(GrammarUnit, Rules, Cats, Feats)]), fail. binarise_grammar_unit1(Rule, RulesIn-RulesOut, CatsIn-CatsOut, FeatsIn-FeatsOut, Freq) :- is_non_lexical_regulus_rule(Rule), !, binarise_non_lexical_rule(Rule, RulesIn-RulesOut, CatsIn-CatsOut, FeatsIn-FeatsOut, Freq). binarise_grammar_unit1(Other, [frequency_labelled_rule(Freq, Other) | RulesOut]-RulesOut, CatsIn-CatsIn, FeatsIn-FeatsIn, Freq). binarise_non_lexical_rule((Head --> Body), RulesIn-RulesOut, CatsIn-CatsOut, FeatsIn-FeatsOut, Freq) :- length_of_comma_list(Body, 0-Length), Length =< 2, !, RulesIn = [frequency_labelled_rule(Freq, (Head --> Body)) | RulesOut], CatsIn = CatsOut, FeatsIn = FeatsOut. binarise_non_lexical_rule((Head --> Body), RulesIn-RulesOut, CatsIn-CatsOut, FeatsIn-FeatsOut, Freq) :- comma_list_to_list(Body, BodyList), binarise_non_lexical_rule1([Head | BodyList], BinarisedBodies-[], CatsIn-CatsOut, FeatsIn-FeatsOut), binarised_bodies_to_new_rules(BinarisedBodies, RulesIn-RulesOut, Freq). %--------------------------------------------------------------- % binarise_non_lexical_rule1(Body, BinarisedIn-BinarisedOut, CatsIn-CatsOut, FeatsIn-FeatsOut) binarise_non_lexical_rule1(Body, BinarisedIn-BinarisedOut, CatsIn-CatsIn, FeatsIn-FeatsIn) :- length(Body, Length), Length =< 3, BinarisedIn = [Body | BinarisedOut], !. binarise_non_lexical_rule1(Body, BinarisedIn-BinarisedOut, CatsIn-CatsOut, FeatsIn-FeatsOut) :- Body = [Mother, FirstDaughter | RestDaughters], new_mother_cat(Mother, FirstDaughter, RestDaughters, NewMother, FeatsIn-FeatsNext), cat_name_and_features_in_cat(NewMother, CatName, Features), NewCatSpec = category(CatName, Features), CatsIn = [NewCatSpec | CatsNext], BinarisedBody = [Mother, FirstDaughter, NewMother], BinarisedIn = [BinarisedBody | BinarisedNext], binarise_non_lexical_rule1([NewMother | RestDaughters], BinarisedNext-BinarisedOut, CatsNext-CatsOut, FeatsNext-FeatsOut). %--------------------------------------------------------------- new_mother_cat(Mother, FirstDaughter, RestDaughters, NewMother, FeatsIn-FeatsOut) :- term_variables([Mother, FirstDaughter], MotherFirstDaughterVars), term_variables(RestDaughters, RestDaughtersVars), sem_variables_in_cat(Mother, MotherSemVars), list_to_ord_set(MotherFirstDaughterVars, MotherFirstDaughterVarsOS), list_to_ord_set(RestDaughtersVars, RestDaughtersVarsOS), list_to_ord_set(MotherSemVars, MotherSemVarsOS), ord_intersection(MotherFirstDaughterVarsOS, RestDaughtersVarsOS, CommonVarsOS), ord_intersection(CommonVarsOS, MotherSemVarsOS, CommonSemVarsOS), ord_subtract(CommonVarsOS, MotherSemVarsOS, CommonSynVarsOS), assign_feats_to_syn_vars(CommonSynVarsOS, RestDaughters, SynFeatValList, [], FeatsIn-FeatsOut), assign_feats_to_sem_vars(CommonSemVarsOS, SemFeatValList), ( SemFeatValList = [] -> FeatValList = SynFeatValList ; FeatValList = [sem=SemFeatValList | SynFeatValList] ), new_cat_id(NewCat), NewMother = NewCat:FeatValList. %--------------------------------------------------------------- % assign_feats_to_syn_vars(Vars, Cats, FeatVals, PreviousFeats, FeatsIn-FeatsOut) assign_feats_to_syn_vars([], _Cats, [], _PreviousFeats, FeatsIn-FeatsIn). assign_feats_to_syn_vars([Var | RVars], Cats, [(Feat = Var) | RFeatVals], PreviousFeats, FeatsIn-FeatsOut) :- assign_feat_to_syn_var_from_cat_list(Var, Cats, Feat, PreviousFeats, FeatsIn-FeatsNext), assign_feats_to_syn_vars(RVars, Cats, RFeatVals, [Feat | PreviousFeats], FeatsNext-FeatsOut). assign_feat_to_syn_var_from_cat_list(Var, Cats, Feat, PreviousFeats, FeatsIn-FeatsOut) :- assign_feat_to_syn_var_from_cat_list1(Var, Cats, Feat, PreviousFeats, FeatsIn-FeatsOut), !. assign_feat_to_syn_var_from_cat_list(Var, Cats, Feat, Prev, Feats) :- format2error('~N*** Error: bad call: ~q.~n', [assign_feat_to_syn_var_from_cat_list(Var, Cats, Feat, Prev, Feats)]), fail. assign_feat_to_syn_var_from_cat_list1(Var, [Cat | _Cats], Feat, PreviousFeats, FeatsIn-FeatsOut) :- assign_feat_to_syn_var_from_cat(Var, Cat, Feat, PreviousFeats, FeatsIn-FeatsOut), !. assign_feat_to_syn_var_from_cat_list1(Var, [_Cat | Cats], Feat, PreviousFeats, FeatsIn-FeatsOut) :- assign_feat_to_syn_var_from_cat_list1(Var, Cats, Feat, PreviousFeats, FeatsIn-FeatsOut). assign_feat_to_syn_var_from_cat(Var, _Cat:FeatVals, Feat, PreviousFeats, FeatsIn-FeatsOut) :- assign_feat_to_syn_var_from_feat_vals(Var, FeatVals, Feat, PreviousFeats, FeatsIn-FeatsOut). assign_feat_to_syn_var_from_feat_vals(Var, [(Feat1 = Var1) | R], Feat, PreviousFeats, FeatsIn-FeatsOut) :- ( Var == Var1 -> assign_feat_to_syn_var_from_feat_val(Feat1, PreviousFeats, Feat, FeatsIn-FeatsOut) ; assign_feat_to_syn_var_from_feat_vals(Var, R, Feat, PreviousFeats, FeatsIn-FeatsOut) ). assign_feat_to_syn_var_from_feat_val(Feat1, PreviousFeats, Feat, FeatsIn-FeatsOut) :- \+ member(Feat1, PreviousFeats), !, Feat = Feat1, FeatsOut = FeatsIn. assign_feat_to_syn_var_from_feat_val(Feat1, PreviousFeats, Feat, FeatsIn-FeatsOut) :- feat_variant_not_in_list(Feat1, PreviousFeats, 1, Feat), get_feat_val_space_for_feat(Feat1, FeatValSpaceId), NewFeatDecl = feature(Feat, FeatValSpaceId), ( ignored_feat(Feat1) -> NewIgnoreDecl = ignore_feature(Feat), FeatsIn = [NewFeatDecl, NewIgnoreDecl | FeatsOut] ; otherwise -> FeatsIn = [NewFeatDecl | FeatsOut] ). get_feat_val_space_for_feat(Feat, FeatValSpaceId) :- feature(Feat, FeatValSpaceId), !. get_feat_val_space_for_feat(Feat, _FeatValSpaceId) :- format2error('~NError: unable to find feature declaration for feature "~w"~n', [Feat]), fail. feat_variant_not_in_list(Feat1, PreviousFeats, I, Feat) :- join_with_underscore([Feat1, I], FeatI), ( \+ member(FeatI, PreviousFeats) -> Feat = FeatI ; I1 is I + 1, feat_variant_not_in_list(Feat1, PreviousFeats, I1, Feat) ). %--------------------------------------------------------------- assign_feats_to_sem_vars(Vars, FeatVals) :- assign_feats_to_sem_vars1(Vars, FeatVals, 1). assign_feats_to_sem_vars1([], [], _Counter). assign_feats_to_sem_vars1([Var | RestVars], [(SemFeatName = Var) | RestFeatVars], Counter) :- join_with_underscore([sem, Counter], SemFeatName), Counter1 is Counter + 1, !, assign_feats_to_sem_vars1(RestVars, RestFeatVars, Counter1). %--------------------------------------------------------------- % binarised_bodies_to_new_rules(BinarisedBodies, RulesIn-RulesOut, Freq) binarised_bodies_to_new_rules([], RulesIn-RulesIn, _Freq). binarised_bodies_to_new_rules([F | R], [Rule | RulesNext]-RulesOut, Freq) :- binarised_body_to_new_rule(F, Rule, Freq), !, binarised_bodies_to_new_rules(R, RulesNext-RulesOut, Freq). binarised_body_to_new_rule(RuleBody, frequency_labelled_rule(Freq, Rule), Freq) :- RuleBody = [Mother | Daughters], list_to_comma_list(Daughters, Daughters1), copy_term( ( Mother --> Daughters1 ), Rule). %--------------------------------------------------------------- new_cat_id(NewId) :- new_cat_counter(Counter), safe_number_codes(Counter, CounterChars), append_list(["tmp_cat_", CounterChars], NewIdChars), atom_codes(NewId, NewIdChars). %--------------------------------------------------------------- new_cat_counter(Counter) :- cat_counter(Counter), NextCounter is Counter + 1, retractall(cat_counter(_)), assertz(cat_counter(NextCounter)). %--------------------------------------------------------------- cat_name_and_features_in_cat(CatName:FeatVals, CatName, Features) :- features_in_feat_val_list(FeatVals, Features). features_in_feat_val_list([], []). features_in_feat_val_list([(Feat=_Val) | R], [Feat | R1]) :- features_in_feat_val_list(R, R1). %--------------------------------------------------------------- sem_variables_in_cat(Cat, AllSemVars) :- Cat = _CatName:FeatVals, ( member((sem=SemVal), FeatVals) -> term_variables(SemVal, SemVars) ; SemVars = []), ( member((gsem=GSemVal), FeatVals) -> term_variables(GSemVal, GSemVars) ; GSemVars = []), append(SemVars, GSemVars, AllSemVars), !. %--------------------------------------------------------------- ignored_feat(Feat) :- regulus_preds:ignore_feature(Feat), !. ignored_feat(Feat) :- get_ebl_ignore_feats(IgnoredFeats), member(Feat, IgnoredFeats), !. get_ebl_ignore_feats(IgnoredFeats) :- current_predicate(user:regulus_config/2), user:regulus_config(ebl_ignore_feats, IgnoredFeats), !.