/* I tried to separate out 3 functions from the G2R compiler that contain source code that RIACS owns, and can be contributed to Regulus. I may not have gotten all the predicates necessary, or there may be some that are necessary in Gemini, but not in Regulus. Attached are mjohnson_lrec.pl, kiefer.pl, and compaction.pl. */ mark_johnson_transform_proper :- clear_dynamic_predicates([new_cfg_rule/2]), compute_left_rec_table, % compute this again find_retained_nonterminals, mark_johnson_rule1, cfg_rule(NT, Body), apply_transform_list(Body, NT), fail. mark_johnson_transform:- mark_johnson_transform_proper, fail. mark_johnson_transform:- mj_remove_empties, mj_remove_useless_nonterminals, num_clauses(new_cfg_rule(_,_), N), format('Found ~d rules after applying Left-recursion elimination~n', [N]), copy_to_cfg_rule. copy_to_cfg_rule :- clear_dynamic_predicates([cfg_rule/2]), atomic_cat(_Cat,AtomicCat), mj_helper(AtomicCat,RHSList), \+ RHSList = [], sort(RHSList, FinalRHS), assert(cfg_rule(AtomicCat,FinalRHS)), fail. copy_to_cfg_rule. new_mj_nt(lt(A), lt(X), AX):- atomic(A), atomic(X), !, concat_atom([A,'-',X], AX), assert_once(atomic_cat(introduced_by_mj, AX)). new_mj_nt(A, lt(X), AX):- atomic(A), atomic(X), !, concat_atom([A,'-',X], AX), assert_once(atomic_cat(introduced_by_mj, AX)). new_mj_nt(lt(A), X, AX):- atomic(A), atomic(X), !, concat_atom([A,'-',X], AX), assert_once(atomic_cat(introduced_by_mj, AX)). new_mj_nt(A, X, AX):- atomic(A), atomic(X), !, concat_atom([A,'-',X], AX), assert_once(atomic_cat(introduced_by_mj, AX)). mj_helper(AtomicCat,[FirstRHS|RestRHSs]) :- retract(new_cfg_rule(AtomicCat,FirstRHS)), !, mj_helper(AtomicCat,RestRHSs). mj_helper(_AtomicCat,[]). mark_johnson_rule1 :- left_corner(A, X), left_recursive(A), retained_nonterminal(A), (X = lt(_) -> true ; \+left_recursive(X)), new_mj_nt(A, X, AX), assert(new_cfg_rule(A, [X, AX])), fail. mark_johnson_rule1. apply_transform_list([], _NT). apply_transform_list([Body|RestList], NT):- apply_transform(Body, NT), apply_transform_list(RestList, NT). /* % rules for the original Mark Johnson Transform apply_transform([X|Beta], B):- left_corner(A,B), new_mj_nt(A, X, AX), new_mj_nt(A, B, AB), append(Beta, [AB], NewBody), assert(new_cfg_rule(AX,NewBody)), fail. apply_transform([X|Beta], A):- left_corner(A, X), new_mj_nt(A, X, AX), assert(new_cfg_rule(AX,Beta)), fail. apply_transform(_, _). */ % rule 2 apply_transform([X|Beta], B):- left_recursive(B), left_corner(A, B), left_recursive(A), retained_nonterminal(A), new_mj_nt(A, X, AX), new_mj_nt(A, B, AB), append(Beta, [AB], NewBody), assert(new_cfg_rule(AX,NewBody)), fail. % rule 3 apply_transform([X|Beta], A):- left_recursive(A), retained_nonterminal(A), left_corner(A, X), new_mj_nt(A, X, AX), assert(new_cfg_rule(AX,Beta)), fail. % rule 4 apply_transform(Beta, A):- \+ left_recursive(A), assert(new_cfg_rule(A,Beta)), fail. apply_transform(_, _). mj_remove_empties:- clear_dynamic_predicates([mj_empty/1]), format('Entering empty elimination~n', []), ctr_set(5,0), retract(new_cfg_rule(AtomicCat, [])), assert(mj_empty(AtomicCat)), format(' found new empty cat ~a~n', [AtomicCat]), fail. mj_remove_empties:- mj_remove_empties_iterate. mj_remove_empties_iterate:- new_cfg_rule(AtomicCat, Body), mj_remove_empty_nonterminals(Body, RestBody), \+ Body = RestBody, (RestBody = [] -> assert_unique(mj_empty(AtomicCat)), format(' found new empty cat ~a~n', [AtomicCat]), ctr_set(5, 1) ; assert_unique(new_cfg_rule(AtomicCat, RestBody))), fail. mj_remove_empties_iterate:- ctr_is(5,1), !, ctr_set(5,0), num_clauses(new_cfg_rule(_,_), Rules), format('Found ~d derived rules during empty elimination~n', [Rules]), mj_remove_empties_iterate. mj_remove_empties_iterate:- mj_eliminate_useless_rules, num_clauses(new_cfg_rule(_,_), Rules), format('Found ~d rules after empty elimination~n', [Rules]). mj_eliminate_useless_rules:- new_cfg_rule(AtomicCat, Body), mj_useless_body(Body), retract(new_cfg_rule(AtomicCat, Body)), fail. mj_eliminate_useless_rules. mj_useless_body([lt(_Head)|Tail]):- !, mj_useless_body(Tail). mj_useless_body([Head|_Tail]):- \+ new_cfg_rule(Head, _), !. mj_useless_body([_Head|Tail]):- mj_useless_body(Tail). mj_remove_empty_nonterminals([], []). mj_remove_empty_nonterminals([Empty|Rest], Result):- mj_empty(Empty), !, mj_remove_empty_nonterminals(Rest, Result). mj_remove_empty_nonterminals([Head|Rest], [Head|Result]):- mj_remove_empty_nonterminals(Rest, Result). mj_remove_useless_nonterminals:- retract(new_cfg_rule(X, [X])), fail. mj_remove_useless_nonterminals:- clear_dynamic_predicates([reachable_nt/1]), mj_top_down(sigma), mj_remove_nonreachables, num_clauses(new_cfg_rule(_,_), Rules), format('Found ~d rules after removing useless productions~n', [Rules]). mj_top_down(NT):- reachable_nt(NT), !. mj_top_down(NT):- assert(reachable_nt(NT)), new_cfg_rule(NT, Body), mj_top_down_body(Body), fail. mj_top_down(_). mj_top_down_body([]). mj_top_down_body([Head|Body]):- mj_top_down(Head), mj_top_down_body(Body). mj_remove_nonreachables:- new_cfg_rule(NT, Body), \+ reachable_nt(NT), retract(new_cfg_rule(NT, Body)), fail. mj_remove_nonreachables. kiefer :- clear_dynamic_predicates([kiefer_category/1]), preprocess_kiefer_lexicon, preprocess_kiefer_rules, ctr_set(5, 0), iterate_bottom_up(1), instantiate_kiefer_cfg_rules. preprocess_kiefer_lexicon :- clear_dynamic_predicates([kiefer_category/1,kiefer_lex/2]), switch_value(rename_feats,RenameFeats), switch_value(canonical_only,CanonicalOnly), dynamic_prefix(Prefix), cat(Word,FullCat), filter_features(FullCat, Cat), (CanonicalOnly = yes -> check_canonical(Cat) ; true), process_dynamic_lex_entry(Word,Prefix,Cat,NewWord), ( RenameFeats = true -> instantiate_rename_feat(Cat) ; true), % mark_vals_cat(Cat, NewCat), % mark_singletons(Cat, true), assert_general(kiefer_lex(Cat, [NewWord])), assert_general(kiefer_category(Cat)), fail. preprocess_kiefer_lexicon :- % This filters out single word "multiwords", which are % used only with typed input (typically numerals) multi_word_cat(LastWord,[PrevWord|RestWords],FullCat), filter_features(FullCat, Cat), (switch_value(canonical_only,yes) -> check_canonical(Cat) ; true), (switch_value(rename_feats,true) -> instantiate_rename_feat(Cat) ; true), rev([LastWord,PrevWord|RestWords],MultiWordList), % mark_vals_cat(Cat, NewCat), % mark_singletons(Cat, true), assert_general(kiefer_lex(Cat,MultiWordList)), assert_general(kiefer_category(Cat)), fail. preprocess_kiefer_lexicon:- num_clauses(kiefer_category(_), Cat), num_clauses(kiefer_lex(_,_), U), format('Found ~d unique categories for a lexicon of size ~d~n', [Cat, U]). preprocess_kiefer_rules:- clear_dynamic_predicates([restricted_rule/2]), ((top_down_rule(FullMother,FullDaughters,RuleName), (start_cat(FullMother), if_exists(application_sigma_rule(_,_,_,_,_)) -> application_sigma_rule(_,_,RuleName,yes,_) % only use sigma rules that have recognition flag = yes ; true)) ; rule(FullMother,FullDaughters,RuleName)), (ctr_set(4, 0) ; (ctr_is(4, ThisRuleCount), (ThisRuleCount > 1 -> format('Rule ~a derived ~d rules~n', [RuleName, ThisRuleCount]), fail ; fail))), filter_rules([FullMother|FullDaughters], [Mother|Daughters]), % mark_vals_cat(Mother,MarkedMother), % mark_vals_daus(Daughters,MarkedDaughters), % mark_singletons(Mother, Daughters), ctr_inc(4), assert_unique(restricted_rule(Mother, Daughters)), fail. preprocess_kiefer_rules:- num_clauses(restricted_rule(_,_), NumDerivedRules), num_clauses(top_down_rule(_,_,_,_), T), num_clauses(rule(_,_,_,_), R), TotalRule is T + R, assert(gemini_rule_count(TotalRule)), format('Found ~d derived rules from a grammar of size ~d~n', [NumDerivedRules, TotalRule]). iterate_bottom_up(_) :- clear_dynamic_predicates([new_kiefer_category/1]), ctr_set(5, 0), % setting a arbitary counter to 0 restricted_rule(Mother, Daughters), kiefer_match_daughters(Daughters), assert_general(new_kiefer_category(Mother)), fail. iterate_bottom_up(_):- retract(new_kiefer_category(Cat)), assert_general(kiefer_category(Cat)), ctr_set(5, 1), fail. iterate_bottom_up(N):- NextN is N + 1, num_clauses(kiefer_category(_), C), format('Found ~d categories after ~d iterations~n', [C,N]), (ctr_is(5, 1) -> iterate_bottom_up(NextN) ; true). kiefer_match_daughters([]). kiefer_match_daughters([Dau|RestDaus]):- kiefer_category(Dau), kiefer_match_daughters(RestDaus). kiefer_cfg_term(sigma, sigma):- assert_unique(atomic_cat(sigma, sigma)), !. kiefer_cfg_term(Cat, AtomicCat):- copy_term(Cat, Copy), numbervars(Copy, 0, _), clause(atomic_cat(Copy, AtomicCat), _, Ref), instance(Ref, (atomic_cat(FreshCopy, AtomicCat):-true)), numbervars(FreshCopy, 0, _), FreshCopy = Copy, !. kiefer_cfg_term(Cat, AtomicCat):- functor(Cat, Functor, _), gensym(Functor, AtomicCat), assert(atomic_cat(Cat, AtomicCat)). subsuming_mother(Cat, AtomicCat):- copy_term(Cat, Copy), clause(atomic_cat(Copy, AtomicCat), _, Ref), instance(Ref, (atomic_cat(FreshCopy, AtomicCat):-true)), numbervars(Copy, 0, _), FreshCopy = Copy. /* subsuming_mother(Cat, AtomicCat):- copy_term(Cat, Copy), clause(atomic_cat(Copy, AtomicCat), _, Ref), instance(Ref, (atomic_cat(FreshCopy, AtomicCat):-true)), numbervars(FreshCopy, 0, _), FreshCopy = Copy. subsuming_mother(Cat, AtomicCat):- atomic_cat(Cat, AtomicCat). */ instantiate_kiefer_cfg_rules_helper :- kiefer_category(Cat), % find_singleton_daughter_vars(false,[Cat],SingletonList), % instantiate_singletons(SingletonList), kiefer_cfg_term(Cat, _AtomicCat), fail. instantiate_kiefer_cfg_rules:- clear_dynamic_predicates([atomic_cat/2]), instantiate_kiefer_cfg_rules_helper. instantiate_kiefer_cfg_rules :- clear_dynamic_predicates([atomic_cat_rule/2]), add_kiefer_lexical_rules, restricted_rule(Mother, Daughters), instantiate_daughters(Daughters, AtomicDaughters), % atomic_cat(Mother, AtomicMother), subsuming_mother(Mother, AtomicMother), assert_unique(atomic_cat_rule(AtomicMother, AtomicDaughters)), fail. instantiate_kiefer_cfg_rules :- num_clauses(atomic_cat_rule(_,_), Rules), format('Found ~d total rules~n', [Rules]). instantiate_daughters([],[]). instantiate_daughters([Dau|RestDaus], [AtomicDau|RestAtomicDaus]):- atomic_cat(Dau, AtomicDau), instantiate_daughters(RestDaus, RestAtomicDaus). add_kiefer_lexical_rules:- kiefer_lex(Cat, Words), subsuming_mother(Cat, AtomicCat), add_lt_wrapper(Words, LtWords), assert(atomic_cat_rule(AtomicCat, LtWords)), fail. add_kiefer_lexical_rules. add_lt_wrapper([], []). add_lt_wrapper([Word|RestWords], [lt(Word)|RestResult]):- add_lt_wrapper(RestWords, RestResult). kiefer_remove_empties:- clear_dynamic_predicates([kiefer_empty/1]), format('Entering empty elimination~n', []), ctr_set(5,0), retract(atomic_cat_rule(AtomicCat, [])), assert(kiefer_empty(AtomicCat)), format(' found new empty cat ~a~n', [AtomicCat]), fail. kiefer_remove_empties:- kiefer_remove_empties_iterate. kiefer_remove_empties_iterate:- atomic_cat_rule(AtomicCat, Body), remove_empty_nonterminals(Body, RestBody), \+ Body = RestBody, (RestBody = [] -> assert_unique(kiefer_empty(AtomicCat)), format(' found new empty cat ~a~n', [AtomicCat]), ctr_set(5, 1) ; assert_unique(atomic_cat_rule(AtomicCat, RestBody))), fail. kiefer_remove_empties_iterate:- ctr_is(5,1), !, ctr_set(5,0), num_clauses(atomic_cat_rule(_,_), Rules), format('Found ~d derived rules during empty elimination~n', [Rules]), kiefer_remove_empties_iterate. kiefer_remove_empties_iterate:- kiefer_eliminate_useless_rules, num_clauses(atomic_cat_rule(_,_), Rules), format('Found ~d rules after empty elimination~n', [Rules]). kiefer_eliminate_useless_rules:- atomic_cat_rule(AtomicCat, Body), kiefer_useless_body(Body), retract(atomic_cat_rule(AtomicCat, Body)), fail. kiefer_eliminate_useless_rules. kiefer_useless_body([lt(_Head)|Tail]):- !, kiefer_useless_body(Tail). kiefer_useless_body([Head|_Tail]):- \+ atomic_cat_rule(Head, _), !. kiefer_useless_body([_Head|Tail]):- kiefer_useless_body(Tail). remove_empty_nonterminals([], []). remove_empty_nonterminals([Empty|Rest], Result):- kiefer_empty(Empty), !, remove_empty_nonterminals(Rest, Result). remove_empty_nonterminals([Head|Rest], [Head|Result]):- remove_empty_nonterminals(Rest, Result). kiefer_remove_useless_nonterminals:- retract(atomic_cat_rule(X, [X])), fail. kiefer_remove_useless_nonterminals:- clear_dynamic_predicates([reachable_nt/1]), kiefer_top_down(sigma), kiefer_remove_nonreachables, num_clauses(atomic_cat_rule(_,_), Rules), format('Found ~d rules after removing useless productions~n', [Rules]). kiefer_top_down(NT):- reachable_nt(NT), !. kiefer_top_down(NT):- assert(reachable_nt(NT)), atomic_cat_rule(NT, Body), kiefer_top_down_body(Body), fail. kiefer_top_down(_). kiefer_top_down_body([]). kiefer_top_down_body([Head|Body]):- kiefer_top_down(Head), kiefer_top_down_body(Body). kiefer_remove_nonreachables:- atomic_cat_rule(NT, Body), \+ reachable_nt(NT), retract(atomic_cat_rule(NT, Body)), fail. kiefer_remove_nonreachables. remove_redundant_rules:- num_clauses(cfg_rule(_,_), NumRules), format('removing redundant nonterminals: ~d total rules~n', [NumRules]), clear_dynamic_predicates([rewrite_nt/2]), ctr_set(5, 0), cfg_rule(NonTerminal, RHS), cfg_rule(OtherNonTerminal, RHS), \+ NonTerminal = OtherNonTerminal, \+ rewrite_nt(NonTerminal, OtherNonTerminal), \+ rewrite_nt(_, OtherNonTerminal), \+ rewrite_nt(OtherNonTerminal, _), \+ rewrite_nt(_, NonTerminal), \+ rewrite_nt(NonTerminal, _), retract(cfg_rule(OtherNonTerminal, RHS)), assert(rewrite_nt(OtherNonTerminal, NonTerminal)), ctr_set(5,1), fail. remove_redundant_rules:- cfg_rule(NonTerminal, [[OtherNonTerminal]]), NonTerminal \== sigma, \+ NonTerminal = OtherNonTerminal, \+ rewrite_nt(NonTerminal, OtherNonTerminal), \+ rewrite_nt(OtherNonTerminal, NonTerminal), \+ rewrite_nt(_, NonTerminal), \+ rewrite_nt(NonTerminal, _), \+ rewrite_nt(_, OtherNonTerminal), \+ rewrite_nt(OtherNonTerminal, _), retract(cfg_rule(NonTerminal, [[OtherNonTerminal]])), assert(rewrite_nt(NonTerminal, OtherNonTerminal)), ctr_set(5,1), fail. remove_redundant_rules:- update_rule_set, fail. remove_redundant_rules:- retract(cfg_rule(NT, RHS)), clean_up_rhs(RHS, NT, NewRHS), (RHS \== NewRHS -> ctr_set(5,1) % format(' Removing redundant rule option for ~p~n ~p~n', % [(NT->RHS),(NT->NewRHS)]) ; true), sort(NewRHS, FinalRHS), asserta(cfg_rule(NT, FinalRHS)), fail. remove_redundant_rules:- (ctr_is(5,1) -> remove_redundant_rules ; num_clauses(cfg_rule(_,_), NumRules), format('CFG rules after duplicate NT removal: ~d~n', [NumRules])). update_rule_set:- retract(cfg_rule(NT, RHS)), substitute_in_rhs(RHS, NewRHS), asserta(cfg_rule(NT, NewRHS)), fail. update_rule_set. % eliminate productions that are duplicated in the rule clean_up_rhs([], _NT, []). clean_up_rhs([Head|Tail], NT, Result):- member(Head,Tail), !, clean_up_rhs(Tail, NT, Result). clean_up_rhs([Head|Tail], NT, [Head|Result]):- clean_up_rhs(Tail, NT, Result). substitute_in_rhs([], []):- !. substitute_in_rhs([Head|Tail], [NewHead|NewTail]):- !, substitute_in_rhs(Head, NewHead), substitute_in_rhs(Tail, NewTail). substitute_in_rhs(Symbol, NewSymbol):- atomic(Symbol), !, (rewrite_nt(Symbol, NewSymbol) -> true ; Symbol = NewSymbol). substitute_in_rhs(Symbol, Symbol). destructively_build_rule_list(AtomicCat,[FirstRHS|RestRHSs]) :- retract(atomic_cat_rule(AtomicCat,FirstRHS)), !, destructively_build_rule_list(AtomicCat,RestRHSs). destructively_build_rule_list(_AtomicCat,[]).