2% DCTG based GP reproduction operators: crossover & mutation
    3% Brian Ross
    4% January 25, 1999
    5
    6% crossover(Parent1, Parent2, Child1, Child2):
    7%	Parent1, Parent2 - parent trees to reproduce
    8%	Child1, Child2 - resulting children 
    9% Performs grammar tree expression crossover on two parents.
   10% If internal crossover probability set, then nodes of the specified type
   11% are selected; else all nodes initially counted.
   12% The rules for crossover are:
   13%	- only nodes of same rule name from each parent are crossed 
   14%	- crossover is attempted a max N number of times until successful
   15%	  (user-specified parameter)
   16% 	- an attempt fails if the offspring exceed max depth parameter
   17% 	- if no internal/leaf counting, then counts on all node names done.
   18% 	- if internal/leaf counting to be done (case 1), then it is done only
   19%	  for one parent. (If it fails, then 2nd parent tried; if that fails,
   20%	  then all nodes counted from first parent). Other parent just uses 
   21%	  terminal name count (increases odds that a crossover will be 
   22%	  possible).
   23
   24crossover(P1, P2, C1, C2) :-    % case 2
   25	prob_internal_crossover_P(PI),
   26	\+ (P1 == no),
   27	(maybe(PI) -> Type=internal ; Type=leaf),
   28	reprod_P(Tries),
   29	(once(count_nodes(P1, Type, N1)),	
   30		(Parent1, Parent2) = (P1, P2)
   31		;
   32		once(count_nodes(P2, Type, N1)),
   33		(Parent1, Parent2) = (P2, P1)),
   34	do_crossover(Tries, Parent1, N1, Parent2, C1, C2),
   35	!. 
   36crossover(P1, P2, C1, C2) :-    % case 1
   37	reprod_P(Tries),
   38	once(count_nodes(P1, all, N1)),
   39	do_crossover(Tries, P1, N1, P2, C1, C2),
   40	!. 
   41
   42do_crossover(0, _, _, _, _, _) :- 
   43	!, 
   44	fail.
   45do_crossover(_, Parent1, N1, Parent2, Child1, Child2) :-
   46	my_random(N1, K1),
   47	%writel(['A:rand pick ', K1, ' from ', N1, '.', nl]),
   48	select_subtree(Parent1, K1, _, Child1, Subtree1, Subtree2, NodeName),
   49	count_nodes(Parent2, NodeName, N2),
   50	my_random(N2, K2),
   51	%writel(['B:rand pick ', K2, ' from ', N2, ' ', NodeName, ' nodes.', nl]),
   52	select_subtree(Parent2, K2, _, Child2, Subtree2, Subtree1, NodeName),
   53	tree_verification(Child1),
   54	tree_verification(Child2),
   55	!.
   56do_crossover(Tries, Parent1, N1, Parent2, Child1, Child2) :-
   57	Tries2 is Tries - 1,
   58	%writel(['Try ', Tries2, nl]),
   59	do_crossover(Tries2, Parent1, N1, Parent2, Child1, Child2),
   60	!.
   61
   62% check that a new Tree doesn't fail due to
   63% failed embedded code in DCTG rules.
   64
   65tree_verification(Child) :-
   66	%writel(['tree_verif: testing child:',nl]),
   67	%prettyprint(Child),
   68	(reprod_verif_P(yes) ->
   69		user_args_P(Args),
   70		verification(Child, Args, _)
   71		%writel(['tree_verif: verification succeeded.',nl])
   72		;
   73		true),
   74	!.
   75
   76% count_nodes(Tree, NodeName, NumNodes):
   77%	Tree - DCTG expression structure 
   78%	NodeName - name of node to count (otherwise: all nodes = 'all'; 
   79%			all internal = 'internal'; all leaf = 'leaf')
   80%	NumNodes - number of nodes in Tree
   81% Scans Tree and counts number of nodes.
   82
   83count_nodes(node(_, Children, _), all, NumNodes) :-
   84	!,
   85	count_children_nodes(Children, all, NumNodes2),
   86	NumNodes is NumNodes2 + 1.
   87count_nodes(node(_, Children, ID), Type, NumNodes) :-
   88	Type == internal,
   89	fast:dctg_rule_info(_, ID, _, _, nonterminal),
   90	!,
   91	count_children_nodes(Children, Type, NumNodes2),
   92	NumNodes is NumNodes2 + 1.
   93count_nodes(node(_, Children, ID), Type, NumNodes) :-
   94	Type == leaf,
   95	fast:dctg_rule_info(_, ID, _, _, terminal),
   96	!,
   97	count_children_nodes(Children, Type, NumNodes2),
   98	NumNodes is NumNodes2 + 1.
   99count_nodes(node(_, Children, ID), NodeName, NumNodes) :-
  100	fast:dctg_rule_info(NodeName, ID, _, _, _),
  101	!,
  102	count_children_nodes(Children, NodeName, NumNodes2),
  103	NumNodes is NumNodes2 + 1.
  104count_nodes(node(_, Children, _), NodeName, NumNodes) :-
  105	!,
  106	count_children_nodes(Children, NodeName, NumNodes).
  107count_nodes(_, _, 0).
  108
  109count_children_nodes([], _, 0).
  110count_children_nodes([Node|Rest], NodeName, NumNodes) :-
  111	count_nodes(Node, NodeName, NumNodes2),
  112	count_children_nodes(Rest, NodeName, NumNodes3),
  113	NumNodes is NumNodes2 + NumNodes3,
  114	!.
  115
  116% select_subtree(Parent, K, K2, NewParent, SubTree, Hole, NodeName):
  117%	Parent - parent tree structure
  118%	K - Kth node to select in Parent; must be < number nodes in Parent.
  119%	K2 - final K during structure traversal
  120%	NewParent - Parent structure with variable Hole in place of removed
  121%		subtree Subtree
  122%	Subtree - subtree to swap
  123%	Hole - location of hole in ParentWithHole (variable)
  124%	NodeName - node name of Subtree to select from; if variable, then 
  125%		select from all nodes
  126% Selects a Kth node in tree for crossover of type NodeName (or all, if 
  127% NodeName not set). Sets up the new tree with Hole placeholder for selected 
  128% subtree. Hole may be already unified with other parent's subtree. 
  129% Cases:
  130%	1. Count = 0, var name --> use that node
  131%	2. Count = 0, name matches given --> use that node
  132% 	3. Count > 0, var name or name match -> count and continue
  133%	4. name doesn't match given --> skip and continue
  134%	5. else stop at given count (we've exhausted tree, and we're at 
  135%	   non-node component)
  136
  137select_subtree(node(_, Kids, ID), 1, 0, NewParent, 
  138		node(NodeName, Kids, ID), NewParent, NodeName) :-  % cases 1, 2
  139	(var(NodeName) ; fast:dctg_rule_info(NodeName,ID,_,_,_)),
  140	!,
  141	fast:dctg_rule_info(NodeName,ID,_,_,_).
  142select_subtree(node(Name, Kids, ID), K, K2, node(Name, Kids2, ID), 
  143		Subtree, Hole, NodeName) :- % case 3
  144	(var(NodeName) ; fast:dctg_rule_info(NodeName,ID,_,_,_)),
  145	!,
  146	K3 is K-1,
  147	select_subtree_children(Kids, K3, K2, Kids2, Subtree, Hole, NodeName).
  148select_subtree(node(Name, Kids, ID), K, K2, node(Name, Kids2, ID), 
  149		Subtree, Hole, NodeName) :- % case 4
  150	!,
  151	select_subtree_children(Kids, K, K2, Kids2, Subtree, Hole, NodeName).
  152select_subtree(Node, K, K, Node, _, _, _). % case 5
  153
  154% select_subtree_children applies select_subtree to list of nodes.
  155
  156select_subtree_children([], K, K, [], _, _, _) :- !.
  157select_subtree_children([Node|T], K, K2, [Node2|T2], Subtree, Hole, Name) :- 
  158	select_subtree(Node, K, K3, Node2, Subtree, Hole, Name),
  159	(K3 == 0 ->
  160		T=T2, 
  161		K3=K2
  162		;
  163		select_subtree_children(T, K3, K2, T2, Subtree, Hole, Name)).
  164
  165debug_crossover :-
  166	dctg_root_P(Root),
  167	writel(['Generate tree 1...', nl]),
  168	generate_tree(Root, full, 6, _, P1, _),
  169	writel(['Generate tree 2...', nl]),
  170	generate_tree(Root, full, 6, _, P2, _),
  171	writel(['Parent1...', nl]),
  172	prettyprint(P1),
  173	writel(['Parent2...', nl]),
  174	prettyprint(P2),
  175	writel(['Do the crossover...', nl]),
  176	crossover(P1, P2, C1, C2),
  177	writel(['Child1...', nl]),
  178	prettyprint(C1),
  179	writel(['Child2...', nl]),
  180	prettyprint(C2).
  181
  182debug_crossover2 :-
  183	generate_tree(sentence, grow, 10, _, P1, _),
  184	generate_tree(sentence, grow, 10, _, P2, _),
  185	crossover(P1, P2, C1, C2),
  186	writel(['Parent1...', nl]),
  187	prettyprint(P1),
  188	writel(['Parent2...', nl]),
  189	prettyprint(P2),
  190	writel(['Child1...', nl]),
  191	prettyprint(C1),
  192	writel(['Child2...', nl]),
  193	prettyprint(C2).
  194
  195% ---------------------------
  196
  197% mutation(Parent, Child):
  198%	Parent - tree to mutate
  199% 	Child - mutated result
  200% Performs mutation on a tree. A subtree is randomly selected. Then a
  201% new subtree of the same type as selected one is generated using grow 
  202% generation, and it replaces the selected subtree. If the resulting tree 
  203% is too deep, then it is repeated a maximum number of user-specified times.
  204% If the user is using terminal mutation probability (Case 1) then all nodes
  205% of that type (if it succeeds statisticall) are counted. If none exist, then
  206% all nodes counted (case 2).
  207
  208mutation(Parent, Child) :-
  209	reprod_P(Tries),
  210	do_mutation(Tries, Parent, Child),
  211	!.
  212
  213do_mutation(0, _, _) :-
  214	!,
  215	fail.
  216do_mutation(_, Parent, Child) :-   % case 1
  217	prob_terminal_mutation_P(PT),
  218	\+ (PT==no),
  219	(maybe(PT) -> Type=leaf ; Type=internal),
  220	count_nodes(Parent, Type, N),
  221	max_depth_P(_, MaxDepth),
  222	my_random(N, K),
  223	%writel(['rand pick ', K, ' from ', N, '.', nl]),
  224	select_subtree(Parent, K, _, Child, _, NewTree, NodeName),
  225	NewDepth is MaxDepth - 2, % a subtree with a node type has depth > 1
  226	generate_tree(NodeName, grow, NewDepth, _, NewTree, _),
  227	tree_verification(Child),
  228	!.
  229do_mutation(_, Parent, Child) :-   % case 2
  230	max_depth_P(_, MaxDepth),
  231	count_nodes(Parent, all, N),
  232	my_random(N, K),
  233	%writel(['rand pick ', K, ' from ', N, '.', nl]),
  234	select_subtree(Parent, K, _, Child, _, NewTree, NodeName),
  235	NewDepth is MaxDepth - 2, % a subtree with a node type has depth > 1
  236	generate_tree(NodeName, grow, NewDepth, _, NewTree, _),
  237	tree_verification(Child),
  238	!.
  239do_mutation(Tries, Parent, Child) :-
  240	Tries2 is Tries - 1,
  241	%writel(['Try countdown... ', Tries2, nl]),
  242	do_mutation(Tries2, Parent, Child),
  243	!.
  244
  245debug_mutation :-
  246	dctg_root_P(Root),
  247	generate_tree(Root, full, 6, _, Parent, _),
  248	mutation(Parent, Child),	
  249	writel(['Parent...', nl]),
  250	prettyprint(Parent),
  251	writel(['Child...', nl]),
  252	prettyprint(Child).
  253
  254% ---------------------------
  255
  256% verification(Tree, UserArgs, Expr):
  257%	Tree - DCTG tree to verify
  258%	UserArgs - Argument list to pass to DCTG rules 
  259%	Expr - list expression for Tree
  260% The DCTG tree is verified by interpreting the Prolog DCTG rules
  261% in concert with the Tree structure. The purpose of this is to
  262% execute any embedded Prolog in the rules, which are 
  263% not retained in the tree data structure itself. User args as set by user_args
  264% parameter are also used (those embedded in Prolog structure are irrelevant).
  265% This routine may cause a tree to fail, in that embedded Prolog goals or
  266% user args fail.
  267
  268% verification embeds user args into initial call of tree.
  269
  270verification(node(Name, Kids, ID), UserArgs, Expr) :-
  271	fast:dctg_rule_info(_, ID, Call, _, _),
  272	Call =.. [Name|Args],
  273	append(_, [node(X,Y,Z),Expr,_], Args),
  274	append(UserArgs, [node(X,Y,Z),Expr,[]], Args2), 
  275	RuleHead2 =.. [Name|Args2],     % embed user args, empty diff list
  276	!,
  277	verify_tree(RuleHead2, node(Name, Kids, ID)).
  278
  279verify_tree(Call, node(_, Kids, ID)) :-
  280	clause(Call, Body),
  281	same_id(Call, ID),
  282	!,
  283	%writel(['verify_tree: Call=', Call, 'node = ', N, ID, nl]),
  284	%writel(['verify_tree: Body= ', Body, 'Kids=', Kids,nl]),
  285	verify_kids(Body, Kids, _).
  286verify_tree(_, _) :- 
  287	%writel(['verify_tree: failed', nl]),
  288	!,
  289	fail.
  290	
  291verify_kids((A,B), Kids, Kids3) :-
  292	!,
  293	verify_kids(A, Kids, Kids2),
  294	verify_kids(B, Kids2, Kids3).
  295verify_kids(A, [node(_, Kids, ID)|Rest], Rest) :-
  296	is_a_rule_call(A),
  297	!,
  298	%writel(['v_k 2: Call=', A, 'Node name = ', N, ID, nl]),
  299	verify_tree(A, node(_, Kids, ID)).
  300verify_kids(c(A,X,B), [[H]|T], T) :-  % single constant
  301 	!,
  302 	% X == H,
  303 	X = H,
  304 	%writel(['v_k 3: Call=', c(A,X,B), 'List=', [[H]|T], nl]),
  305 	c(A,X,B).
  306verify_kids(c(A,X,B), [[H|T2]|T], [T2|T]) :-  % multiple constants
  307	!,
  308	% X == H,
  309	X = H,
  310	%writel(['v_k 4: Call=', c(A,X,B), 'List=', [[H|T2]|T], nl]),
  311	c(A,X,B).
  312verify_kids(A, Kids, Kids) :-
  313	!,
  314	%writel(['v_k 5: Call=', A, 'Kids=', Kids, nl]),
  315	call(A).
  316
  317% Warning: user cannot use node/3 structure in their user arg fields!
  318
  319same_id(Call, ID) :-
  320	Call =.. [_|Args],
  321	member(node(_, _, ID2), Args),
  322	% append(_, [node(_, _, ID)|_], Args),
  323	!,
  324	ID==ID2