1%
    2% pPEGutilities == SWI-Prolog module containing pPEG supporting functions
    3%
    4/*	The MIT License (MIT)
    5 *
    6 *	Copyright (c) 2021-23 Rick Workman
    7 *
    8 *	Permission is hereby granted, free of charge, to any person obtaining a copy
    9 *	of this software and associated documentation files (the "Software"), to deal
   10 *	in the Software without restriction, including without limitation the rights
   11 *	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   12 *	copies of the Software, and to permit persons to whom the Software is
   13 *	furnished to do so, subject to the following conditions:
   14 *
   15 *	The above copyright notice and this permission notice shall be included in all
   16 *	copies or substantial portions of the Software.
   17 *
   18 *	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   19 *	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   20 *	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   21 *	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   22 *	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   23 *	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   24 *	SOFTWARE.
   25 */
   26:- module(pPEGutilities,[
   27	ptree_json_term/2,   % map a PTree to/from a "new" JSON term
   28	ptree_pratt/2,       % map a pTree to a pratt tree using op rule bindings convention
   29	ptree_printstring/2, % pretty print string for a ptree
   30	ptree_printstring/3  % pretty print string for a ptree specifying a left margin indent.
   31]).   32
   33:-set_prolog_flag(optimise,true).  % mainly optimizes arithmetic (module scope only)
   34
   35%
   36% Many pPEG implementations specify a ptree using a JSON format. ptree_json_term/2 in
   37%	conjunction with library(http/json) can be used to covert the Prolog representation
   38%	of a ptree to the "new" JSON format. On input, string values must be left as strings
   39%	using the "value_string_as(string)" option if necessary.
   40%
   41ptree_json_term(PTree,[Name, Value]) :-
   42	(string(Name)
   43	 -> atom_string(PName,Name),    % JSON  -> PTree
   44	    PTree =.. [PName, PVal]
   45	 ;  PTree =.. [PName, PVal],    % PTree -> JSON
   46	    atom_string(PName,Name)
   47	),
   48	ptree_val_json_value(PVal,Value).
   49
   50ptree_val_json_value(Value,Value) :-
   51	string(Value), !.
   52ptree_val_json_value([], []) :- !.  % ! when first arg a var
   53ptree_val_json_value([PNode|PNodes], [JNode|Jnodes]) :-
   54	ptree_json_term(PNode, JNode),
   55	ptree_val_json_value(PNodes, Jnodes).
   56
   57%
   58% ptree_pratt/2 maps a ptree produced using a grammar with Pratt rule naming conventions
   59% to a "pratt tree", by applying precedence values to the operators and replacing the
   60% original node names with the operator symbols. A simple example:
   61% 
   62% ?- pratt_parse_expr("1+2*3",Tree),ptree_pratt(Tree,Pratt).
   63% Tree = expr([number("1"), addOp_3L("+"), expr([number("2"), mulOp_5L("*"), number("3")])]),
   64% Pratt = +[number("1"), *([number("2"), number("3")])].
   65%
   66% Any sub-expression nodes, i.e., nodes with the same name as the ptree root, are 
   67% similarly converted:
   68%
   69% ?- pratt_parse_expr("(1+2)*3",Tree),ptree_pratt(Tree,Pratt).
   70% Tree = expr(['Pexpr'([expr([number("1"), addOp_3L("+"), number("2")])]), mulOp_5L("*"), number("3")]),
   71% Pratt = *(['Pexpr'([+[number("1"), number("2")]]), number("3")]).
   72%
   73% The algorithm requires operator rule names containing a Pratt suffix of the form "_PA"
   74% where 'P' character usually in the range '0-9' but can be any  character permitted
   75% in rule names, and 'A' is the associativity, "L" or "R". Examples:
   76%
   77%	addOp_2L   = [-+]
   78%	mulOp_3L   = [*/]
   79%	expOp_4R   = '^'
   80%
   81% Precedence values for characters: [0-9] < [A-Z] < '_' < [a-z]
   82%
   83
   84ptree_pratt(Tree, Pratt) :-
   85	Tree =.. [PFunc,Args],         % separate rule functor from list of children
   86	(string(Args)                  % tree with value string is already a pratt tree
   87	 -> Pratt = Tree
   88	 ;  pratt_flatten_(Args,PFunc,T/T,List/[]),
   89	    pratt_(List,[Pratt])       % make pratt tree
   90	).
   91
   92pratt_flatten_([],_PFunc,List,List).
   93pratt_flatten_([Op|Ex],PFunc,LIn/[Pratt_op|Ts],LOut) :-   % substitute pratt operator definitions
   94	pratt_op(Op,Pratt_op), !, 
   95	pratt_flatten_(Ex,PFunc,LIn/Ts,LOut).
   96pratt_flatten_([Tree|Ex],PFunc,LIn,LOut) :-               % sub-expr nodes get flattened
   97	Tree =.. [PFunc,Args], !,	
   98	pratt_flatten_(Args,PFunc,LIn,LNxt),
   99	pratt_flatten_(Ex,PFunc,LNxt,LOut).
  100pratt_flatten_([ValIn|Ex],PFunc,LIn/[ValOut|Ts],LOut) :-  % non pratt expression, check arguments
  101	ValIn  =.. [F,ArgsIn],
  102	(string(ArgsIn)
  103	 -> ValOut = ValIn
  104	 ;  pratt_args_(ArgsIn,PFunc,ArgsOut),
  105	    ValOut =.. [F,ArgsOut]
  106	),
  107	pratt_flatten_(Ex,PFunc,LIn/Ts,LOut).
  108
  109pratt_args_([],_PFunc,[]).
  110pratt_args_([Arg|ArgsIn],PFunc,[Arg|ArgsOut]) :-
  111	atomic(Arg), !,
  112	pratt_args_(ArgsIn,PFunc,ArgsOut).	
  113pratt_args_([ArgIn|ArgsIn],PFunc,[ArgOut|ArgsOut]) :-
  114	ArgIn  =.. [F,FArgsIn],
  115	(F = PFunc
  116	 -> ptree_pratt(ArgIn,ArgOut)                          % pratt expression
  117	 ;  pratt_args_(FArgsIn,PFunc,FArgsOut),               % non-pratt, check arguments
  118	    ArgOut =.. [F,FArgsOut]
  119	),
  120	pratt_args_(ArgsIn,PFunc,ArgsOut).
  121
  122% apply precedence rules to token list of operators and operands
  123pratt_(['$pratt_op'(OpSym,_,_), V], [Term]) :-                % simple prefix
  124	not_op(V),
  125	!,
  126	Term =.. [OpSym,[V]].
  127
  128pratt_([V, '$pratt_op'(OpSym,_,_)], [Term]) :-                % simple postfix
  129	not_op(V),
  130	!,
  131	Term =.. [OpSym,[V]].
  132
  133pratt_([V1, '$pratt_op'(OpSym,_,_), V2], [Term]) :-           % simple infix
  134	not_op(V1), not_op(V2),
  135	!,
  136	Term =.. [OpSym,[V1,V2]].
  137
  138pratt_(['$pratt_op'(OpSym1,OpL1,OpR1), '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-     % prefix prefix
  139	OpL2 > OpR1,  % must associate to right
  140	!,
  141	pratt_right_(['$pratt_op'(OpSym2,OpL2,OpR2) |Etc], RHS),
  142	pratt_(['$pratt_op'(OpSym1,OpL1,OpR1) |RHS], Term).
  143
  144pratt_([V, '$pratt_op'(OpSym1,OpL1,OpR1), '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-  % postfix postfix ; infix prefix
  145	not_op(V),
  146	!,
  147	(OpL2 > OpR1
  148	 -> pratt_right_(['$pratt_op'(OpSym2,OpL2,OpR2) |Etc], RHS),
  149	    pratt_([V, '$pratt_op'(OpSym1,OpL1,OpR1) |RHS], Term)
  150	 ;  pratt_([V, '$pratt_op'(OpSym1,OpL1,OpR1)], [LHS]),
  151	    pratt_([LHS, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term)
  152	).
  153
  154pratt_(['$pratt_op'(OpSym1,OpL1,OpR1), V, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-  % prefix infix / postfix postfix
  155	not_op(V),
  156	!,
  157	(OpL2 > OpR1
  158	 -> pratt_right_([V, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc],RHS),
  159	    pratt_(['$pratt_op'(OpSym1,OpL1,OpR1) |RHS], Term)
  160	 ;  pratt_(['$pratt_op'(OpSym1,OpL1,OpR1), V], [LHS]),
  161	    pratt_([LHS, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term)
  162	).
  163
  164pratt_([V1, '$pratt_op'(OpSym1,OpL1,OpR1), V2, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-  % infix (infix/postfix)
  165	not_op(V1),              % V2 check unnecessary (already handled above)
  166	!,
  167	(OpL2 > OpR1
  168	 -> pratt_right_([V2, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], RHS),
  169	    pratt_([V1, '$pratt_op'(OpSym1,OpL1,OpR1) |RHS], Term)
  170	 ;  pratt_([V1, '$pratt_op'(OpSym1,OpL1,OpR1), V2], [LHS]),
  171	    pratt_([LHS, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term)
  172	).
  173
  174pratt_(Exp, _Term) :-
  175	print_message(informational, prolog_parser(op_conflict(Exp))),
  176	fail.
  177
  178% build RHS recursively as far as possible, then return value and rest of tokens
  179pratt_right_(['$pratt_op'(OpSym1,OpL1,OpR1), '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-
  180	!,
  181	OpL2 > OpR1,  % must associate to right
  182	pratt_right_(['$pratt_op'(OpSym2,OpL2,OpR2) |Etc], RHS),
  183	Term = ['$pratt_op'(OpSym1,OpL1,OpR1) |RHS].
  184
  185pratt_right_(['$pratt_op'(OpSym1,OpL1,OpR1), V, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-
  186	!,
  187	(OpL2 > OpR1
  188	 -> pratt_right_([V, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], RHS),
  189	    Term = ['$pratt_op'(OpSym1,OpL1,OpR1) |RHS]
  190	 ;  pratt_(['$pratt_op'(OpSym1,OpL1,OpR1), V], [LHS]),
  191	    Term = [LHS,'$pratt_op'(OpSym2,OpL2,OpR2) |Etc]
  192	).
  193
  194pratt_right_([V, '$pratt_op'(OpSym1,OpL1,OpR1), '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-
  195	!,
  196	(OpL2 > OpR1
  197	 -> pratt_right_(['$pratt_op'(OpSym2,OpL2,OpR2) |Etc], RHS),
  198	    Term = [V, '$pratt_op'(OpSym1,OpL1,OpR1) |RHS]
  199	 ;  pratt_([V, '$pratt_op'(OpSym1,OpL1,OpR1)], [LHS]),
  200	    Term = [LHS,'$pratt_op'(OpSym2,OpL2,OpR2) |Etc]
  201	).
  202
  203pratt_right_([V1, '$pratt_op'(OpSym1,OpL1,OpR1), V2, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], Term) :-
  204	!,
  205	(OpL2 > OpR1
  206	 -> pratt_right_([V2, '$pratt_op'(OpSym2,OpL2,OpR2) |Etc], RHS),
  207	    Term = [V1, '$pratt_op'(OpSym1,OpL1,OpR1) |RHS]
  208	 ;  pratt_([V1, '$pratt_op'(OpSym1,OpL1,OpR1), V2], [LHS]),
  209	    Term = [LHS,'$pratt_op'(OpSym2,OpL2,OpR2) |Etc]
  210	).
  211
  212pratt_right_(Exp, Term) :- pratt_(Exp, Term).
  213
  214
  215prolog:message(prolog_parser(op_conflict(Exp))) -->  % DCG
  216	['Error, operator clash in: ~p\n' - [Exp]].
  217
  218not_op(V) :- \+functor(V,'$pratt_op',_).
  219
  220% extract binding value pair from suffix of Op functor; suffix matches '_PA'
  221% Op = OpRule(OpSym)
  222pratt_op(Op, '$pratt_op'(OpSym,OpL,OpR)) :-
  223	Op =.. [OpRule,SOp], 
  224	sub_atom(OpRule,_,3,0,PSfx),                 % P code = precedence value
  225	atom_codes(PSfx,[95,P,A]),
  226	OpL is P*2,                                  % multiply code by 2 to leave a gap
  227	(A = 76 -> OpR is OpL+1                      % A = 'L'
  228	;A = 82 -> OpR is OpL-1                      % A = 'R'
  229	),                                           % ; fail
  230	atom_string(OpSym,SOp).                      % atom form for functor, string(SOp) implied
  231
  232
  233%
  234% ptree_printstring/N maps any ptree to a "pretty printed" string.
  235% 
  236% ptree_printstring/3 permits the use of the `Indent` argument, a string 
  237% forming the prefix of all lines. 
  238% ptree_printstring/2 defines a default prefix of ""
  239% An example:
  240/*
  241?- peg_parse(pPEG,"r1 = [a-z] [A-zA-z]*",Tree),ptree_printstring(Tree,"\t",S),write(S).
  242	Peg
  243	└─rule
  244	  ├─id "r1"
  245	  └─seq
  246	    ├─chs "[a-z]"
  247	    └─rep
  248	      ├─chs "[A-zA-z]"
  249	      └─sfx "*"
  250Tree = 'Peg'([rule([id("r1"), seq([chs("[a-z]"), rep([chs("[A-zA-z]"), sfx("*")])])])]),
  251S = "\tPeg\n\t└─rule\n\t  ├─id \"r1\"\n\t  └─seq\n\t    ├─chs \"[a-z]\"\n\t    └─rep\n\t      ├─chs \"[A-zA-z]\"\n\t      └─sfx \"*\"\n".
  252*/
  253ptree_printstring(PTree, PPstring) :-               % Arity 2
  254	ptree_printstring(PTree, "", PPstring).         % null indent
  255	
  256ptree_printstring(PTree, Indent, PPstring) :-       % Arity 3
  257	ptree_printstring(PTree, [Indent], T/T, PPlist/[]),
  258	atomics_to_string(PPlist, PPstring).            % build final string result
  259	
  260ptree_printstring(PTree, Indent, StrIn, StrOut) :-  % Arity 4
  261	PTree =.. [Name, Val],                          % decompose to name and value
  262	ptree_printstring_(Name, Val, Indent, StrIn, StrOut).
  263
  264ptree_printstring_(Name, AString, Indent, Str/Tail, Str/Etc) :-   % ptree value: string
  265	string(AString), !,
  266	format(string(OString),"~p",[AString]),  % quoted and escaped
  267	indent_term(Indent,[Name," ",OString,"\n"|Etc],Tail).
  268ptree_printstring_(Name, Children, Indent, Str/Tail, Str/Etc) :-  % ptree value: [children..]
  269	%  is_list(Children),
  270	indent_term(Indent,[Name,"\n"|Nxt],Tail),
  271	new_indent(Indent,NxtIndent),
  272	ptree_children(Children,NxtIndent,Str/Nxt,Str/Etc).     % add children
  273	
  274ptree_children([], _, Str, Str).                            % EOL, No child case
  275ptree_children([Term], [_|Indent], Str/Nxt, Str/Etc) :- !,  % EOL -> different prefix
  276	left_crn(LC),
  277	ptree_printstring(Term, [LC|Indent], Str/Nxt, Str/Etc).
  278ptree_children([Term|Terms], Indent, Str/Nxt1, Str/Etc) :- 
  279	ptree_printstring(Term, Indent, Str/Nxt1, Str/Nxt2),
  280	ptree_children(Terms, Indent, Str/Nxt2, Str/Etc).
  281
  282indent_term([],Tail,Tail).
  283indent_term([I|Is],In,Tail) :-
  284	indent_term(Is,[I|In],Tail).
  285
  286new_indent([LT|Indent], New) :- left_tee(LT), !,  %  "|-" ==> "| |-"
  287	vertical(Vrt), New = [LT,Vrt|Indent].
  288new_indent([LC|Indent], New) :- left_crn(LC), !,  %  "+-" ==> "  |-"
  289	left_tee(LT), space(SP), New = [LT,SP|Indent].
  290new_indent(Indent,[LT|Indent]) :-                 %  Any ==> Any "|-"  
  291	left_tee(LT).
  292
  293left_tee("\u251C\u2500").  % "|-"
  294left_crn("\u2514\u2500").  % "+-"
  295vertical("\u2502 ").       % "| "
  296space("  ").               % "  "