pPEG.pl

    1%
    2% pPEG == SWI-Prolog module for parsing strings with pPEG grammars
    3%
    4/*	The MIT License (MIT)
    5 *
    6 *	Copyright (c) 2021-2025 Rick Workman
    7 *
    8 *	Permission is hereby granted, free of charge, to any person obtaining a copy
    9 *	of this software and associated documentation files (the "Software"), to deal
   10 *	in the Software without restriction, including without limitation the rights
   11 *	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   12 *	copies of the Software, and to permit persons to whom the Software is
   13 *	furnished to do so, subject to the following conditions:
   14 *
   15 *	The above copyright notice and this permission notice shall be included in all
   16 *	copies or substantial portions of the Software.
   17 *
   18 *	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   19 *	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   20 *	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   21 *	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   22 *	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   23 *	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   24 *	SOFTWARE.
   25 */
   26:- module(pPEG,[            % module pPEG exports:
   27	 peg_compile/2,         % create a grammar from a source string
   28	 peg_compile/3,         % as above with option list
   29	 peg_parse/3,           % use a pPEG grammar to parse an Input string to a ptree Result
   30	 peg_parse/5,           % as above with unmatched residue and option list
   31	 peg_grammar/1,         % pPEG grammar source
   32	 peg_lookup_previous/3, % used by CSG extensions to lookup previous matches
   33	 pPEG/4                 % quasi-quotation hook for pPEG
   34	]).   35
   36version("2.1.0").
   37
   38:- version(pPEG(versionInfo)).      % add message to system version
   39
   40:- use_module(library(strings),[string/4]).         % for quasi-quoted strings
   41:- use_module(library(lists),[reverse/2]).          % gen calling path in error msgs
   42:- use_module(library(prolog_wrap)).                % for tracing (see peg_trace/0)
   43:- use_module(library(option),[option/3]).          % for option list processing
   44:- use_module(library(pcre),[re_matchsub/4]).       % uses a regular expression for error & trace output
   45:- use_module(library(quasi_quotations), [          % pPEG as quasi-quotation
   46    quasi_quotation_syntax/1, 
   47    with_quasi_quotation_input/3
   48   ]).   49
   50%
   51% the "standard" pPEG grammar source for bootstrapping and reference, e.g.,
   52% ?- peg_grammar(S), write_term(S,[]).
   53%
   54peg_grammar({|string||
   55	Peg   = _ rule+
   56	rule  = id _ def _ alt
   57	def   = '=' ':'? / ':' '='?
   58	
   59	alt   = seq ('/'_ seq)*
   60	seq   = rep+
   61	rep   = pre sfx? _
   62	pre   = pfx? term
   63	term  = call / quote / class / dot /group / extn
   64
   65	group = '('_ alt ')'
   66	call  = id _ !def
   67	id    = [a-zA-Z_] [a-zA-Z0-9_-]*
   68	pfx   = [&!~]
   69	sfx   = [+?] / '*' nums?
   70	nums  = min ('..' max)?
   71	min   = [0-9]+
   72	max   = [0-9]*
   73	quote = ['] ~[']* ['] 'i'?
   74	class = '[' ~']'* ']'
   75	dot   = '.'
   76	extn  = '<' ~'>'* '>'
   77	_     = ([ \t\n\r]+ / '#' ~[\n\r]* )*
   78
   79|}).
   80
   81%
   82% Bootstrap Peg grammar in ptree form
   83%
   84boot_grammar_def('Peg'([
   85	rule([id("Peg"),   def("="), seq([id("_"), rep([id("rule"), sfx("+")]), id("_")])]), 
   86	rule([id("rule"),  def("="), seq([id("id"), id("_"), id("def"), id("_"), id("alt")])]), 
   87	rule([id("def"),   def("="), quote("'='")]), 
   88	
   89	rule([id("alt"),   def("="), seq([id("seq"), rep([seq([quote("'/'"), id("_"), id("seq")]), sfx("*")])])]), 
   90	rule([id("seq"),   def("="), rep([id("rep"), sfx("*")])]), 
   91	rule([id("rep"),   def("="), seq([id("pre"), rep([id("sfx"), sfx("?")]), id("_")])]), 
   92	rule([id("pre"),   def("="), seq([rep([id("pfx"), sfx("?")]), id("term")])]), 
   93	rule([id("term"),  def("="), alt([id("call"), id("quote"), id("class"), id("group")])]), 
   94	
   95	rule([id("group"), def("="), seq([quote("'('"), id("_"), id("alt"), quote("')'")])]), 
   96	rule([id("pfx"),   def("="), class("[&!~]")]), 
   97	rule([id("sfx"),   def("="), class("[+?*]")]), 
   98	
   99	rule([id("call"),  def("="), seq([id("id"), id("_"), pre([pfx("!"), id("def")])])]), 
  100	rule([id("id"),    def("="), seq([class("[a-zA-Z_]"), rep([class("[a-zA-Z0-9_-]"), sfx("*")])])]), 
  101	rule([id("quote"), def("="), seq([class("[']"), rep([pre([pfx("~"), class("[']")]), sfx("*")]), class("[']"), rep([quote("'i'"), sfx("?")])])]), 
  102	rule([id("class"), def("="), seq([quote("'['"), rep([pre([pfx("~"), quote("']'")]), sfx("*")]), quote("']'")])]), 
  103	rule([id("_"),     def("="), rep([alt([seq([quote("'#'"), rep([pre([pfx("~"), class("[\\n\\r]")]), sfx("*")])]), rep([class("[ \\t\\n\\r]"), sfx("+")])]), sfx("*")])])
  104], _)).
  105
  106%
  107% initialization code
  108%
  109:-set_prolog_flag(optimise,false).  % for debug
  110
  111% provide debug support before turning optimization on 
  112debug_peg_trace(FString,Args) :- debug(pPEG(trace),FString,Args).
  113
  114:-set_prolog_flag(optimise,true).  % mainly optimizes arithmetic (module scope only)
  115
  116% called from :- initialization.
  117init_peg :-
  118	foreach((nb_current(Key,_), atom_concat('pPEG:',_,Key)), nb_delete(Key)),  % clear pPEG globals
  119	nodebug(pPEG(trace)),              % init trace
  120	bootstrap_grammar.                 % initial load
  121
  122user:exception(undefined_global_variable,'pPEG:$pPEG',retry) :-
  123	bootstrap_grammar.                 % support multi-threads (need copy for each thread)              
  124
  125bootstrap_grammar :-
  126	boot_grammar_def(BootPeg),         % bootstrap and optimize
  127	nb_setval('pPEG:$pPEG',BootPeg),
  128	peg_grammar(PegSrc),
  129	peg_compile(PegSrc,pPEG,[optimise(true)]).  % if successful, will overwrite boot parser
  130
  131%
  132% support pPEG grammar in quasi-quotation (compiles to a grammar term, Args are options)
  133%
  134:- quasi_quotation_syntax(pPEG).  135
  136pPEG(Content, Args, _Binding, Grammar) :-
  137	with_quasi_quotation_input(Content, Stream, read_string(Stream, _, String)),
  138	peg_compile(String,Grammar,Args).  % Args are compiler options
  139
  140%
  141% peg_compile/2, peg_compile/3 :create a grammar from a source string
  142%
  143peg_compile(Src, GrammarSpec) :-              % create an optimized parser
  144	peg_compile(Src, GrammarSpec, []).
  145
  146peg_compile(Src, GrammarSpec, OptionList) :-  % create parser, optionally optimized
  147	peg_parse(pPEG, Src, Ptree, _, OptionList),
  148	option_value(optimise(Opt),OptionList,true),
  149	make_grammar(Opt,Ptree,Grammar),
  150	(Grammar = GrammarSpec
  151	 -> true                                  % GrammarSpec unified with Grammar
  152	 ;  (atom(GrammarSpec)                    % GrammarSpec is name of a Grammar
  153	     -> atomic_concat('pPEG:$',GrammarSpec,GKey),
  154	        nb_setval(GKey,Grammar)
  155	     ;  current_prolog_flag(verbose,GVrbse),
  156	        option_value(verbose(Vrbse),OptionList,GVrbse), % default = global setting
  157	        peg_fail_msg(peg(argError('GrammarSpec',GrammarSpec)),Vrbse)
  158	    )
  159	).
  160
  161make_grammar(true,Ptree,Grammar) :- !,        % optimise grammar, refs created by optimiser
  162	optimize_peg(Ptree,Grammar).
  163make_grammar(_,'Peg'(Rules),'Peg'(Rules,_)).  % non-optimised, no refs needed
  164
  165%
  166% peg_parse/3 :use a Peg grammar to parse an Input string to a ptree Result
  167% peg_parse/5 :parse/3 with Match string and Options
  168%
  169peg_parse(GrammarSpec, Input, Result) :-
  170	peg_parse(GrammarSpec, Input, Result, _Residue, []).
  171
  172peg_parse(GrammarSpec, Input, Result, Residue, OptionList) :-
  173	% process options
  174	option_value(incomplete(Incomplete),OptionList,false),  % default = complete parse option
  175	option_value(tracing(TRules),OptionList,[]),            % default = no tracing
  176	current_prolog_flag(verbose,GVrbse),
  177	option_value(verbose(Vrbse),OptionList,GVrbse),         % default = global setting
  178	peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,Env,Eval),  % setup initial Env and Eval
  179	(eval_(Eval, Env, Input, 0, PosOut, Result0)            % parse using Eval
  180	 -> (Result0 = [] -> sub_string(Input,0,PosOut,_,Result) ; Result = Result0)  % parse successful, map [] to matched
  181	 ;  error_info(Env,_,ErrorInfo),                        % parse unsuccessful
  182	    peg_fail_msg(peg(errorinfo(Input,ErrorInfo,"")),Vrbse) % fail with message
  183	),
  184	(string_length(Input,PosOut)                            % did parse consume all input?
  185	 -> Residue = ""                                        % yes, set residue to empty
  186	 ;  (Incomplete == true                                 % no, incomplete allowed?
  187	     -> sub_string(Input,PosOut,_,0,Residue)            % yes, set Residue to remaining
  188	     ;  \+eval_(Eval, Env, Input, PosOut, _, _),        % rerun failed parse on residue ...
  189	        error_info(Env,_,ErrorInfo),                    % just to get error info
  190	        peg_fail_msg(peg(errorinfo(Input,ErrorInfo,"fell short, (?)")),Vrbse)  % fail with message
  191	    )
  192	).
  193
  194option_value(Option, Options, Default) :-
  195	(Options = []
  196	 -> arg(1,Option,Default)                      % faster option when empty list
  197	 ;  option(Option, Options, Default)           % else use option/3
  198	).
  199
  200peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,@(Grammar,[],0,([],[]),PEnv),Eval) :-
  201	(string(Input)
  202	 -> true
  203	 ;  peg_fail_msg(peg(argError('Input',Input)),Vrbse)
  204	),
  205	(copy_term(GrammarSpec,'Peg'(Grammar0,Grammar0))  % make a copy before substituting refs
  206	 -> true
  207	 ; % retrieving from globals makes it's own copy
  208	   (atom(GrammarSpec), atomic_concat('pPEG:$',GrammarSpec,GKey), nb_current(GKey,'Peg'(Grammar0,Grammar0))
  209	    -> true
  210	    ;  peg_fail_msg(peg(argError('Grammar',GrammarSpec)),Vrbse)
  211	   )
  212	),
  213	peg_add_tracing(TRules,Grammar0,Grammar),      % add required tracing
  214	(Vrbse == normal                               % init persistent environment
  215	 -> PEnv = (@([GName],0,id(GName)), "")        % error info and indent (for tracing)
  216	 ;  PEnv = (@(), "")
  217	),
  218	Grammar = [FirstRule|_],                       % first rule
  219	(FirstRule = rule([Eval|_])                    % GName is name of first rule
  220	 -> Eval = id(GName)                           % non-optimized version: id(Name)
  221	 ;  Eval = call_O(FirstRule),                  % optimized version
  222	    arg(1,FirstRule,GName)
  223	).
  224
  225% efficient way of hiding some details of environment structure using goal_expansion/2
  226% persistent environment not trailed - contains latest error info and trace indentation
  227% Note: arg/3 calls are handled in VM if last argument is a "first variable".
  228goal_expansion(persistent_env_(Env,PEnv), arg(5,Env,PEnv)).   % SWIP inline optimization
  229goal_expansion(grammar_(Env,Rules),       arg(1,Env,Rules)).  % SWIP inline optimization
  230goal_expansion(error_info(Env,PEnv,ErrorInfo),                % SWIP inline optimization
  231	 (persistent_env_(Env,PEnv), arg(1,PEnv,ErrorInfo))
  232	).
  233goal_expansion(update_error_info(PEnv,ErrorInfo),             % SWIP inline optimization
  234	  nb_linkarg(1,PEnv,ErrorInfo)
  235	).
  236% if no goal expansion
  237grammar_(Env,Rules)        :- arg(1,Env,Rules).
  238context_(Env,Ctxt)         :- arg(4,Env,Ctxt).
  239persistent_env_(Env,PEnv)  :- arg(5,Env,PEnv). 
  240error_info(Env,PEnv,ErrorInfo)    :- persistent_env_(Env,PEnv), arg(1,PEnv,ErrorInfo).
  241update_error_info(PEnv,ErrorInfo) :- nb_linkarg(1,PEnv,ErrorInfo).
  242
  243% messaging and definitions
  244peg_fail_msg(Msg, normal) :-                       % only print if verbose = normal
  245	print_message(informational, Msg),
  246	fail.
  247
  248:- multifile prolog:message/1.  249
  250prolog:message(pPEG(versionInfo)) -->
  251	{ version(Version) },
  252	[ '*** pPEG v~w ***.'-[Version] ].
  253
  254prolog:message(peg(argError(Arg,Value))) -->  % DCG
  255	[ "pPEG Error: invalid argument, ~w = ~w" - [Arg,Value] ].
  256
  257prolog:message(peg(errorinfo(Input,@(Names,Pos,Op),Etext))) -->  % DCG
  258	{reverse(Names,Path),
  259	 atomics_to_string(Path,' -> ',PathExp),
  260	 string_length(Input,InputLen),                     % Pos may be past Input length
  261	 (Pos == InputLen -> Caret = CPos ; Caret = EPos),  % adjust Caret if past Input length
  262	 StartPos is min(Pos,InputLen-1),
  263	 peg_line_pos(Input,StartPos,0,1,Text,EPos,ELineNum),  % source text information
  264	 CPos is EPos+1,                                    % cursor position is 1 based
  265	 vm_instruction(Op,Expected)
  266	},
  267	% a bit of format magic using tab settings to right justify LineNo and position cursor
  268	[ 'pPEG Error: ~w  failed.\n%             ~wexpected ~w at line ~w.~w:\n% ~|~` t~d~3+ | ~w\n% ~|~` t~3+   ~|~` t~*+^' 
  269	        - [PathExp,Etext,Expected,ELineNum,CPos,ELineNum,Text,Caret]
  270	].
  271
  272prolog:message(peg(undefined(RuleName))) -->  % DCG
  273	[ 'pPEG: ~w undefined' - [RuleName] ].               % from VM so limited info
  274
  275prolog:message(peg(scheme(RuleName,Scheme))) -->  % DCG
  276	[ 'pPEG: ~w ~w ... scheme undefined' - [RuleName,Scheme] ].
  277
  278% 
  279% lookup previous match of rule Name in Env
  280%
  281peg_lookup_previous(Name,Env,Match) :-
  282	arg(4,Env,Ctxt),                         % Env[4] = Ctxt for maintaining prior matches
  283	(var(Name)
  284	 -> lookup_match_(Ctxt,RName,Match),     % most recent match
  285	    atom_string(RName,Name)
  286	 ;  atom_string(RName,Name),             % previous named match
  287	    lookup_match_(Ctxt,RName,Match)
  288	).
  289
  290lookup_match_((Matches,Parent),Name,Match) :-
  291	(memberchk((Name,slice(Input,PosIn,PosOut)),Matches)
  292	 -> Len is PosOut-PosIn,                 % construct Match string from slice
  293	    sub_string(Input,PosIn,Len,_,Match)
  294	 ;  lookup_match_(Parent,Name,Match)     % at root, Parent = [] (see peg_setup_parse_/7)
  295	).
  296
  297%
  298% peg VM implementation - 9 native plus 4 "optimized" instructions (plus trace)
  299%
  300eval_(id(Name), Env, Input, PosIn, PosOut, R) :-                % id "instruction"
  301	atom_string(PName,Name),                     % map to call_O(Rule), requires atom Name
  302	arg(1,Env,Grammar),                          % Env[1] = Grammar
  303	(memberchk(rule([id(Name), def(Def), Exp]), Grammar)   % linear search, can be slow
  304	 -> scheme_treatment(Def,Name,Treat),        % defined for all valid
  305	    eval_(call_O(rule(PName,Treat,Exp)), Env, Input, PosIn, PosOut, R) % continue with call_O
  306	 ;  print_message(warning, peg(undefined(PName))),  % undefined rule, fail with warning
  307	    fail
  308	). 
  309
  310eval_(alt(Ss), Env, Input, PosIn, PosOut, R) :-                 % alt "instruction"
  311	error_info(Env,PEnv,ErrorInfo),
  312	(alt_eval(Ss, Env, Input, PosIn, PosOut, R)
  313	 -> update_error_info(PEnv,ErrorInfo)             % success so restore error info
  314	 ;  error_info(Env,_,FErrorInfo),                 % failure error Info
  315	    arg(2,FErrorInfo,FHWM),
  316	    PosIn == FHWM,                                % if nothing consumed before error
  317	    eval_fail_(alt(Ss),Env,PosIn)                 % back up error context to this rule
  318	).
  319
  320eval_(seq(Ss), Env, Input, PosIn, PosOut, R) :-                 % seq "instruction"
  321	seq_eval(Ss, PosIn, Env, Input, PosIn, PosOut, R).
  322
  323eval_(rep([Exp, ROp]), Env, Input, PosIn, PosOut, R) :-         % rep "instruction"
  324	rep_counts(ROp,Min,Max), !,                       % green cut for rep_counts
  325	eval_(rep_O(Exp, Min, Max), Env, Input, PosIn, PosOut, R).
  326
  327eval_(pre([pfx(POp), Exp]), Env, Input, PosIn, PosOut, []) :-   % pre "instruction"
  328	error_info(Env,PEnv,ErrorInfo),
  329	(prefix_eval(POp, Exp, Env, Input, PosIn, PosOut) % lookahead eval
  330	 -> update_error_info(PEnv,ErrorInfo)             % success so restore error info
  331	 ;  update_error_info(PEnv,ErrorInfo),            % fail, ignore any lookahead failure ...
  332	    eval_fail_(pre([pfx(POp), Exp]),Env,PosIn)    % and backtrack with new info
  333	).
  334
  335eval_(quote(S), Env, Input, PosIn, PosOut, []) :-               % quote "instruction"
  336	(sub_string(S,_,1,0,"i")                  % case insensitive match test
  337	 -> sub_string(S,0,_,1,S1),               % strip i
  338	    literal_match_(S1,SMatch),            % string to match
  339	    string_upper(SMatch,UMatch),
  340	    string_length(SMatch,Len),
  341	    sub_string(Input,PosIn,Len,_,Match),
  342	    string_upper(Match,UMatch)	          % case insensitive match ... 
  343	 ;  literal_match_(S,Match),              % string to match
  344	    sub_string(Input,PosIn,Len,_,Match)   % case sensitive match
  345	) -> PosOut is PosIn+Len ; eval_fail_(quote(S),Env,PosIn).
  346
  347eval_(class(MatchSet), Env, Input, PosIn, PosOut, []) :-        % class "instruction"
  348 	match_chars(MatchSet,MChars),             % convert Match string to MChars list
  349	eval_(class_O(in,MChars), Env, Input, PosIn, PosOut, []).
  350
  351eval_(dot(D), Env, Input, PosIn, PosOut, []) :-
  352	string_length(Input, Len),
  353	(PosIn < Len                              % fails on end of Input
  354 	 -> PosOut is PosIn+1                     % consume 1 char
  355	 ;  eval_fail_(dot(D),Env,PosIn) 
  356	).
  357	
  358eval_(extn(S), Env, Input, PosIn, PosOut, R) :-                 % extn "instruction"
  359	(string(S) -> extn_pred(S,T) ; T = S),    % avoid extra work if already optimised
  360	(extn_call(T,Env,Input,PosIn,PosOut,R) -> true ; eval_fail_(extn(S),Env,PosIn)).
  361
  362% additional instructions produced by optimizer
  363eval_(call_O(rule(Name, Treat, Exp)), @(Grammar,Names,Dep,Ctxt,PEnv), Input, PosIn, PosOut, R) :-  % call_O "instruction"
  364	% also called from id instruction after lookup in non-optimised grammars
  365	nonvar(Exp),    % test for undefined rule called, warning would have been printed by optimizer
  366	Dep1 is Dep+1,  % increment call depth
  367	% recursion check - expensive, so use sparingly
  368	(Dep1 >= 64     % only check when call depth exceeds 64
  369	 -> recursive_loop_check(eval_(call_O(rule(Name,_,_)),_,_,P,_,_),P,PosIn,Name)
  370	 ;  true
  371	),
  372	eval_(Exp, @(Grammar,[Name|Names],Dep1,([],Ctxt),PEnv), Input, PosIn, PosOut, Res),  % with new context
  373	(Exp = trace(_)
  374	 -> R = Res  % if tracing, already done
  375	 ;  Match = slice(Input,PosIn,PosOut),  % Input slice matched
  376	    % add Match to siblings in context (undo on backtrack)
  377	    arg(1,Ctxt,Matches), setarg(1,Ctxt,[(Name,Match)|Matches]),
  378	    flatten_(Res,[],RRs),               % flatten args list
  379	    build_ptree(Treat,RRs,Match,Name,R) % and build
  380	).
  381
  382eval_(rep_O(Exp, Min, Max), Env, Input, PosIn, PosOut, R) :-    % rep_O "instruction"
  383	repeat_eval(0, Min, Max, Exp, Env, Input, PosIn, PosOut, R).
  384
  385eval_(quote_O(Case,Match), Env, Input, PosIn, PosOut, []) :-    % quote_O "instruction"
  386	(Case == exact
  387	 -> sub_string(Input,PosIn,Len,_,Match)   % will match "" with Len=0
  388	 ;  % assume Case=upper
  389	    string_length(Match,Len),
  390	    sub_string(Input,PosIn,Len,_,S),
  391	    string_upper(S,Match)
  392	) -> PosOut is PosIn+Len ; eval_fail_(quote_O(Case,Match),Env,PosIn).
  393
  394eval_(class_O(In,MChars), Env, Input, PosIn, PosOut, []) :-     % class_O "instruction"
  395	(sub_atom(Input, PosIn, 1, _, R),         % input char, fails if end of Input
  396 	 chars_in_match(MChars,R,In)              % character in/notin set
  397	 -> PosOut is PosIn+1                     % match succeeded, consume 1 char
  398	 ;  eval_fail_(class_O(In,MChars),Env,PosIn) 
  399	).
  400
  401eval_(trace(Rule), Env, Input, PosIn, PosOut, R) :-             % trace "instruction"
  402	% start tracing this rule
  403	(debugging(pPEG(trace),true)
  404	 -> eval_(call_O(Rule),Env,Input,PosIn,PosOut,R)  % already tracing, just call_O
  405	 ;  current_prolog_flag(debug,DF),  % save debug state
  406	    peg_trace,                      % enable tracing
  407	    persistent_env_(Env,PEnv),
  408	    nb_linkarg(2,PEnv," "),         % reset indent
  409	    (eval_(call_O(Rule),Env,Input,PosIn,PosOut,R)  % call_O with tracing enabled
  410	     -> peg_notrace,                % success, disable tracing and return a result
  411	        set_prolog_flag(debug,DF)   % restore saved debug state
  412	     ;  peg_notrace,                % fail, first disable tracing
  413	        set_prolog_flag(debug,DF),  % restore saved debug state
  414	        fail
  415	    )
  416	).
  417
  418%
  419% Support for VM instructions
  420%
  421
  422% capture error info and fail
  423eval_fail_(Op,Env,PosIn) :-
  424	error_info(Env,PEnv,ErrorInfo),
  425	arg(2,ErrorInfo,HWM),    % exploit optimized arg/3 
  426	PosIn >= HWM,            % new high water mark ?
  427	arg(2,Env,Names),
  428	update_error_info(PEnv,@(Names,PosIn,Op)),
  429	fail.
  430
  431
  432% alt instruction
  433alt_eval([S|Ss], Env, Input, PosIn, PosOut, R) :- 
  434	eval_(S, Env, Input, PosIn, PosOut, R)                  % try S
  435	 -> true                                                % succeed, committed choice
  436	 ;  alt_eval(Ss, Env, Input, PosIn, PosOut, R).         % S failed, keep trying
  437
  438
  439% seq instruction
  440% responsible for capturing error info on failure 
  441seq_eval([], _Start, _Env, _Input, PosIn, PosIn, []).
  442seq_eval([S|Ss], Start, Env, Input, PosIn, PosOut, R) :-
  443	eval_(S, Env, Input, PosIn, PosNxt, Re),                % try S
  444	(Re == []
  445	 -> seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, R)  % nil result, loop to next in sequence
  446	 ;  R = [Re|Rs],                                        % collect result
  447	    seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, Rs) % loop to next in sequence 
  448	).
  449
  450
  451% rep instruction 
  452% counts for repeat a match, -1 signifies any number for Max  
  453rep_counts(sfx("?"),0, 1).
  454rep_counts(sfx("+"),1,-1).
  455rep_counts(sfx("*"),0,-1).                         % *
  456rep_counts(min(StrN),N,N) :-                       % *N
  457	number_string(N,StrN).
  458rep_counts(nums([min(StrN),max("")]),N,-1) :-      % *N..
  459	number_string(N,StrN).
  460rep_counts(nums([min(StrM),max(StrN)]),M,N) :-     % *M..N
  461	number_string(M,StrM),
  462	number_string(N,StrN).
  463
  464% repeat evaluation loop, evaluates to a list
  465repeat_eval(Max, _Min, Max, _Exp, _Env, _Input, PosIn, PosIn, []) :- !.  % terminate if C=Max
  466repeat_eval(C,    Min, Max,  Exp,  Env,  Input, PosIn, PosOut, R) :- 
  467	eval_(Exp, Env, Input, PosIn, PosN, Re),
  468	PosN > PosIn,  % expressions in loops must consume
  469	!,
  470	C1 is C+1,     % increment count
  471	(Re == []      % don't accumulate empty results
  472	 -> repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, R) 
  473	 ;  R = [Re|Rs],
  474	    repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, Rs)
  475	).
  476repeat_eval(C,    Min,_Max, _Exp, _Env, _Input, PosIn, PosIn, []) :-  % eval failed
  477	C >= Min.      % C greater than or equal Min, else fail
  478
  479
  480% pre instruction (lookahead eval_)
  481prefix_eval("&", Exp, Env, Input, PosIn, PosIn) :- %'&'
  482	eval_(Exp, Env, Input, PosIn, _PosOut, _R).       % match
  483prefix_eval("!", Exp, Env, Input, PosIn, PosIn) :- %'!'
  484	\+eval_(Exp, Env, Input, PosIn, _PosOut, _R).     % not a match
  485prefix_eval("~", Exp, Env, Input, PosIn, PosOut) :- %'~'
  486	\+eval_(Exp, Env, Input, PosIn, _PosOut, _R),     % not a match and ...
  487	\+string_length(Input,PosIn),                     % not at end of input
  488	PosOut is PosIn+1.                                % consume 1 character
  489
  490
  491% sq instruction
  492% strip outer quotes and map escapes
  493literal_match_(S,Match) :-
  494	match_chars(S,Chars),                    % convert S string to escaped Chars list
  495	string_chars(Match,Chars).               % string to match
  496
  497
  498% chars instruction
  499% construct list of MChars for matching
  500match_chars(MatchSet, MChars) :- 
  501	sub_string(MatchSet,1,_,1,Str),  % strips outer [], ", '
  502	string_chars(Str,Chars),
  503	unescape_(Chars,MChars).
  504
  505unescape_([],[]).
  506unescape_(['\\',x,C1,C2|NxtChars],[Esc|MChars]) :-
  507	char_type(C1,xdigit(V1)), char_type(C2,xdigit(V2)), !,
  508	VEsc is (V1*16+V2),
  509	char_code(Esc,VEsc),
  510	unescape_(NxtChars,MChars).
  511unescape_(['\\',u,C1,C2,C3,C4|NxtChars],[Esc|MChars]) :-
  512	char_type(C1,xdigit(V1)), char_type(C2,xdigit(V2)), char_type(C3,xdigit(V3)), char_type(C4,xdigit(V4)), !,
  513	VEsc is ((V1*16+V2)*16+V3)*16+V4,
  514	char_code(Esc,VEsc),
  515	unescape_(NxtChars,MChars).
  516unescape_(['\\','U',C1,C2,C3,C4,C5,C6,C7,C8|NxtChars],[Esc|MChars]) :-
  517	char_type(C1,xdigit(V1)), char_type(C2,xdigit(V2)), char_type(C3,xdigit(V3)), char_type(C4,xdigit(V4)), 
  518	char_type(C5,xdigit(V5)), char_type(C6,xdigit(V6)), char_type(C7,xdigit(V7)), char_type(C8,xdigit(V8)), !,
  519	VEsc is ((((((V1*16+V2)*16+V3)*16+V4)*16+V5)*16+V6)*16+V7)*16+V8,
  520	char_code(Esc,VEsc),
  521	unescape_(NxtChars,MChars).
  522unescape_(['\\',CEsc|Chars],[Esc|MChars]) :-
  523	std_escape_(CEsc,Esc), !,
  524	unescape_(Chars,MChars).
  525unescape_([Char|Chars],[Char|MChars]) :-
  526	unescape_(Chars,MChars).
  527
  528std_escape_('n','\n').
  529std_escape_('r','\r').
  530std_escape_('t','\t').
  531
  532% search for Ch in list of MChars (including ranges)
  533chars_in_match([],_Ch,In) :- In == notin.                 % EOList, succeed if 'notin'
  534chars_in_match([Cl,'-',Cu|MChars],Ch,In) :- !,            % range
  535	(Cl@=<Ch,Ch@=<Cu -> In == in ; chars_in_match(MChars,Ch,In)).
  536chars_in_match([Cl|MChars],Ch,In) :-                      % equivalence
  537	(Cl==Ch -> In == in ; chars_in_match(MChars,Ch,In)).
  538
  539
  540% id/call instruction
  541% recursive loop check - SWI Prolog specific
  542recursive_loop_check(Goal,Last,Pos,Name) :-
  543	prolog_current_frame(F),                % this frame
  544	prolog_frame_attribute(F,parent,IPF),   % caller's frame
  545	prolog_frame_attribute(IPF,parent,GPF), % caller's predecessor's frame
  546	(once(prolog_frame_attribute(GPF,parent_goal,Goal)), Last=Pos 
  547	 -> % found a parent call with identical cursor position ==> infinte recursion
  548	    peg_notrace,
  549	    format(string(Message),"pPEG infinite recursion applying ~w",[Name]),
  550	    throw(error(resource_error(Message),_))
  551	 ;  true
  552	).
  553
  554% flatten arguments and remove [] (uses difference lists)
  555flatten_([], Tl, Tl) :-
  556	!.
  557flatten_([Hd|Tl], Tail, List) :-
  558	!,
  559	flatten_(Hd, FlatHeadTail, List),
  560	flatten_(Tl, Tail, FlatHeadTail).
  561flatten_(NonList, Tl, [NonList|Tl]).
  562
  563% maps a scheme name (string) and rule name to RType, member(Treat,[anonymous,dynamic,component])
  564% %:- multifile scheme_treatment/3.          % user hook
  565
  566scheme_treatment("=",Name,Treat) :- 
  567	sub_string(Name,0,1,_,C),
  568	pPEG_type(C,Treat).
  569scheme_treatment(":",_Name,anonymous).
  570scheme_treatment(":=",_Name,component).
  571scheme_treatment("=:",_Name,leaf).
  572
  573% "=" rules, Name dependent
  574pPEG_type("_",anonymous).
  575pPEG_type(C,dynamic) :- char_type(C,lower), !.
  576pPEG_type(_,component).
  577
  578% build a ptree from a flattened list of args
  579build_ptree(anonymous,_Args,_Match,_Name,[]) :- !. 
  580build_ptree(leaf,_,slice(Input,PosIn,PosOut),Name,R) :- !,  % matching text
  581	Len is PosOut-PosIn,
  582	sub_string(Input,PosIn,Len,_,Arg),
  583	R =.. [Name,Arg].
  584build_ptree(dynamic,[],Match,Name,R) :- !,         % no args -> leaf
  585	build_ptree(leaf,_,Match,Name,R).
  586build_ptree(dynamic,[Arg],_Match,_Name,Arg) :- !,  % single arg, unwrap
  587	compound(Arg).
  588build_ptree(_RType,Arg,_Match,Name,R) :-           % general case
  589	R =.. [Name,Arg].
  590
  591
  592% extn instruction
  593% convert extension contents to callable Mod:Pred(StringArg)
  594extn_pred(S,T) :-
  595	(sub_string(S,Pos,1,_," ")                 % contains a space at Pos
  596	 -> FLen is Pos-1,                         % functor length
  597	    sub_atom(S,1,FLen,_,Pred),             % strip <
  598	    APos is Pos+1,                         % StringArg pos           
  599	    sub_string(S,APos,_,1,S1),             % also strip >    
  600	    split_string(S1,""," ",[StringArg])    % and trim whitespace from Arg 
  601	 ;  sub_atom(S,1,_,1,Pred),                % empty StringArg
  602	    StringArg = ""
  603	),
  604	(split_string(Pred,':','',[SM,SF])         % optional module specification
  605	 -> atom_string(M,SM), atom_string(F,SF),
  606	    P =.. [F,StringArg],
  607	    T = M:P
  608	 ;  T =.. [Pred,StringArg]
  609	).
  610
  611% extensions call T/6 if defined, else just a tracepoint with nothing returned
  612extn_call(T,Env,Input,PosIn,PosOut,R) :-
  613	catch(call(T,Env,Input,PosIn,PosOut,R),
  614	      Err, extn_error(Err,T,Env,Input,PosIn,PosOut,R)
  615	).
  616
  617extn_error(error(existence_error(procedure,_),_),T,_Env,Input,PosIn,PosIn,[]) :- !,
  618	sub_string(Input,PosIn,_,0,Rem),
  619	print_message(information, peg(extension(T,Rem))).
  620extn_error(Err,_T,_Env,_Input,_PosIn,_PosOut,_R) :-
  621	throw(Err).
  622
  623prolog:message(peg(extension(T,Rem))) -->  % DCG
  624	[ "Extension ~p parsing: ~p\n" - [T,Rem] ].
  625
  626%
  627% set tracing on named rules
  628%
  629peg_add_tracing([],Grammar,Grammar) :- !.  % nothing to trace
  630peg_add_tracing(TRules,Grammar,GrammarT) :-
  631	( (Grammar = [Rule|_], functor(Rule,rule,_))
  632	 -> duplicate_term(Grammar,GrammarC)   % create duplicate of optimized grammar
  633	 ;  GrammarC = Grammar                 % unoptimized case copies as needed
  634	),
  635	add_tracing(TRules,GrammarC,GrammarT).
  636
  637add_tracing([],Grammar,Grammar) :- !.
  638add_tracing([Name|Names],Grammar,GrammarT) :- !,
  639	add_tracing(Name,Grammar,NxtGrammar), 
  640	add_tracing(Names,NxtGrammar,GrammarT).
  641add_tracing(Name,Grammar,GrammarT) :-
  642	add_trace(Grammar,Name,GrammarT).
  643	
  644add_trace([],_SName,[]).
  645add_trace([rule([id(SName), def(Def), Exp])|Rules], Name, 
  646          [rule([id(SName), trace(rule(AName,Treat,Exp))])|Rules]) :-
  647	nonvar(Exp),              % must be defined
  648	atom_string(AName,SName), % SName and Name equivalent to AName
  649	atom_string(AName,Name),
  650	scheme_treatment(Def,SName,Treat),
  651	!.
  652add_trace([Rule|Rules], Name, [Rule|Rules]) :-
  653	Rule = rule(AName, Treat, Exp),  % optimized Rule
  654	nonvar(Exp),              % must be defined
  655	atom_string(AName,Name),  % name matches
  656	!,
  657	% overwrite expression in place so all call references persist
  658	setarg(3,Rule,trace(rule(AName,Treat,Exp))).
  659add_trace([Rule|Rules], Name, [Rule|RulesT]) :-
  660	add_trace(Rules, Name, RulesT).
  661
  662%
  663% enable/disable tracing (from trace instruction)
  664%
  665peg_trace :-
  666	debug(pPEG(trace)),
  667	wrap_predicate(pPEG:eval_(Inst, Env, Input, PosIn, PosOut, R), 
  668	               'pPEG:eval_',
  669	               Wrapped,
  670	               doEval_wrap_(Wrapped, Inst, Env, Input, PosIn, PosOut, R)).
  671
  672peg_notrace :-
  673	(debugging(pPEG(trace),true)
  674	 -> unwrap_predicate(pPEG:eval_/6, 'pPEG:eval_'),
  675	    nodebug(pPEG(trace))
  676	 ;  true
  677	).
  678
  679doEval_wrap_(Wrapped, Inst, Env, Input, PosIn, PosOut, R) :-
  680	peg_inst_type(Inst,Type),
  681	vm_instruction(Inst,TInst),
  682	persistent_env_(Env,PEnv),
  683	peg_trace_port_(Type, call, TInst, PEnv, Input, PosIn, PosOut, R),	
  684	(Wrapped   % execute eval_/6
  685	 -> peg_trace_port_(Type, exit, TInst, PEnv, Input, PosIn, PosOut, R)
  686	 ;  peg_trace_port_(Type, fail, TInst, PEnv, Input, PosIn, PosOut, R),
  687	    fail
  688	).
  689
  690peg_trace_port_(call, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  691	peg_cursor_pos(Input,PosIn,Cursor),
  692	peg_trace_msg(postInc, PEnv, "~w~w~w", [Cursor,TInst]).            % with indent parm
  693peg_trace_port_(call, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  694	peg_cursor_pos(Input,PosIn,Cursor),
  695	peg_trace_input(Input,PosIn,Str),
  696	peg_trace_msg(preDec, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). % with indent parm
  697peg_trace_port_(call, exit, TInst, PEnv, Input, PosIn, PosOut, R) :- !,
  698	peg_cursor_pos(Input,PosOut,Cursor),		
  699	(R = []  % if null result (_rule), print matching string
  700	 -> Len is PosOut-PosIn,
  701	    sub_string(Input,PosIn,Len,_,RT) 
  702	 ;  RT = R
  703	),
  704	(string(RT) -> MatchOp = "==" ; MatchOp = "=>"),
  705	peg_trace_msg(preDec, PEnv, "~w~w~w ~w \t~p", [Cursor,TInst,MatchOp,RT]). % with indent parm
  706peg_trace_port_(meta, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  707	peg_cursor_pos(Input,PosIn,Cursor),
  708	peg_trace_msg(indent, PEnv, "~w~w~w", [Cursor,TInst]).             % with indent parm
  709peg_trace_port_(terminal, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
  710	peg_cursor_pos(Input,PosIn,Cursor),
  711	peg_trace_input(Input,PosIn,Str),
  712	peg_trace_msg(indent, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). % with indent parm
  713peg_trace_port_(terminal, exit, TInst, PEnv, Input, PosIn, PosOut, _R) :- !,
  714	peg_cursor_pos(Input,PosOut,Cursor),
  715	Len is PosOut-PosIn,
  716	sub_string(Input,PosIn,Len,_,RT),
  717	peg_trace_msg(indent, PEnv, "~w~w~w == \t~p", [Cursor,TInst,RT]).  % with indent parm
  718peg_trace_port_(_Other, _, _, _, _, _, _, _).  % else no trace message
  719
  720peg_inst_type(alt(_),meta).
  721peg_inst_type(seq(_),meta).
  722peg_inst_type(pre(_),call).
  723peg_inst_type(rep(_),meta).
  724peg_inst_type(rep_O(_,_,_),meta).
  725peg_inst_type(quote(_),terminal). 
  726peg_inst_type(quote_O(_,_),terminal).
  727peg_inst_type(class(_),terminal).
  728peg_inst_type(class_O(_,_),terminal).
  729peg_inst_type(dot(_),terminal).
  730peg_inst_type(extn(_),terminal).
  731peg_inst_type(id(_),notrace).               % not traced, caught in call_O
  732peg_inst_type(call_O(Rule),Type) :-  % don't trace calls which are explicitly traced
  733	arg(3,Rule,trace(_)) -> Type = notrace ; Type = call.
  734peg_inst_type(trace(_),notrace).            % not traced, caught in call_O
  735
  736peg_cursor_pos(Input,Pos,Cursor) :-
  737	string_length(Input,InputLen),                             % Pos may be past Input length
  738	StartPos is min(Pos,InputLen-1),
  739	peg_line_pos(Input,StartPos,0,1,_Text,LinePos,LineNo),     % source text information
  740	CPos is LinePos +1,                                        % cursor position is 1 based
  741	format(string(Cursor),"~` t~d~4+.~d~4+",[LineNo,CPos]).    % more format tab magic
  742
  743peg_line_pos("",_Pos,_LinePos,LineNum,"",0,LineNum) :- !.      % corner case: empty string
  744peg_line_pos(Input,Pos,LinePos,LineNum,Text,EPos,ELineNum) :-  % assumes Pos has been range checked
  745	% Note: could use a pPEG for line matching, but this avoids re-entrant issues with globalvars
  746	re_matchsub("[^\n\r]*(\n|\r\n?)?",Input,Match,[start(LinePos)]),  % match a line
  747	string_length(Match.0,Len),
  748	NxtLinePos is LinePos+Len,
  749	((LinePos =< Pos,Pos < NxtLinePos)                 % Pos is within this line?
  750	 -> string_concat(Text,Match.get(1,""),Match.0),   % yes
  751	    EPos is Pos-LinePos,
  752	    ELineNum = LineNum
  753	 ;  NxtLineNum is LineNum+1,                       % no
  754	    peg_line_pos(Input,Pos,NxtLinePos,NxtLineNum,Text,EPos,ELineNum) 
  755	).
  756
  757peg_trace_input(Input,PosIn,Str) :-
  758	sub_string(Input,PosIn,L,0,SStr),          % current residue
  759	(L =< 32
  760	 -> Str = SStr
  761	  ; sub_string(SStr,0,32,_,SStr1),
  762	    string_concat(SStr1," ... ",Str)
  763	).
  764
  765peg_trace_msg(postInc, PEnv, Msg, [Cursor|Args]) :-
  766	arg(2,PEnv,Indent),
  767	debug_peg_trace(Msg,[Cursor,Indent|Args]),
  768	string_concat(Indent,"|  ",NxtIndent),      % add "|  " to current indent
  769	nb_linkarg(2,PEnv,NxtIndent).
  770peg_trace_msg(preDec, PEnv, Msg, [Cursor|Args]) :-
  771	arg(2,PEnv,Indent),
  772	sub_string(Indent,0,_,3,NxtIndent),         % subtract 3 chars from end of current indent
  773	debug_peg_trace(Msg,[Cursor,NxtIndent|Args]),
  774	nb_linkarg(2,PEnv,NxtIndent).
  775peg_trace_msg(indent, PEnv, Msg, [Cursor|Args]) :-
  776	arg(2,PEnv,Indent),
  777	debug_peg_trace(Msg,[Cursor,Indent|Args]).
  778
  779%
  780% de-compile VM instructions of interest, used for tracing and error messages
  781%
  782vm_instruction(id(Name), Name).
  783vm_instruction(call_O(Var), "??undefined rule??") :- var(Var),!.  % for rule_body undefined
  784vm_instruction(call_O(rule(Name,_Treat,_Exp)), Name). 
  785vm_instruction(seq(Exps), Is) :-
  786	vm_instruction_list(Exps,LIs),
  787	atomics_to_string(LIs," ",Is0),
  788	atomics_to_string(["(",Is0,")"],Is).
  789vm_instruction(alt(Exps), Is) :-
  790	vm_instruction_list(Exps,LIs),
  791	atomics_to_string(LIs," / ",Is0),
  792	atomics_to_string(["(",Is0,")"],Is).
  793vm_instruction(rep([Exp, Sfx]), Is) :-
  794	vm_rep_sfx(Sfx,ROp), !,
  795	vm_instruction(Exp,I),
  796	string_concat(I,ROp,Is).
  797vm_instruction(rep_O(Exp, Min, Max), Is) :-
  798	rep_counts(Sfx, Min, Max), !,
  799	vm_instruction(rep([Exp, Sfx]), Is).
  800vm_instruction(pre([pfx(Chs),Exp]), Is) :-
  801	vm_instruction(Exp,I),
  802	string_concat(Chs,I,Is).
  803vm_instruction(quote(Match), Is) :-
  804	unescape_std(Match,Is).
  805vm_instruction(quote_O(Case,Match), Is) :-
  806	(Case = exact -> Sens = "" ; Sens = "i"),
  807	unescape_std(Match,S1),
  808	unescape_string(S1,"'","\\u0027",S),
  809	atomics_to_string(["'",S,"'",Sens],Is).
  810vm_instruction(class(Match), Is) :-
  811	unescape_std(Match,Is).
  812vm_instruction(class_O(In,MChars), Is) :-
  813	(In = notin -> Pfx = '~' ; Pfx = ''),
  814	string_chars(MStr,MChars),
  815	unescape_std(MStr,S),
  816	unescape_string(S,"]","\\u005d",S1),
  817	atomics_to_string([Pfx,"[",S1,"]"],Is).
  818vm_instruction(dot(_), ".").
  819vm_instruction(extn(Ext), Is) :-
  820	(string(Ext)
  821	 -> Is = Ext                              % native string format
  822	 ;  (Ext = Mod:Pred
  823	     -> Pred =.. [Func,StringArg],        % module qualified predicate
  824	        atomics_to_string(['<',Mod,':',Func,' ',StringArg,'>'],Is)
  825	     ;  Ext =.. [Func,StringArg],         % plain predicate
  826	        atomics_to_string(['<',Func,' ',StringArg,'>'],Is)
  827	    )
  828	).
  829vm_instruction(trace(Rule), Is) :-
  830	vm_instruction(call_O(Rule), Is).
  831
  832vm_instruction_list([],[]).
  833vm_instruction_list([Exp|Exps],[Is|LIs]) :-
  834	vm_instruction(Exp,Is),
  835	vm_instruction_list(Exps,LIs).
  836
  837vm_rep_sfx(sfx(ROp), ROp).
  838vm_rep_sfx(num(StrN), ROp) :-                      atomics_to_string(["*",StrN],ROp).
  839vm_rep_sfx(range([num(StrN),_]), ROp) :-           atomics_to_string(["*",StrN,".."],ROp).
  840vm_rep_sfx(range([num(StrM),_,num(StrN)]), ROp) :- atomics_to_string(["*",StrM,"..",StrN],ROp).
  841
  842unescape_string(Sin,Esc,Usc,Sout) :-
  843	split_string(Sin,Esc,"",L),
  844	atomics_to_string(L,Usc,Sout).
  845
  846unescape_std(Sin,Sout) :-
  847	string_chars(Sin,CharsIn),
  848	escape_chars(CharsIn,CharsOut),
  849	string_chars(Sout,CharsOut).
  850
  851escape_chars([],[]).
  852escape_chars([C|CharsIn],[C|CharsOut]) :-
  853	char_code(C,CS), between(32,126,CS), !,     % ASCII
  854	escape_chars(CharsIn,CharsOut).
  855escape_chars([ECh|CharsIn],['\\',Ch|CharsOut]) :- 
  856	std_escape_(Ch,ECh),!,                      % escapes
  857	escape_chars(CharsIn,CharsOut).
  858escape_chars([C|CharsIn],['\\','u',X1,X2,X3,X4|CharsOut]) :-
  859	char_code(C,CS), % (CS =< 31 ; CS >= 127),  % outside ASCII, but not std escape
  860	divmod(CS,16,Q4,R4),
  861	divmod(Q4,16,Q3,R3),
  862	divmod(Q3,16,R1,R2),
  863	char_type(X1,xdigit(R1)), char_type(X2,xdigit(R2)), char_type(X3,xdigit(R3)), char_type(X4,xdigit(R4)),
  864	escape_chars(CharsIn,CharsOut).
  865
  866%
  867% optimizing compiler for use with peg_compile
  868% normally takes unoptimized ptree as input, but it's idempotent
  869% produces an optimized grammar object which is faster but not a ptree
  870%
  871optimize_peg('Peg'(Rules),'Peg'(RulesO,RRefs)) :-
  872	(optimize_rules(Rules,RDefs,RulesO)
  873	 -> once(length(RDefs,_)),         % make indefinite list definite
  874	    chk_RDefs(RulesO,RDefs,RRefs)  % must be done after optimize so as to not corrupt refs
  875	 ;  (Rules = [rule([id(GName),_,_])|_Rules] -> true ; GName = "?unknown?"),
  876	    print_message(warning,peg(optimize_fail(GName))),  % ensures failure msg               
  877	    fail
  878	 ).
  879
  880chk_RDefs([],RDefs,[]) :-
  881	forall(member(Name:_,RDefs), print_message(warning, peg(undefined(Name)))).
  882chk_RDefs([rule(PName,_,_)|Rules],RDefs,[_|RRefs]) :-
  883	memberchk(rule(PName,_,_),Rules), !,              % check for duplicates
  884	print_message(warning,peg(duplicate(PName))),     % found, later rules overwrite                
  885	chk_RDefs(Rules,RDefs,RRefs).
  886chk_RDefs([rule(PName,_,_)|Rules],RDefs,[RRef|RRefs]) :-
  887	atom_string(PName,Name),
  888	remove_def(RDefs,Name,RRef,NxtRDefs),
  889	chk_RDefs(Rules,NxtRDefs,RRefs).
  890
  891remove_def([],_Name,_RRef,[]).
  892%	print_message(warning, peg(unreferenced(Name))).
  893remove_def([Name:RRef|RDefs],Name,RRef,RDefs) :- !.
  894remove_def([RDef|RDefs],Name,RRef,[RDef|NxtRDefs]) :-
  895	remove_def(RDefs,Name,RRef,NxtRDefs).
  896
  897prolog:message(peg(duplicate(Name))) -->  % DCG
  898	[ "pPEG: duplicate rule ~w, last definition will apply" - [Name] ].
  899
  900prolog:message(peg(optimize_fail(GName))) -->  % DCG
  901	[ "pPEG: grammar ~w optimization failed" - [GName] ].
  902
  903optimize_rules([],_RDefs,[]).
  904optimize_rules([Rule|Rules],RDefs,[RuleO|RulesO]) :-
  905	optimize_rule(Rule,RDefs,RuleO),
  906	optimize_rules(Rules,RDefs,RulesO).
  907
  908optimize_rule(rule([id(Name), def(Def), Exp]), RDefs, rule(PName,Treat,ExpO)) :- !, % unoptimized rule 
  909	atom_string(PName,Name),  % optimised rule name is atom for building ptrees
  910	scheme_treatment(Def,Name,Treat),
  911	optimize_exp(Exp, RDefs, ExpO).
  912optimize_rule(rule(Name,Treat,Exp), _RDefs, rule(Name,Treat,Exp)).  % already optimized?
  913
  914optimize_exp(id(Name), RDefs, call_O(Rule)) :-          % id(Name) ==> call_O(Rule)
  915	memberchk(Name:Rule, RDefs).
  916
  917optimize_exp(seq(Ins), RDefs, seq(Opt)) :-
  918	optimize_exp_list(Ins,RDefs,Opt).
  919
  920optimize_exp(alt(Ins), RDefs, alt(Opt)) :-
  921	optimize_exp_list(Ins,RDefs,Opt).
  922
  923optimize_exp(rep([Exp, ROp]), RDefs, rep_O(ExpO, Min, Max)) :-
  924	rep_counts(ROp,Min,Max), !,
  925	optimize_exp(Exp,RDefs,ExpO).
  926
  927optimize_exp(pre([pfx("~"), class(MatchSet)]), RDefs, class_O(notin,MChars)) :- !,
  928	optimize_exp(class(MatchSet), RDefs, class_O(_,MChars)).
  929optimize_exp(pre([pfx(POp), Exp]), RDefs, pre([pfx(POp), ExpO])) :-
  930	optimize_exp(Exp,RDefs,ExpO).
  931
  932optimize_exp(class(MatchSet), _RDefs, class_O(in,MChars)) :- 
  933	match_chars(MatchSet, MChars).
  934
  935optimize_exp(quote(QS), _RDefs, quote_O(Case,Match)) :-
  936	(sub_string(QS,_,1,0,"i")                  % case insensitive match test
  937	 -> Case = upper,
  938	    sub_string(QS,0,_,1,S),                % strip i
  939	    literal_match_(S,AMatch),              % string to match
  940	    string_upper(AMatch,Match)
  941	 ;  Case = exact,
  942	    literal_match_(QS,Match)               % string to match
  943	).
  944
  945optimize_exp(dot(D), _RDefs, dot(D)).          % nothing to optimize
  946
  947optimize_exp(extn(S), _RDefs, extn(T)) :-      % extn "instruction"
  948	(string(S) -> extn_pred(S,T) ; T = S).
  949
  950optimize_exp(call_O(Rule), _RDefs, call_O(Rule)).                 % already optimized?
  951optimize_exp(rep_O(Exp, Min, Max), _RDefs, rep_O(Exp, Min, Max)). % already optimized?
  952optimize_exp(quote_O(C,M), _RDefs, quote_O(C,M)).                       % already optimized?
  953optimize_exp(class_O(M), _RDefs, class_O(M)).                         % already optimized?
  954% Note: trace instructions don't appear in static grammar.
  955
  956optimize_exp_list([],_RDefs,[]).
  957optimize_exp_list([Exp|Exps],RDefs,[ExpO|ExpOs]) :-
  958	optimize_exp(Exp,RDefs,ExpO),
  959	optimize_exp_list(Exps,RDefs,ExpOs).
  960
  961%
  962% time to initialize...
  963%
  964:- initialization(init_peg,now).