26:- module(pPEG,[ 27 peg_compile/2, 28 peg_compile/3, 29 peg_parse/3, 30 peg_parse/5, 31 peg_grammar/1, 32 peg_lookup_previous/3, 33 pPEG/4 34 ]). 35
36version("2.1.0").
37
38:- version(pPEG(versionInfo)). 39
40:- use_module(library(strings),[string/4]). 41:- use_module(library(lists),[reverse/2]). 42:- use_module(library(prolog_wrap)). 43:- use_module(library(option),[option/3]). 44:- use_module(library(pcre),[re_matchsub/4]). 45:- use_module(library(quasi_quotations), [ 46 quasi_quotation_syntax/1,
47 with_quasi_quotation_input/3
48 ]). 49
50%
51% the "standard" pPEG grammar source for bootstrapping and reference, e.g.,
52% ?- peg_grammar(S), write_term(S,[]).
53%
54peg_grammar({|string||
55 Peg = _ rule+
56 rule = id _ def _ alt
57 def = '=' ':'? / ':' '='?
58
59 alt = seq ('/'_ seq)*
60 seq = rep+
61 rep = pre sfx? _
62 pre = pfx? term
63 term = call / quote / class / dot /group / extn
64
65 group = '('_ alt ')'
66 call = id _ !def
67 id = [a-zA-Z_] [a-zA-Z0-9_-]*
68 pfx = [&!~]
69 sfx = [+?] / '*' nums?
70 nums = min ('..' max)?
71 min = [0-9]+
72 max = [0-9]*
73 quote = ['] ~[']* ['] 'i'?
74 class = '[' ~']'* ']'
75 dot = '.'
76 extn = '<' ~'>'* '>'
77 _ = ([ \t\n\r]+ / '#' ~[\n\r]* )*
78
79|}).
80
84boot_grammar_def('Peg'([
85 rule([id("Peg"), def("="), seq([id("_"), rep([id("rule"), sfx("+")]), id("_")])]),
86 rule([id("rule"), def("="), seq([id("id"), id("_"), id("def"), id("_"), id("alt")])]),
87 rule([id("def"), def("="), quote("'='")]),
88
89 rule([id("alt"), def("="), seq([id("seq"), rep([seq([quote("'/'"), id("_"), id("seq")]), sfx("*")])])]),
90 rule([id("seq"), def("="), rep([id("rep"), sfx("*")])]),
91 rule([id("rep"), def("="), seq([id("pre"), rep([id("sfx"), sfx("?")]), id("_")])]),
92 rule([id("pre"), def("="), seq([rep([id("pfx"), sfx("?")]), id("term")])]),
93 rule([id("term"), def("="), alt([id("call"), id("quote"), id("class"), id("group")])]),
94
95 rule([id("group"), def("="), seq([quote("'('"), id("_"), id("alt"), quote("')'")])]),
96 rule([id("pfx"), def("="), class("[&!~]")]),
97 rule([id("sfx"), def("="), class("[+?*]")]),
98
99 rule([id("call"), def("="), seq([id("id"), id("_"), pre([pfx("!"), id("def")])])]),
100 rule([id("id"), def("="), seq([class("[a-zA-Z_]"), rep([class("[a-zA-Z0-9_-]"), sfx("*")])])]),
101 rule([id("quote"), def("="), seq([class("[']"), rep([pre([pfx("~"), class("[']")]), sfx("*")]), class("[']"), rep([quote("'i'"), sfx("?")])])]),
102 rule([id("class"), def("="), seq([quote("'['"), rep([pre([pfx("~"), quote("']'")]), sfx("*")]), quote("']'")])]),
103 rule([id("_"), def("="), rep([alt([seq([quote("'#'"), rep([pre([pfx("~"), class("[\\n\\r]")]), sfx("*")])]), rep([class("[ \\t\\n\\r]"), sfx("+")])]), sfx("*")])])
104], _)).
105
109:-set_prolog_flag(optimise,false). 110
112debug_peg_trace(FString,Args) :- debug(pPEG(trace),FString,Args).
113
114:-set_prolog_flag(optimise,true). 115
117init_peg :-
118 foreach((nb_current(Key,_), atom_concat('pPEG:',_,Key)), nb_delete(Key)), 119 nodebug(pPEG(trace)), 120 bootstrap_grammar. 121
122user:exception(undefined_global_variable,'pPEG:$pPEG',retry) :-
123 bootstrap_grammar. 124
125bootstrap_grammar :-
126 boot_grammar_def(BootPeg), 127 nb_setval('pPEG:$pPEG',BootPeg),
128 peg_grammar(PegSrc),
129 peg_compile(PegSrc,pPEG,[optimise(true)]). 130
134:- quasi_quotation_syntax(pPEG). 135
136pPEG(Content, Args, _Binding, Grammar) :-
137 with_quasi_quotation_input(Content, Stream, read_string(Stream, _, String)),
138 peg_compile(String,Grammar,Args). 139
143peg_compile(Src, GrammarSpec) :- 144 peg_compile(Src, GrammarSpec, []).
145
146peg_compile(Src, GrammarSpec, OptionList) :- 147 peg_parse(pPEG, Src, Ptree, _, OptionList),
148 option_value(optimise(Opt),OptionList,true),
149 make_grammar(Opt,Ptree,Grammar),
150 (Grammar = GrammarSpec
151 -> true 152 ; (atom(GrammarSpec) 153 -> atomic_concat('pPEG:$',GrammarSpec,GKey),
154 nb_setval(GKey,Grammar)
155 ; current_prolog_flag(verbose,GVrbse),
156 option_value(verbose(Vrbse),OptionList,GVrbse), 157 peg_fail_msg(peg(argError('GrammarSpec',GrammarSpec)),Vrbse)
158 )
159 ).
160
161make_grammar(true,Ptree,Grammar) :- !, 162 optimize_peg(Ptree,Grammar).
163make_grammar(_,'Peg'(Rules),'Peg'(Rules,_)). 164
169peg_parse(GrammarSpec, Input, Result) :-
170 peg_parse(GrammarSpec, Input, Result, _Residue, []).
171
172peg_parse(GrammarSpec, Input, Result, Residue, OptionList) :-
173 174 option_value(incomplete(Incomplete),OptionList,false), 175 option_value(tracing(TRules),OptionList,[]), 176 current_prolog_flag(verbose,GVrbse),
177 option_value(verbose(Vrbse),OptionList,GVrbse), 178 peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,Env,Eval), 179 (eval_(Eval, Env, Input, 0, PosOut, Result0) 180 -> (Result0 = [] -> sub_string(Input,0,PosOut,_,Result) ; Result = Result0) 181 ; error_info(Env,_,ErrorInfo), 182 peg_fail_msg(peg(errorinfo(Input,ErrorInfo,"")),Vrbse) 183 ),
184 (string_length(Input,PosOut) 185 -> Residue = "" 186 ; (Incomplete == true 187 -> sub_string(Input,PosOut,_,0,Residue) 188 ; \+eval_(Eval, Env, Input, PosOut, _, _), 189 error_info(Env,_,ErrorInfo), 190 peg_fail_msg(peg(errorinfo(Input,ErrorInfo,"fell short, (?)")),Vrbse) 191 )
192 ).
193
194option_value(Option, Options, Default) :-
195 (Options = []
196 -> arg(1,Option,Default) 197 ; option(Option, Options, Default) 198 ).
199
200peg_setup_parse_(GrammarSpec,Input,Vrbse,TRules,@(Grammar,[],0,([],[]),PEnv),Eval) :-
201 (string(Input)
202 -> true
203 ; peg_fail_msg(peg(argError('Input',Input)),Vrbse)
204 ),
205 (copy_term(GrammarSpec,'Peg'(Grammar0,Grammar0)) 206 -> true
207 ; 208 (atom(GrammarSpec), atomic_concat('pPEG:$',GrammarSpec,GKey), nb_current(GKey,'Peg'(Grammar0,Grammar0))
209 -> true
210 ; peg_fail_msg(peg(argError('Grammar',GrammarSpec)),Vrbse)
211 )
212 ),
213 peg_add_tracing(TRules,Grammar0,Grammar), 214 (Vrbse == normal 215 -> PEnv = (@([GName],0,id(GName)), "") 216 ; PEnv = (@(), "")
217 ),
218 Grammar = [FirstRule|_], 219 (FirstRule = rule([Eval|_]) 220 -> Eval = id(GName) 221 ; Eval = call_O(FirstRule), 222 arg(1,FirstRule,GName)
223 ).
224
228goal_expansion(persistent_env_(Env,PEnv), arg(5,Env,PEnv)). 229goal_expansion(grammar_(Env,Rules), arg(1,Env,Rules)). 230goal_expansion(error_info(Env,PEnv,ErrorInfo), 231 (persistent_env_(Env,PEnv), arg(1,PEnv,ErrorInfo))
232 ).
233goal_expansion(update_error_info(PEnv,ErrorInfo), 234 nb_linkarg(1,PEnv,ErrorInfo)
235 ).
237grammar_(Env,Rules) :- arg(1,Env,Rules).
238context_(Env,Ctxt) :- arg(4,Env,Ctxt).
239persistent_env_(Env,PEnv) :- arg(5,Env,PEnv).
240error_info(Env,PEnv,ErrorInfo) :- persistent_env_(Env,PEnv), arg(1,PEnv,ErrorInfo).
241update_error_info(PEnv,ErrorInfo) :- nb_linkarg(1,PEnv,ErrorInfo).
242
244peg_fail_msg(Msg, normal) :- 245 print_message(informational, Msg),
246 fail.
247
248:- multifile prolog:message/1. 249
250prolog:message(pPEG(versionInfo)) -->
251 { version(Version) },
252 [ '*** pPEG v~w ***.'-[Version] ].
253
254prolog:message(peg(argError(Arg,Value))) --> 255 [ "pPEG Error: invalid argument, ~w = ~w" - [Arg,Value] ].
256
257prolog:message(peg(errorinfo(Input,@(Names,Pos,Op),Etext))) --> 258 {reverse(Names,Path),
259 atomics_to_string(Path,' -> ',PathExp),
260 string_length(Input,InputLen), 261 (Pos == InputLen -> Caret = CPos ; Caret = EPos), 262 StartPos is min(Pos,InputLen-1),
263 peg_line_pos(Input,StartPos,0,1,Text,EPos,ELineNum), 264 CPos is EPos+1, 265 vm_instruction(Op,Expected)
266 },
267 268 [ 'pPEG Error: ~w failed.\n% ~wexpected ~w at line ~w.~w:\n% ~|~` t~d~3+ | ~w\n% ~|~` t~3+ ~|~` t~*+^'
269 - [PathExp,Etext,Expected,ELineNum,CPos,ELineNum,Text,Caret]
270 ].
271
272prolog:message(peg(undefined(RuleName))) --> 273 [ 'pPEG: ~w undefined' - [RuleName] ]. 274
275prolog:message(peg(scheme(RuleName,Scheme))) --> 276 [ 'pPEG: ~w ~w ... scheme undefined' - [RuleName,Scheme] ].
277
281peg_lookup_previous(Name,Env,Match) :-
282 arg(4,Env,Ctxt), 283 (var(Name)
284 -> lookup_match_(Ctxt,RName,Match), 285 atom_string(RName,Name)
286 ; atom_string(RName,Name), 287 lookup_match_(Ctxt,RName,Match)
288 ).
289
290lookup_match_((Matches,Parent),Name,Match) :-
291 (memberchk((Name,slice(Input,PosIn,PosOut)),Matches)
292 -> Len is PosOut-PosIn, 293 sub_string(Input,PosIn,Len,_,Match)
294 ; lookup_match_(Parent,Name,Match) 295 ).
296
300eval_(id(Name), Env, Input, PosIn, PosOut, R) :- 301 atom_string(PName,Name), 302 arg(1,Env,Grammar), 303 (memberchk(rule([id(Name), def(Def), Exp]), Grammar) 304 -> scheme_treatment(Def,Name,Treat), 305 eval_(call_O(rule(PName,Treat,Exp)), Env, Input, PosIn, PosOut, R) 306 ; print_message(warning, peg(undefined(PName))), 307 fail
308 ).
309
310eval_(alt(Ss), Env, Input, PosIn, PosOut, R) :- 311 error_info(Env,PEnv,ErrorInfo),
312 (alt_eval(Ss, Env, Input, PosIn, PosOut, R)
313 -> update_error_info(PEnv,ErrorInfo) 314 ; error_info(Env,_,FErrorInfo), 315 arg(2,FErrorInfo,FHWM),
316 PosIn == FHWM, 317 eval_fail_(alt(Ss),Env,PosIn) 318 ).
319
320eval_(seq(Ss), Env, Input, PosIn, PosOut, R) :- 321 seq_eval(Ss, PosIn, Env, Input, PosIn, PosOut, R).
322
323eval_(rep([Exp, ROp]), Env, Input, PosIn, PosOut, R) :- 324 rep_counts(ROp,Min,Max), !, 325 eval_(rep_O(Exp, Min, Max), Env, Input, PosIn, PosOut, R).
326
327eval_(pre([pfx(POp), Exp]), Env, Input, PosIn, PosOut, []) :- 328 error_info(Env,PEnv,ErrorInfo),
329 (prefix_eval(POp, Exp, Env, Input, PosIn, PosOut) 330 -> update_error_info(PEnv,ErrorInfo) 331 ; update_error_info(PEnv,ErrorInfo), 332 eval_fail_(pre([pfx(POp), Exp]),Env,PosIn) 333 ).
334
335eval_(quote(S), Env, Input, PosIn, PosOut, []) :- 336 (sub_string(S,_,1,0,"i") 337 -> sub_string(S,0,_,1,S1), 338 literal_match_(S1,SMatch), 339 string_upper(SMatch,UMatch),
340 string_length(SMatch,Len),
341 sub_string(Input,PosIn,Len,_,Match),
342 string_upper(Match,UMatch) 343 ; literal_match_(S,Match), 344 sub_string(Input,PosIn,Len,_,Match) 345 ) -> PosOut is PosIn+Len ; eval_fail_(quote(S),Env,PosIn).
346
347eval_(class(MatchSet), Env, Input, PosIn, PosOut, []) :- 348 match_chars(MatchSet,MChars), 349 eval_(class_O(in,MChars), Env, Input, PosIn, PosOut, []).
350
351eval_(dot(D), Env, Input, PosIn, PosOut, []) :-
352 string_length(Input, Len),
353 (PosIn < Len 354 -> PosOut is PosIn+1 355 ; eval_fail_(dot(D),Env,PosIn)
356 ).
357
358eval_(extn(S), Env, Input, PosIn, PosOut, R) :- 359 (string(S) -> extn_pred(S,T) ; T = S), 360 (extn_call(T,Env,Input,PosIn,PosOut,R) -> true ; eval_fail_(extn(S),Env,PosIn)).
361
363eval_(call_O(rule(Name, Treat, Exp)), @(Grammar,Names,Dep,Ctxt,PEnv), Input, PosIn, PosOut, R) :- 364 365 nonvar(Exp), 366 Dep1 is Dep+1, 367 368 (Dep1 >= 64 369 -> recursive_loop_check(eval_(call_O(rule(Name,_,_)),_,_,P,_,_),P,PosIn,Name)
370 ; true
371 ),
372 eval_(Exp, @(Grammar,[Name|Names],Dep1,([],Ctxt),PEnv), Input, PosIn, PosOut, Res), 373 (Exp = trace(_)
374 -> R = Res 375 ; Match = slice(Input,PosIn,PosOut), 376 377 arg(1,Ctxt,Matches), setarg(1,Ctxt,[(Name,Match)|Matches]),
378 flatten_(Res,[],RRs), 379 build_ptree(Treat,RRs,Match,Name,R) 380 ).
381
382eval_(rep_O(Exp, Min, Max), Env, Input, PosIn, PosOut, R) :- 383 repeat_eval(0, Min, Max, Exp, Env, Input, PosIn, PosOut, R).
384
385eval_(quote_O(Case,Match), Env, Input, PosIn, PosOut, []) :- 386 (Case == exact
387 -> sub_string(Input,PosIn,Len,_,Match) 388 ; 389 string_length(Match,Len),
390 sub_string(Input,PosIn,Len,_,S),
391 string_upper(S,Match)
392 ) -> PosOut is PosIn+Len ; eval_fail_(quote_O(Case,Match),Env,PosIn).
393
394eval_(class_O(In,MChars), Env, Input, PosIn, PosOut, []) :- 395 (sub_atom(Input, PosIn, 1, _, R), 396 chars_in_match(MChars,R,In) 397 -> PosOut is PosIn+1 398 ; eval_fail_(class_O(In,MChars),Env,PosIn)
399 ).
400
401eval_(trace(Rule), Env, Input, PosIn, PosOut, R) :- 402 403 (debugging(pPEG(trace),true)
404 -> eval_(call_O(Rule),Env,Input,PosIn,PosOut,R) 405 ; current_prolog_flag(debug,DF), 406 peg_trace, 407 persistent_env_(Env,PEnv),
408 nb_linkarg(2,PEnv," "), 409 (eval_(call_O(Rule),Env,Input,PosIn,PosOut,R) 410 -> peg_notrace, 411 set_prolog_flag(debug,DF) 412 ; peg_notrace, 413 set_prolog_flag(debug,DF), 414 fail
415 )
416 ).
417
421
423eval_fail_(Op,Env,PosIn) :-
424 error_info(Env,PEnv,ErrorInfo),
425 arg(2,ErrorInfo,HWM), 426 PosIn >= HWM, 427 arg(2,Env,Names),
428 update_error_info(PEnv,@(Names,PosIn,Op)),
429 fail.
430
431
433alt_eval([S|Ss], Env, Input, PosIn, PosOut, R) :-
434 eval_(S, Env, Input, PosIn, PosOut, R) 435 -> true 436 ; alt_eval(Ss, Env, Input, PosIn, PosOut, R). 437
438
441seq_eval([], _Start, _Env, _Input, PosIn, PosIn, []).
442seq_eval([S|Ss], Start, Env, Input, PosIn, PosOut, R) :-
443 eval_(S, Env, Input, PosIn, PosNxt, Re), 444 (Re == []
445 -> seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, R) 446 ; R = [Re|Rs], 447 seq_eval(Ss, Start, Env, Input, PosNxt, PosOut, Rs) 448 ).
449
450
453rep_counts(sfx("?"),0, 1).
454rep_counts(sfx("+"),1,-1).
455rep_counts(sfx("*"),0,-1). 456rep_counts(min(StrN),N,N) :- 457 number_string(N,StrN).
458rep_counts(nums([min(StrN),max("")]),N,-1) :- 459 number_string(N,StrN).
460rep_counts(nums([min(StrM),max(StrN)]),M,N) :- 461 number_string(M,StrM),
462 number_string(N,StrN).
463
465repeat_eval(Max, _Min, Max, _Exp, _Env, _Input, PosIn, PosIn, []) :- !. 466repeat_eval(C, Min, Max, Exp, Env, Input, PosIn, PosOut, R) :-
467 eval_(Exp, Env, Input, PosIn, PosN, Re),
468 PosN > PosIn, 469 !,
470 C1 is C+1, 471 (Re == [] 472 -> repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, R)
473 ; R = [Re|Rs],
474 repeat_eval(C1, Min, Max, Exp, Env, Input, PosN, PosOut, Rs)
475 ).
476repeat_eval(C, Min,_Max, _Exp, _Env, _Input, PosIn, PosIn, []) :- 477 C >= Min. 478
479
481prefix_eval("&", Exp, Env, Input, PosIn, PosIn) :- 482 eval_(Exp, Env, Input, PosIn, _PosOut, _R). 483prefix_eval("!", Exp, Env, Input, PosIn, PosIn) :- 484 \+eval_(Exp, Env, Input, PosIn, _PosOut, _R). 485prefix_eval("~", Exp, Env, Input, PosIn, PosOut) :- 486 \+eval_(Exp, Env, Input, PosIn, _PosOut, _R), 487 \+string_length(Input,PosIn), 488 PosOut is PosIn+1. 489
490
493literal_match_(S,Match) :-
494 match_chars(S,Chars), 495 string_chars(Match,Chars). 496
497
500match_chars(MatchSet, MChars) :-
501 sub_string(MatchSet,1,_,1,Str), 502 string_chars(Str,Chars),
503 unescape_(Chars,MChars).
504
505unescape_([],[]).
506unescape_(['\\',x,C1,C2|NxtChars],[Esc|MChars]) :-
507 char_type(C1,xdigit(V1)), char_type(C2,xdigit(V2)), !,
508 VEsc is (V1*16+V2),
509 char_code(Esc,VEsc),
510 unescape_(NxtChars,MChars).
511unescape_(['\\',u,C1,C2,C3,C4|NxtChars],[Esc|MChars]) :-
512 char_type(C1,xdigit(V1)), char_type(C2,xdigit(V2)), char_type(C3,xdigit(V3)), char_type(C4,xdigit(V4)), !,
513 VEsc is ((V1*16+V2)*16+V3)*16+V4,
514 char_code(Esc,VEsc),
515 unescape_(NxtChars,MChars).
516unescape_(['\\','U',C1,C2,C3,C4,C5,C6,C7,C8|NxtChars],[Esc|MChars]) :-
517 char_type(C1,xdigit(V1)), char_type(C2,xdigit(V2)), char_type(C3,xdigit(V3)), char_type(C4,xdigit(V4)),
518 char_type(C5,xdigit(V5)), char_type(C6,xdigit(V6)), char_type(C7,xdigit(V7)), char_type(C8,xdigit(V8)), !,
519 VEsc is ((((((V1*16+V2)*16+V3)*16+V4)*16+V5)*16+V6)*16+V7)*16+V8,
520 char_code(Esc,VEsc),
521 unescape_(NxtChars,MChars).
522unescape_(['\\',CEsc|Chars],[Esc|MChars]) :-
523 std_escape_(CEsc,Esc), !,
524 unescape_(Chars,MChars).
525unescape_([Char|Chars],[Char|MChars]) :-
526 unescape_(Chars,MChars).
527
528std_escape_('n','\n').
529std_escape_('r','\r').
530std_escape_('t','\t').
531
533chars_in_match([],_Ch,In) :- In == notin. 534chars_in_match([Cl,'-',Cu|MChars],Ch,In) :- !, 535 (Cl@=<Ch,Ch@=<Cu -> In == in ; chars_in_match(MChars,Ch,In)).
536chars_in_match([Cl|MChars],Ch,In) :- 537 (Cl==Ch -> In == in ; chars_in_match(MChars,Ch,In)).
538
539
542recursive_loop_check(Goal,Last,Pos,Name) :-
543 prolog_current_frame(F), 544 prolog_frame_attribute(F,parent,IPF), 545 prolog_frame_attribute(IPF,parent,GPF), 546 (once(prolog_frame_attribute(GPF,parent_goal,Goal)), Last=Pos
547 -> 548 peg_notrace,
549 format(string(Message),"pPEG infinite recursion applying ~w",[Name]),
550 throw(error(resource_error(Message),_))
551 ; true
552 ).
553
555flatten_([], Tl, Tl) :-
556 !.
557flatten_([Hd|Tl], Tail, List) :-
558 !,
559 flatten_(Hd, FlatHeadTail, List),
560 flatten_(Tl, Tail, FlatHeadTail).
561flatten_(NonList, Tl, [NonList|Tl]).
562
565
566scheme_treatment("=",Name,Treat) :-
567 sub_string(Name,0,1,_,C),
568 pPEG_type(C,Treat).
569scheme_treatment(":",_Name,anonymous).
570scheme_treatment(":=",_Name,component).
571scheme_treatment("=:",_Name,leaf).
572
574pPEG_type("_",anonymous).
575pPEG_type(C,dynamic) :- char_type(C,lower), !.
576pPEG_type(_,component).
577
579build_ptree(anonymous,_Args,_Match,_Name,[]) :- !.
580build_ptree(leaf,_,slice(Input,PosIn,PosOut),Name,R) :- !, 581 Len is PosOut-PosIn,
582 sub_string(Input,PosIn,Len,_,Arg),
583 R =.. [Name,Arg].
584build_ptree(dynamic,[],Match,Name,R) :- !, 585 build_ptree(leaf,_,Match,Name,R).
586build_ptree(dynamic,[Arg],_Match,_Name,Arg) :- !, 587 compound(Arg).
588build_ptree(_RType,Arg,_Match,Name,R) :- 589 R =.. [Name,Arg].
590
591
594extn_pred(S,T) :-
595 (sub_string(S,Pos,1,_," ") 596 -> FLen is Pos-1, 597 sub_atom(S,1,FLen,_,Pred), 598 APos is Pos+1, 599 sub_string(S,APos,_,1,S1), 600 split_string(S1,""," ",[StringArg]) 601 ; sub_atom(S,1,_,1,Pred), 602 StringArg = ""
603 ),
604 (split_string(Pred,':','',[SM,SF]) 605 -> atom_string(M,SM), atom_string(F,SF),
606 P =.. [F,StringArg],
607 T = M:P
608 ; T =.. [Pred,StringArg]
609 ).
610
612extn_call(T,Env,Input,PosIn,PosOut,R) :-
613 catch(call(T,Env,Input,PosIn,PosOut,R),
614 Err, extn_error(Err,T,Env,Input,PosIn,PosOut,R)
615 ).
616
617extn_error(error(existence_error(procedure,_),_),T,_Env,Input,PosIn,PosIn,[]) :- !,
618 sub_string(Input,PosIn,_,0,Rem),
619 print_message(information, peg(extension(T,Rem))).
620extn_error(Err,_T,_Env,_Input,_PosIn,_PosOut,_R) :-
621 throw(Err).
622
623prolog:message(peg(extension(T,Rem))) --> 624 [ "Extension ~p parsing: ~p\n" - [T,Rem] ].
625
629peg_add_tracing([],Grammar,Grammar) :- !. 630peg_add_tracing(TRules,Grammar,GrammarT) :-
631 ( (Grammar = [Rule|_], functor(Rule,rule,_))
632 -> duplicate_term(Grammar,GrammarC) 633 ; GrammarC = Grammar 634 ),
635 add_tracing(TRules,GrammarC,GrammarT).
636
637add_tracing([],Grammar,Grammar) :- !.
638add_tracing([Name|Names],Grammar,GrammarT) :- !,
639 add_tracing(Name,Grammar,NxtGrammar),
640 add_tracing(Names,NxtGrammar,GrammarT).
641add_tracing(Name,Grammar,GrammarT) :-
642 add_trace(Grammar,Name,GrammarT).
643
644add_trace([],_SName,[]).
645add_trace([rule([id(SName), def(Def), Exp])|Rules], Name,
646 [rule([id(SName), trace(rule(AName,Treat,Exp))])|Rules]) :-
647 nonvar(Exp), 648 atom_string(AName,SName), 649 atom_string(AName,Name),
650 scheme_treatment(Def,SName,Treat),
651 !.
652add_trace([Rule|Rules], Name, [Rule|Rules]) :-
653 Rule = rule(AName, Treat, Exp), 654 nonvar(Exp), 655 atom_string(AName,Name), 656 !,
657 658 setarg(3,Rule,trace(rule(AName,Treat,Exp))).
659add_trace([Rule|Rules], Name, [Rule|RulesT]) :-
660 add_trace(Rules, Name, RulesT).
661
665peg_trace :-
666 debug(pPEG(trace)),
667 wrap_predicate(pPEG:eval_(Inst, Env, Input, PosIn, PosOut, R),
668 'pPEG:eval_',
669 Wrapped,
670 doEval_wrap_(Wrapped, Inst, Env, Input, PosIn, PosOut, R)).
671
672peg_notrace :-
673 (debugging(pPEG(trace),true)
674 -> unwrap_predicate(pPEG:eval_/6, 'pPEG:eval_'),
675 nodebug(pPEG(trace))
676 ; true
677 ).
678
679doEval_wrap_(Wrapped, Inst, Env, Input, PosIn, PosOut, R) :-
680 peg_inst_type(Inst,Type),
681 vm_instruction(Inst,TInst),
682 persistent_env_(Env,PEnv),
683 peg_trace_port_(Type, call, TInst, PEnv, Input, PosIn, PosOut, R),
684 (Wrapped 685 -> peg_trace_port_(Type, exit, TInst, PEnv, Input, PosIn, PosOut, R)
686 ; peg_trace_port_(Type, fail, TInst, PEnv, Input, PosIn, PosOut, R),
687 fail
688 ).
689
690peg_trace_port_(call, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
691 peg_cursor_pos(Input,PosIn,Cursor),
692 peg_trace_msg(postInc, PEnv, "~w~w~w", [Cursor,TInst]). 693peg_trace_port_(call, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
694 peg_cursor_pos(Input,PosIn,Cursor),
695 peg_trace_input(Input,PosIn,Str),
696 peg_trace_msg(preDec, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). 697peg_trace_port_(call, exit, TInst, PEnv, Input, PosIn, PosOut, R) :- !,
698 peg_cursor_pos(Input,PosOut,Cursor),
699 (R = [] 700 -> Len is PosOut-PosIn,
701 sub_string(Input,PosIn,Len,_,RT)
702 ; RT = R
703 ),
704 (string(RT) -> MatchOp = "==" ; MatchOp = "=>"),
705 peg_trace_msg(preDec, PEnv, "~w~w~w ~w \t~p", [Cursor,TInst,MatchOp,RT]). 706peg_trace_port_(meta, call, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
707 peg_cursor_pos(Input,PosIn,Cursor),
708 peg_trace_msg(indent, PEnv, "~w~w~w", [Cursor,TInst]). 709peg_trace_port_(terminal, fail, TInst, PEnv, Input, PosIn, _PosOut, _R) :- !,
710 peg_cursor_pos(Input,PosIn,Cursor),
711 peg_trace_input(Input,PosIn,Str),
712 peg_trace_msg(indent, PEnv, "~w~w~w != \t~p", [Cursor,TInst,Str]). 713peg_trace_port_(terminal, exit, TInst, PEnv, Input, PosIn, PosOut, _R) :- !,
714 peg_cursor_pos(Input,PosOut,Cursor),
715 Len is PosOut-PosIn,
716 sub_string(Input,PosIn,Len,_,RT),
717 peg_trace_msg(indent, PEnv, "~w~w~w == \t~p", [Cursor,TInst,RT]). 718peg_trace_port_(_Other, _, _, _, _, _, _, _). 719
720peg_inst_type(alt(_),meta).
721peg_inst_type(seq(_),meta).
722peg_inst_type(pre(_),call).
723peg_inst_type(rep(_),meta).
724peg_inst_type(rep_O(_,_,_),meta).
725peg_inst_type(quote(_),terminal).
726peg_inst_type(quote_O(_,_),terminal).
727peg_inst_type(class(_),terminal).
728peg_inst_type(class_O(_,_),terminal).
729peg_inst_type(dot(_),terminal).
730peg_inst_type(extn(_),terminal).
731peg_inst_type(id(_),notrace). 732peg_inst_type(call_O(Rule),Type) :- 733 arg(3,Rule,trace(_)) -> Type = notrace ; Type = call.
734peg_inst_type(trace(_),notrace). 735
736peg_cursor_pos(Input,Pos,Cursor) :-
737 string_length(Input,InputLen), 738 StartPos is min(Pos,InputLen-1),
739 peg_line_pos(Input,StartPos,0,1,_Text,LinePos,LineNo), 740 CPos is LinePos +1, 741 format(string(Cursor),"~` t~d~4+.~d~4+",[LineNo,CPos]). 742
743peg_line_pos("",_Pos,_LinePos,LineNum,"",0,LineNum) :- !. 744peg_line_pos(Input,Pos,LinePos,LineNum,Text,EPos,ELineNum) :- 745 746 re_matchsub("[^\n\r]*(\n|\r\n?)?",Input,Match,[start(LinePos)]), 747 string_length(Match.0,Len),
748 NxtLinePos is LinePos+Len,
749 ((LinePos =< Pos,Pos < NxtLinePos) 750 -> string_concat(Text,Match.get(1,""),Match.0), 751 EPos is Pos-LinePos,
752 ELineNum = LineNum
753 ; NxtLineNum is LineNum+1, 754 peg_line_pos(Input,Pos,NxtLinePos,NxtLineNum,Text,EPos,ELineNum)
755 ).
756
757peg_trace_input(Input,PosIn,Str) :-
758 sub_string(Input,PosIn,L,0,SStr), 759 (L =< 32
760 -> Str = SStr
761 ; sub_string(SStr,0,32,_,SStr1),
762 string_concat(SStr1," ... ",Str)
763 ).
764
765peg_trace_msg(postInc, PEnv, Msg, [Cursor|Args]) :-
766 arg(2,PEnv,Indent),
767 debug_peg_trace(Msg,[Cursor,Indent|Args]),
768 string_concat(Indent,"| ",NxtIndent), 769 nb_linkarg(2,PEnv,NxtIndent).
770peg_trace_msg(preDec, PEnv, Msg, [Cursor|Args]) :-
771 arg(2,PEnv,Indent),
772 sub_string(Indent,0,_,3,NxtIndent), 773 debug_peg_trace(Msg,[Cursor,NxtIndent|Args]),
774 nb_linkarg(2,PEnv,NxtIndent).
775peg_trace_msg(indent, PEnv, Msg, [Cursor|Args]) :-
776 arg(2,PEnv,Indent),
777 debug_peg_trace(Msg,[Cursor,Indent|Args]).
778
782vm_instruction(id(Name), Name).
783vm_instruction(call_O(Var), "??undefined rule??") :- var(Var),!. 784vm_instruction(call_O(rule(Name,_Treat,_Exp)), Name).
785vm_instruction(seq(Exps), Is) :-
786 vm_instruction_list(Exps,LIs),
787 atomics_to_string(LIs," ",Is0),
788 atomics_to_string(["(",Is0,")"],Is).
789vm_instruction(alt(Exps), Is) :-
790 vm_instruction_list(Exps,LIs),
791 atomics_to_string(LIs," / ",Is0),
792 atomics_to_string(["(",Is0,")"],Is).
793vm_instruction(rep([Exp, Sfx]), Is) :-
794 vm_rep_sfx(Sfx,ROp), !,
795 vm_instruction(Exp,I),
796 string_concat(I,ROp,Is).
797vm_instruction(rep_O(Exp, Min, Max), Is) :-
798 rep_counts(Sfx, Min, Max), !,
799 vm_instruction(rep([Exp, Sfx]), Is).
800vm_instruction(pre([pfx(Chs),Exp]), Is) :-
801 vm_instruction(Exp,I),
802 string_concat(Chs,I,Is).
803vm_instruction(quote(Match), Is) :-
804 unescape_std(Match,Is).
805vm_instruction(quote_O(Case,Match), Is) :-
806 (Case = exact -> Sens = "" ; Sens = "i"),
807 unescape_std(Match,S1),
808 unescape_string(S1,"'","\\u0027",S),
809 atomics_to_string(["'",S,"'",Sens],Is).
810vm_instruction(class(Match), Is) :-
811 unescape_std(Match,Is).
812vm_instruction(class_O(In,MChars), Is) :-
813 (In = notin -> Pfx = '~' ; Pfx = ''),
814 string_chars(MStr,MChars),
815 unescape_std(MStr,S),
816 unescape_string(S,"]","\\u005d",S1),
817 atomics_to_string([Pfx,"[",S1,"]"],Is).
818vm_instruction(dot(_), ".").
819vm_instruction(extn(Ext), Is) :-
820 (string(Ext)
821 -> Is = Ext 822 ; (Ext = Mod:Pred
823 -> Pred =.. [Func,StringArg], 824 atomics_to_string(['<',Mod,':',Func,' ',StringArg,'>'],Is)
825 ; Ext =.. [Func,StringArg], 826 atomics_to_string(['<',Func,' ',StringArg,'>'],Is)
827 )
828 ).
829vm_instruction(trace(Rule), Is) :-
830 vm_instruction(call_O(Rule), Is).
831
832vm_instruction_list([],[]).
833vm_instruction_list([Exp|Exps],[Is|LIs]) :-
834 vm_instruction(Exp,Is),
835 vm_instruction_list(Exps,LIs).
836
837vm_rep_sfx(sfx(ROp), ROp).
838vm_rep_sfx(num(StrN), ROp) :- atomics_to_string(["*",StrN],ROp).
839vm_rep_sfx(range([num(StrN),_]), ROp) :- atomics_to_string(["*",StrN,".."],ROp).
840vm_rep_sfx(range([num(StrM),_,num(StrN)]), ROp) :- atomics_to_string(["*",StrM,"..",StrN],ROp).
841
842unescape_string(Sin,Esc,Usc,Sout) :-
843 split_string(Sin,Esc,"",L),
844 atomics_to_string(L,Usc,Sout).
845
846unescape_std(Sin,Sout) :-
847 string_chars(Sin,CharsIn),
848 escape_chars(CharsIn,CharsOut),
849 string_chars(Sout,CharsOut).
850
851escape_chars([],[]).
852escape_chars([C|CharsIn],[C|CharsOut]) :-
853 char_code(C,CS), between(32,126,CS), !, 854 escape_chars(CharsIn,CharsOut).
855escape_chars([ECh|CharsIn],['\\',Ch|CharsOut]) :-
856 std_escape_(Ch,ECh),!, 857 escape_chars(CharsIn,CharsOut).
858escape_chars([C|CharsIn],['\\','u',X1,X2,X3,X4|CharsOut]) :-
859 char_code(C,CS), 860 divmod(CS,16,Q4,R4),
861 divmod(Q4,16,Q3,R3),
862 divmod(Q3,16,R1,R2),
863 char_type(X1,xdigit(R1)), char_type(X2,xdigit(R2)), char_type(X3,xdigit(R3)), char_type(X4,xdigit(R4)),
864 escape_chars(CharsIn,CharsOut).
865
871optimize_peg('Peg'(Rules),'Peg'(RulesO,RRefs)) :-
872 (optimize_rules(Rules,RDefs,RulesO)
873 -> once(length(RDefs,_)), 874 chk_RDefs(RulesO,RDefs,RRefs) 875 ; (Rules = [rule([id(GName),_,_])|_Rules] -> true ; GName = "?unknown?"),
876 print_message(warning,peg(optimize_fail(GName))), 877 fail
878 ).
879
880chk_RDefs([],RDefs,[]) :-
881 forall(member(Name:_,RDefs), print_message(warning, peg(undefined(Name)))).
882chk_RDefs([rule(PName,_,_)|Rules],RDefs,[_|RRefs]) :-
883 memberchk(rule(PName,_,_),Rules), !, 884 print_message(warning,peg(duplicate(PName))), 885 chk_RDefs(Rules,RDefs,RRefs).
886chk_RDefs([rule(PName,_,_)|Rules],RDefs,[RRef|RRefs]) :-
887 atom_string(PName,Name),
888 remove_def(RDefs,Name,RRef,NxtRDefs),
889 chk_RDefs(Rules,NxtRDefs,RRefs).
890
891remove_def([],_Name,_RRef,[]).
893remove_def([Name:RRef|RDefs],Name,RRef,RDefs) :- !.
894remove_def([RDef|RDefs],Name,RRef,[RDef|NxtRDefs]) :-
895 remove_def(RDefs,Name,RRef,NxtRDefs).
896
897prolog:message(peg(duplicate(Name))) --> 898 [ "pPEG: duplicate rule ~w, last definition will apply" - [Name] ].
899
900prolog:message(peg(optimize_fail(GName))) --> 901 [ "pPEG: grammar ~w optimization failed" - [GName] ].
902
903optimize_rules([],_RDefs,[]).
904optimize_rules([Rule|Rules],RDefs,[RuleO|RulesO]) :-
905 optimize_rule(Rule,RDefs,RuleO),
906 optimize_rules(Rules,RDefs,RulesO).
907
908optimize_rule(rule([id(Name), def(Def), Exp]), RDefs, rule(PName,Treat,ExpO)) :- !, 909 atom_string(PName,Name), 910 scheme_treatment(Def,Name,Treat),
911 optimize_exp(Exp, RDefs, ExpO).
912optimize_rule(rule(Name,Treat,Exp), _RDefs, rule(Name,Treat,Exp)). 913
914optimize_exp(id(Name), RDefs, call_O(Rule)) :- 915 memberchk(Name:Rule, RDefs).
916
917optimize_exp(seq(Ins), RDefs, seq(Opt)) :-
918 optimize_exp_list(Ins,RDefs,Opt).
919
920optimize_exp(alt(Ins), RDefs, alt(Opt)) :-
921 optimize_exp_list(Ins,RDefs,Opt).
922
923optimize_exp(rep([Exp, ROp]), RDefs, rep_O(ExpO, Min, Max)) :-
924 rep_counts(ROp,Min,Max), !,
925 optimize_exp(Exp,RDefs,ExpO).
926
927optimize_exp(pre([pfx("~"), class(MatchSet)]), RDefs, class_O(notin,MChars)) :- !,
928 optimize_exp(class(MatchSet), RDefs, class_O(_,MChars)).
929optimize_exp(pre([pfx(POp), Exp]), RDefs, pre([pfx(POp), ExpO])) :-
930 optimize_exp(Exp,RDefs,ExpO).
931
932optimize_exp(class(MatchSet), _RDefs, class_O(in,MChars)) :-
933 match_chars(MatchSet, MChars).
934
935optimize_exp(quote(QS), _RDefs, quote_O(Case,Match)) :-
936 (sub_string(QS,_,1,0,"i") 937 -> Case = upper,
938 sub_string(QS,0,_,1,S), 939 literal_match_(S,AMatch), 940 string_upper(AMatch,Match)
941 ; Case = exact,
942 literal_match_(QS,Match) 943 ).
944
945optimize_exp(dot(D), _RDefs, dot(D)). 946
947optimize_exp(extn(S), _RDefs, extn(T)) :- 948 (string(S) -> extn_pred(S,T) ; T = S).
949
950optimize_exp(call_O(Rule), _RDefs, call_O(Rule)). 951optimize_exp(rep_O(Exp, Min, Max), _RDefs, rep_O(Exp, Min, Max)). 952optimize_exp(quote_O(C,M), _RDefs, quote_O(C,M)). 953optimize_exp(class_O(M), _RDefs, class_O(M)). 955
956optimize_exp_list([],_RDefs,[]).
957optimize_exp_list([Exp|Exps],RDefs,[ExpO|ExpOs]) :-
958 optimize_exp(Exp,RDefs,ExpO),
959 optimize_exp_list(Exps,RDefs,ExpOs).
960
964:- initialization(init_peg,now).