1:- module(pac_regex, [parse_regex/2,
2 regex_unix_macro/2]). 3
5
6regex_unix_macro($, @( $ )).
7regex_unix_macro(^, []).
8
15
16 24
57parse_regex(X, Y) :- string_chars(X, X0),
58 once(parse_regex([], [], [], Y0, X0, [])),
59 Y0 = [Y].
61parse_regex(A, B, A0, B0) --> token(T), !,
62 { once(push_pop_stacks(T, A, B, A1, B1)) },
63 parse_regex(A1, B1, A0, B0).
64parse_regex(A, B, A0, B0) --> 65 { close_block(A, B, A0, B1),
66 fold_block_reversely(B1, B0) }.
67
69push_pop_stacks(')', A, X, B, Y) :- close_block(A, X, B, Z), 70 fold_block_reversely(Z, Y).
71push_pop_stacks('(', A, X, ['('|A], ['('|X]). 72push_pop_stacks({}(I), A, [T|X], A, [T^I|X]). 73push_pop_stacks(F, A, X, A, Y) :- unary_opr(F),
74 apply_unary_opr(F, X, Y).
75push_pop_stacks(F, A, X, [F|B], Y) :- binary_opr(F),
76 once(sweep_higher_opr(F, A, X, B, Y)).
77push_pop_stacks(T, A, X, A, [T|X]).
78
80token('C'([C])) --> [(\)], [C0], {char_code(C0, C)}. 81token(X) --> [ A ], { regex_unix_macro(A, X) }.
82token(T) --> [T], {memberchk(T, ['(',')', '*', '+', '!', '|', '.', '?'])}.
83token(out(D)) --> ['[', ^], char_class(D0, []), {chars_interval(D0, D)}.
84token(dot(D)) --> ['['], char_class(D0, []), {chars_interval(D0, D)}.
85token({}(I)) --> ['{'], repeat_spec(I).
86token('C'([C])) --> [C0], {char_code(C0, C)}.
87
89sweep_higher_opr(_, [], X, [], X).
90sweep_higher_opr(_, ['('|X], Y, ['('|X], Y).
91sweep_higher_opr(F, [G|A], X, B, Y):- higher_priority(G, F),
92 apply_binary_opr(G, X, Z),
93 sweep_higher_opr(F, A, Z, B, Y).
94sweep_higher_opr(F, A, X, B, Y):- push_pop_stacks(F, A, X, B, Y).
95
97close_block([], X, [], X).
98close_block(['('|A], X, A, X).
99close_block([F|A], X, B, Y):-
100 apply_binary_opr(F, X, Z),
101 close_block(A, Z, B, Y).
102
104fold_block_reversely([X|Y], [Z|U]):- fold_block_reversely(X, Y, Z, U).
105fold_block_reversely([], []).
106
108fold_block_reversely(X, [], X, []).
109fold_block_reversely(X, ['('|Y], X, Y).
110fold_block_reversely(X, [Y|Z], U, V):- fold_block_reversely(Y+X, Z, U, V).
111
113unary_opr(*).
114unary_opr(?).
115unary_opr(!).
116unary_opr(+).
117
119binary_opr(&).
120binary_opr('|').
121
123higher_priority(&, &).
124higher_priority(&, '|').
125higher_priority('|', '|').
126
128apply_unary_opr(*, [X|Y], [*(X)|Y]).
129apply_unary_opr(+, [X|Y], [+(X)|Y]).
130apply_unary_opr(?, [X|Y], [?(X)|Y]).
131apply_unary_opr(!, [X|Y], [!(X)|Y]).
132
134apply_binary_opr('|', [X, Y|Z], ['|'(Y, X)|Z]).
135apply_binary_opr('&', [X, Y|Z], ['&'(Y, X)|Z]).
136
141
142repeat_spec(X) --> number_chars(J0), [','], { J0\==[] },
143 number_chars(K0), ['}'],
144 { number_chars(J, J0),
145 ( K0 \== []
146 -> number_chars(K, K0),
147 X = J-K
148 ; X = (>=(J))
149 )
150 }.
151
153number_chars([D|Ds]) --> [D], {char_type(D, digit)},
154 number_chars(Ds).
155number_chars([]) --> [].
156
158char_class([\(C)|Y], Z) --> [\], [C], char_class(Y,Z).
159char_class([A|X], Y) --> [A, A], char_class(X, Y). 160char_class(X, X) --> [']'].
161char_class([C|Y], Z) --> [C], char_class(Y,Z).
162
164chars_interval([],[]).
165chars_interval([X, -, Y|R], [C - D|S]):- drop_escape(X, X0),
166 drop_escape(Y, Y0),
167 char_code(X0, C),
168 char_code(Y0, D),
169 chars_interval(R, S).
170chars_interval([X|R], [C|S]):- drop_escape(X, X0),
171 char_code(X0, C),
172 chars_interval(R, S).
173
175drop_escape(\(X), X).
176drop_escape(X, X)