1:- module(tokenize, [token_stream_of/2, detokenize/2]). 10
%!  add_word(+Word, +SoFar, -Tokens) is det.
%
%   Push a word(Atom) token onto the front of SoFar, giving Tokens.
%   Word holds the pieces of the word in reverse order (last piece
%   first); it is put back into reading order and concatenated into
%   a single atom.
add_word(RevPieces, Acc, Tokens) :-
    reverse(RevPieces, Pieces),
    atomic_list_concat(Pieces, Name),
    Tokens = [word(Name)|Acc].
17
%!  add_num(+Word, +SoFar, -Tokens) is semidet.
%
%   Push a number(Val) token onto the front of SoFar, giving Tokens.
%   Val is computed by base_ten_value/4 from Word, starting at place
%   value 1 with a running total of 0, so the first element of Word
%   is treated as the ones digit.
add_num(RevDigits, Acc, Tokens) :-
    base_ten_value(RevDigits, 1, 0, Value),
    Tokens = [number(Value)|Acc].
20
%!  base_ten_value(+Digits, +Place, +ValSoFar, -Val) is semidet.
%
%   Val is ValSoFar plus the decimal value of Digits, where the head
%   of Digits carries the place value Place and each following digit
%   carries ten times the place value of the one before it (least
%   significant digit first). Fails if an element of Digits is not a
%   decimal digit character according to char_type/2.
base_ten_value([], _Place, Total, Total).
base_ten_value([Digit|Rest], Place, Total0, Total) :-
    char_type(Digit, digit(Weight)),
    Total1 is Total0 + Place * Weight,
    NextPlace is 10 * Place,
    base_ten_value(Rest, NextPlace, Total1, Total).
27
%!  add_special(+C, +SoFar, -Tokens) is det.
%
%   Push a special(C) token onto the front of SoFar, giving Tokens.
add_special(Char, Acc, Tokens) :-
    Tokens = [special(Char)|Acc].
29
%!  tokenize(+State, +Chars, +Pending, +SoFar, -Tokens) is semidet.
%
%   Character-level state machine behind token_stream_of/2.
%
%   @arg State   one of in_word, in_num or not_in_word.
%   @arg Chars   characters still to be consumed.
%   @arg Pending characters of the word/number currently being read,
%                most recent first; [] when State is not_in_word.
%   @arg SoFar   tokens produced so far, most recent first.
%   @arg Tokens  all tokens, most recent first (the caller reverses).
%
%   Every clause commits (cut) as soon as its guard has succeeded.
%   Without the cuts the unguarded fall-through clauses stay available
%   on backtracking and produce spurious alternative tokenizations
%   (a word split in the middle, letters or blanks reported as
%   special/1 tokens, ...). The first solution is unchanged.

% End of input with nothing pending: SoFar is the result.
tokenize(_, [], [], Tokens, Tokens) :- !.

% End of input while a word or number is pending: flush it.
tokenize(in_word, [], Word, SoFar, Tokens) :- !,
    add_word(Word, SoFar, Tokens).
tokenize(in_num, [], Word, SoFar, Tokens) :- !,
    add_num(Word, SoFar, Tokens).

% Inside a word: csym characters extend it; anything else ends the
% word and is re-examined in state not_in_word.
tokenize(in_word, [C|T], Word, SoFar, Tokens) :-
    char_type(C, csym), !,
    tokenize(in_word, T, [C|Word], SoFar, Tokens).
tokenize(in_word, [C|T], Word, SoFar, Tokens) :- !,
    add_word(Word, SoFar, NewSoFar),
    tokenize(not_in_word, [C|T], [], NewSoFar, Tokens).

% Inside a number: digits extend it; anything else ends the number
% and is re-examined in state not_in_word.
tokenize(in_num, [C|T], Word, SoFar, Tokens) :-
    char_type(C, digit), !,
    tokenize(in_num, T, [C|Word], SoFar, Tokens).
tokenize(in_num, [C|T], Word, SoFar, Tokens) :- !,
    add_num(Word, SoFar, NewSoFar),
    tokenize(not_in_word, [C|T], [], NewSoFar, Tokens).

% Between tokens: a csymf character starts a word ...
tokenize(not_in_word, [C|T], [], SoFar, Tokens) :-
    char_type(C, csymf), !,
    tokenize(in_word, T, [C], SoFar, Tokens).

% ... a digit starts a number ...
tokenize(not_in_word, [C|T], [], SoFar, Tokens) :-
    char_type(C, digit), !,
    tokenize(in_num, T, [C], SoFar, Tokens).

% ... white space is skipped ...
tokenize(not_in_word, [C|T], [], SoFar, Tokens) :-
    char_type(C, space), !,
    tokenize(not_in_word, T, [], SoFar, Tokens).

% ... and every other character becomes a special/1 token.
tokenize(not_in_word, [C|T], [], SoFar, Tokens) :-
    add_special(C, SoFar, NewSoFar),
    tokenize(not_in_word, T, [], NewSoFar, Tokens).
83
%!  token_stream_of(+Intext, -InTokens) is semidet.
%
%   InTokens is the list of tokens found in the atom Intext, in
%   reading order. The characters of Intext are fed to tokenize/5
%   starting in state not_in_word; tokenize/5 builds its result most
%   recent token first, so the list is reversed before it is returned.
%
%   The empty atom maps to the empty list. That clause commits so the
%   general clause does not report the same answer a second time, and
%   tokenize/5 is wrapped in once/1 so that only its first
%   tokenization is ever used.
token_stream_of('', []) :- !.
token_stream_of(Intext, InTokens) :-
    atom_chars(Intext, Chars),
    once(tokenize(not_in_word, Chars, [], [], InTokensBackwards)),
    reverse(InTokensBackwards, InTokens).
89
%!  detokenize(+TokenList, -Atom) is semidet.
%
%   Atom is the text obtained by converting TokenList to a list of
%   atoms with elements_to_atoms/3 and joining those atoms with a
%   single space. The empty token list yields the empty atom; that
%   clause commits so the general clause does not produce the same
%   answer again.
detokenize([], '') :- !.
detokenize(TokenList, Atom) :-
    detokenize(TokenList, '', Atom).

%!  detokenize(+List, +AtomSoFar, -Atom) is semidet.
%
%   Worker for detokenize/2. The second argument is not used; it is
%   kept so the arity stays the same for any existing caller.
%
%   NOTE(review): elements_to_atoms/3 is called with an empty second
%   argument, while its word/1 clause requires a non-empty list in
%   that position, so word tokens make this call fail - confirm the
%   intended calling convention of elements_to_atoms/3.
detokenize(List, _AtomSoFar, Atom) :-
    elements_to_atoms(List, [], ListOfAtoms),
    % atomic_list_concat/3 joins a list with a separator;
    % atom_concat/3 only concatenates two atomic values and cannot
    % take a list as its first argument.
    atomic_list_concat(ListOfAtoms, ' ', Atom).
96
97elements_to_atoms([] , A, A).
98elements_to_atoms([word(X)|T], [X|SoFar], Final) :-
99 atom(X),
100 elements_to_atoms(T, SoFar, Final).
101elements_to_atoms([nt(_)|T], SoFar, Final) :-
102 elements_to_atoms(T, SoFar, Final).
103elements_to_atoms([special(X)|T], [X|SoFar], Final) :-
104 atom(X),
105 elements_to_atoms(T, [X|SoFar], Final)