/*  $Id$

    Read a sentence from the current input stream and convert it
    into a list of atoms and numbers.
        Letters (A-Z, a-z) are converted to atoms
        Digits (0-9) (and a '.' if a real number) are converted to numbers
            Some obscure 'rounding' is done, so most of the time you have
            only 6 significant digits with an exponent part. (This is caused
            by the system predicate 'name'. If you want looonnnggg numbers
            then define digits as parts of words).
            (N.B. reals work only if '.' is not defined as 'stop-char' but
                  'escape' will work in this case)

    The reader is >>flexible<<, you can define yourself:
        - the character on which reading will stop
            (this character is escapable with \
             to read a \ type this character twice!!)
        - the character(s) that make up a word (except the
          characters A-Z, a-z that always make up words!!
          and (real)-numbers that always are grouped together!!)
        - whether you want conversion of uppercase letters to
          lowercase letters.

    readln/1
        The default setting for readln/1 is
            - read up till newline
            - see underscore('_') and numbers 0-9 as part of words
            - keep the case of letters as typed (no lowercase conversion)

        - If nothing is read readln/1 succeeds with []
        - If end of input is reached readln/1 succeeds with a list whose
          last element is end_of_file


    readln/5
        This predicate gives you the flexibility.
        It succeeds with arg1 = list of words & atoms
                         arg2 = character code of the stop character read
                                (but 'end_of_file' at end of input, ^D).
        To change one or more of the defaults you have to
        instantiate argument3 and/or argument4 and/or argument5.
         !! Uninstantiated arguments are defaulted !!
        - stop character(s):
            instantiate argument 3 with the list of character codes
            of the desired stop characters (Note: when double quoted
            text denotes a code list you can also say: ".!?", which is
            equivalent to [46,33,63]).  Default: [10] (newline).
        - word character(s):
            instantiate argument 4 with the list of character codes
            of the desired word-part characters (Note: under the same
            condition you can also say: "", which is equivalent to [];
            i.e. no extra characters).  Default: the digits and '_'.
        - lowercase conversion:
            instantiate argument 5 with lowercase to convert uppercase
            letters, or with anything else (e.g. uppercase) to keep
            them.  Default: lowercase.


Main predicates provided:

    readln(P)           - Read a sentence up till NewLine and
                          unify <P> with the list of atoms/numbers
                          (like:
                               readln(P, _, [10], "_0123456789", uppercase)
                          but with end_of_file appended to the list
                          when end of input was reached.)
    readln(P, LastCh)   - idem as above but the second argument is unified
                          with the last character read (the character code
                          of the stop-character, or end_of_file)
    readln(P, LastCh, Arg1, Arg2, Arg3)
                        - idem as above but the default setting is changed
                          for the instantiated args:
                          Arg1: List of stop characters
                          Arg2: List of word_part characters
                          Arg3: uppercase/lowercase conversion

Examples:
        read_sentence(P,Case) :-
                readln(P,_,".!?","_0123456789",Case).

        read_in(P) :-                           % with numbers as separate
                readln(P,Eof,_,"", _).          % entities.

        read_atom(A) :-                         % stop on newline,
                readln(A,_,_," ",_).            % space is part of word


    Author: Wouter Jansweijer
    Date: 26 april 1985

    Modified: Jan Wielemaker
    Date: 19 feb 2001

    Modernised a bit and fixed some end_of_file/-1 issues.  As we have modules
    since a while I removed the ugly $ stuff :-)
******************************************************************************/

:- module(readln,
          [ readln/1,                   % -Line
            readln/2,                   % -Line, -LastCh
            readln/5                    % See above
          ]).
:- use_module(library(lists)).
:- license(swipl).

%   readln(-Read)
%
%   Read words and numbers from the current input up to a newline.
%   Digits and '_' count as word characters and the case of letters
%   is left alone.  When the input ended before a newline was seen
%   the atom end_of_file is appended to the result.

readln(Read) :-                 % the default is read up to EOL
    % atom_codes/2 yields a code list regardless of how double
    % quoted text is read, which is what memberchk/2 in rl_lc/4 needs.
    atom_codes('_0123456789', WordChars),
    rl_readln(Line, LastCh, [10], WordChars, uppercase),
    (   (   LastCh == end_of_file       % what rl_readrest/4 reports
        ;   LastCh == -1                % raw end-of-input code
        )
    ->  append(Line, [end_of_file], Read)
    ;   Read = Line
    ).

%   readln(-Read, -LastCh)
%
%   As readln/1, but nothing is appended to Read.  Instead LastCh is
%   unified with whatever rl_readln/5 reports as the last character:
%   the code of the stop character, or end_of_file.

readln(Read, LastCh) :-
    atom_codes('_0123456789', WordChars),
    rl_readln(Read, LastCh, [10], WordChars, uppercase).

%   readln(-P, -EOF, ?StopChars, ?WordChars, ?Case)
%
%   Fully parameterised reader.  Each of the last three arguments is
%   replaced by a default when it is unbound:
%       StopChars: [10] (newline)
%       WordChars: the codes of the digits and '_'
%       Case:      lowercase
%   StopChars is searched with member/2 and WordChars with memberchk/2,
%   so both must be lists of character codes.

readln(P, EOF, StopChars, WordChars, Case) :-
    (   var(StopChars)
    ->  Arg1 = [10]
    ;   Arg1 = StopChars
    ),
    (   var(WordChars)
    ->  % build the default as a code list independent of how
        % double quoted text is read
        atom_codes('0123456789_', Arg2)
    ;   Arg2 = WordChars
    ),
    (   var(Case)
    ->  Arg3 = lowercase
    ;   Arg3 = Case
    ),
    rl_readln(P, EOF, Arg1, Arg2, Arg3).

%   rl_readln(-P, -EOF, +StopChars, +WordChars, +Case)
%
%   Read the raw character codes first, then split them into words
%   and numbers.  Both phases are committed to their first solution.

rl_readln(P, EOF, StopChars, WordChars, Case) :-
    rl_initread(L, EOF, StopChars),
    rl_blanks(L, LL),
    !,
    rl_words(P, LL, [], options(WordChars, Case)),
    !.

%   rl_initread(-Codes, -EOF, +StopChars)
%
%   Fetch the first character and hand over to rl_readrest/4.

rl_initread(S, EOF, StopChars) :-
    get0(K),
    rl_readrest(K, S, EOF, StopChars).

%   rl_readrest(+Code, -Codes, -EOF, +StopChars)
%
%   Collect codes, starting with the already read Code, until a stop
%   character or end of input.  The stop character is kept as the last
%   element of Codes and returned in EOF; at end of input Codes ends
%   and EOF is the atom end_of_file.  A backslash is dropped and the
%   character following it is taken literally.

rl_readrest(-1, [], end_of_file, _) :- !.
rl_readrest(0'\\, Codes, EOF, StopChars) :-
    !,                                  % a backslash is always an escape
    get0(K1),                           % skip it, take next char
    (   K1 == -1                        % input ended right after the '\'
    ->  Codes = [],
        EOF = end_of_file
    ;   Codes = [K1|R],
        get0(K2),
        rl_readrest(K2, R, EOF, StopChars)
    ).
rl_readrest(K, [K], K, StopChars) :-    % the stop char(s)
    member(K, StopChars),
    !.
rl_readrest(K, [K|R], EOF, StopChars) :- % the normal case
    get0(K1),
    rl_readrest(K1, R, EOF, StopChars).

%   rl_words(-Words, +S0, -S, +Options)
%
%   Words is the list of words/numbers found between S0 and S, with
%   blanks between them skipped.

rl_words([W|Ws], S1, S4, Options) :-
    rl_word(W, S1, S2, Options),
    !,
    rl_blanks(S2, S3),
    rl_words(Ws, S3, S4, Options).
rl_words([], S1, S2, _) :-
    rl_blanks(S1, S2),
    !.
rl_words([], S, S, _).

%   rl_word(-Word, +S0, -S, +Options)
%
%   Take one item from the front of S0: a number starting with a dot,
%   a number starting with a digit, a word made of word characters,
%   or else a single character.  The codes are converted with name/2.

rl_word(N, [46|S1], S3, _) :-           % the dot can be in the beginning of
    rl_basic_num(N1, S1, S2),           % a real number.
    !,
    rl_basic_nums(Rest, S2, S3, dot),   % only ONE dot IN a number !!
    name(N, [48, 46, N1|Rest]).         % i.e '0.<number>'
rl_word(N, S0, S2, _) :-
    rl_basic_num(N1, S0, S1),
    !,
    rl_basic_nums(Rest, S1, S2, _),
    name(N, [N1|Rest]).
rl_word(W, S0, S2, Options) :-
    rl_basic_char(C1, S0, S1, Options),
    !,
    rl_basic_chars(Rest, S1, S2, Options),
    name(W, [C1|Rest]).
rl_word(P, [C|R], R, _) :-
    name(P, [C]),
    !.

%   rl_basic_chars(-Codes, +S0, -S, +Options)
%
%   Longest run of word characters at the front of S0.

rl_basic_chars([A|As], S0, S2, Options) :-
    rl_basic_char(A, S0, S1, Options),
    !,
    rl_basic_chars(As, S1, S2, Options).
rl_basic_chars([], S, S, _).

%   rl_basic_nums(-Codes, +S0, -S, ?Dot)
%
%   Codes is the remainder of a number at the front of S0: digits,
%   with at most one dot that must be followed by a digit.  Dot is
%   unbound as long as no dot has been consumed and is passed down
%   as the atom dot afterwards.

rl_basic_nums([46,N|As], [46|S1], S3, Dot) :-   % a dot followed by >= one digit
    var(Dot),                                   % but not found a dot already
    rl_basic_num(N, S1, S2),
    !,
    rl_basic_nums(As, S2, S3, dot).
rl_basic_nums([A|As], S0, S2, Dot) :-
    rl_basic_num(A, S0, S1),
    !,
    rl_basic_nums(As, S1, S2, Dot).
rl_basic_nums([], S, S, _).

%   rl_blanks(+S0, -S)
%
%   Skip the leading blank characters of S0.

rl_blanks([C|S0], S1) :-
    rl_blank(C),
    !,
    rl_blanks(S0, S1).
rl_blanks(S, S).

/* Basic Character types that form rl_words together */

%   rl_basic_char(-Code, +S0, -S, +Options)
%
%   Take one word character from S0.  Options is the term
%   options(WordChars, Case); Code is the character after the case
%   handling done by rl_lc/4.

rl_basic_char(A, [C|S], S, options(WordChars, Case)) :-
    rl_lc(C, A, WordChars, Case).

%   rl_basic_num(-Digit, +S0, -S)
%
%   Take one digit from S0.

rl_basic_num(N, [N|R], R) :-
    code_type(N, digit).

%   rl_blank(+Code)
%
%   True when Code is classified as space by code_type/2.

rl_blank(X) :-
    code_type(X, space).

%   rl_lc(+Code, -Out, +WordChars, +Case)
%
%   Succeeds for codes that can be part of a word: uppercase letters
%   (mapped according to Case), lowercase letters, and any code that
%   occurs in WordChars.

rl_lc(X, X1, _, Case) :-
    code_type(X, upper),
    !,
    rl_fix_case(Case, X, X1).
rl_lc(X, X, _, _) :-
    code_type(X, lower).
rl_lc(X, X, WordChars, _) :-
    memberchk(X, WordChars).

%   rl_fix_case(+Case, +Upper, -Out)
%
%   With Case == lowercase Out is the lowercase counterpart of Upper;
%   for any other Case the code is returned unchanged.

rl_fix_case(lowercase, U, L) :-
    !,
    code_type(L, lower(U)).
rl_fix_case(_, C, C).