View source with raw comments or as raw
   1/*  $Id$
   2
   3    Read a sentence from the current input stream and convert it
   4    into a list of atoms and numbers.
   5    Letters(A-Z, a-z) are converted to atoms
   6    Digits (0-9) (and a '.' if a real number) are converted to numbers
   7	Some obscure 'rounding' is done, so you have most of the times
   8	only 6 significant digits with an exponent part. (This is caused
   9	by the system predicate 'name'. If you want looonnnggg numbers
  10	then define digits as parts of words).
  11	(N.B. reals work only if '.' is not defined as 'stop-char' but
  12		'escape' will work in this case)
  13
  14    The reader is >>flexible<<, you can define yourself:
  15	- the character on which reading will stop
  16		(this character is escapable with \
  17		 to read a \ type this character twice!!)
  18	- the character(s) that make up a word (except the
  19	  characters A-Z, a-z that always make up words!!
  20	  and (real)-numbers that always are grouped together!!)
  21	- whether you want conversion of uppercase letters to
  22	  lowercase letters.
  23
  24    readln/1
  25	The default setting for readln/1 is
  26		- read up till newline
  27		- see underscore('_') and numbers 0-9 as part of words
  28		- keep the case as typed (no lowercase conversion)
  29
  30        - If nothing is read readln/1 succeeds with []
  31        - If an end_of_file is read readln/1 succeeds with [..|end_of_file]
  32
  33
  34    readln/5
  35	This predicate gives you the flexibility.
  36	It succeeds with arg1 = list of word&atoms
  37			 arg2 = Ascii code of last character
  38				(but the atom end_of_file when input ends, e.g. ^D).
  39	To change one or more of the defaults you have to
  40	instantiate argument3 and/or argument4 and/or argument5.
  41	 !! Uninstantiated arguments are defaulted !!
  42	- stop character(s):
  43		instantiate argument 3 with the list of ASCII code's
  44		of the desired stop characters (Note: you can also
  45		say: ".!?", what is equivalent to [46,33,63]).
  46	- word character(s):
  47		instantiate argument 4 with the list of ASCII code's
  48		of the desired word-part characters (Note: you can also
  49		say: "", what is equivalent to [] ; i.e. no extra
  50		characters).
  51	- lowercase conversion:
  52		instantiate argument 5 with lowercase
  53
  54
  55Main predicates provided:
  56
  57    readln(P)		- Read a sentence up till NewLine and
  58			  unify <P> with the list of atoms/numbers
  59			  (identical to:
  60				 readln(P, _, [10], "_0123456789", uppercase).)
  61    readln(P, LastCh)   - idem as above but the second argument is unified
  62			  with the last character read (the ascii-code for
  63			  the stop-character, or end_of_file at end of input)
  64    readln(P, LastCh, Arg1, Arg2, Arg3)
  65			- idem as above but the default setting is changed
  66			  for the instantiated args:
  67			  Arg1: List of stop characters
  68			  Arg2: List of word_part characters
  69			  Arg3: uppercase/lowercase conversion
  70
  71Examples:
  72	read_sentence(P,Case) :-
  73		readln(P,_,".!?","_0123456789",Case).
  74
  75	read_in(P) :-				% with numbers as separate
  76		readln(P,Eof,_,"", _).	% entities.
  77
  78	read_atom(A) :-			% stop on newline,
  79		readln(A,_,_," ",_).		% space is part of word
  80
  81
  82   Author: Wouter Jansweijer
  83   Date: 26 april 1985
  84
  85   Modified: Jan Wielemaker
  86   Date: 19 feb 2001
  87
  88   Modernised a bit and fixed some end_of_file/-1 issues.  As we have modules
  89   since a while I removed the ugly $ stuff :-)
  90******************************************************************************/
  91
  92:- module(readln,
  93	  [ readln/1,			% -Line
  94	    readln/2,			% -Line, +EOL
  95	    readln/5			% See above
  96	  ]).  97:- use_module(library(lists)).  98:- license(swipl).  99
 %!  readln(-Read)
 %
 %   Read from the current input up to a newline (code 10) and unify
 %   Read with the list of words and numbers found.  Underscore and the
 %   digits 0-9 count as word characters and the case of letters is
 %   left as typed.  When the input ends before a newline is seen, the
 %   atom end_of_file is appended to the list.
 %
 %   The reader reports the end of input through its second argument
 %   as the atom end_of_file, so that is what has to be tested here; a
 %   raw -1 is accepted as well so that either convention yields the
 %   end_of_file marker.
 readln(Read) :-
     string_codes("_0123456789", WordChars),
     rl_readln(Line, LastCh, [10], WordChars, uppercase),
     (   (   LastCh == end_of_file       % atom reported at end of input
         ;   LastCh == -1                % raw end-of-file code
         )
     ->  append(Line, [end_of_file], Read)
     ;   Read = Line
     ).
 107
 %!  readln(-Read, -LastCh)
 %
 %   Read from the current input up to a newline (code 10) and unify
 %   Read with the list of words and numbers found, using underscore
 %   and the digits 0-9 as extra word characters and leaving the case
 %   of letters alone.  LastCh is whatever rl_readln/5 reports as the
 %   item that ended reading.
 readln(Words, Last) :-
     StopCodes = [10],
     string_codes("_0123456789", WordCodes),
     rl_readln(Words, Last, StopCodes, WordCodes, uppercase).
 111
 %!  readln(-P, -EOF, ?StopChars, ?WordChars, ?Case)
 %
 %   Configurable reader.  P is unified with the list of words and
 %   numbers read from the current input and EOF with the item that
 %   ended reading.  Each of the last three arguments is defaulted
 %   when it is left unbound:
 %
 %     - StopChars: characters that end reading, default [10].
 %     - WordChars: extra characters that may be part of a word,
 %       default underscore and the digits 0-9.
 %     - Case: default `lowercase` (map uppercase letters to lower
 %       case); any other value leaves the case alone.
 %
 %   StopChars and WordChars may be given as a list of character codes
 %   or as a string such as ".!?"; a string is converted to its code
 %   list first, because the reader compares individual codes against
 %   the members of a list.
 readln(P, EOF, StopChars, WordChars, Case) :-
     (   var(StopChars)
     ->  Arg1 = [10]
     ;   rl_text_codes(StopChars, Arg1)
     ),
     (   var(WordChars)
     ->  string_codes("_0123456789", Arg2)
     ;   rl_text_codes(WordChars, Arg2)
     ),
     (   var(Case)
     ->  Arg3 = lowercase
     ;   Arg3 = Case
     ),
     rl_readln(P, EOF, Arg1, Arg2, Arg3).

 %   rl_text_codes(+Text, -Codes)
 %
 %   Codes is the code list of Text when Text is a string; anything
 %   else (normally a code list already) is passed through unchanged.
 rl_text_codes(Text, Codes) :-
     (   string(Text)
     ->  string_codes(Text, Codes)
     ;   Codes = Text
     ).
 126
 %   rl_readln(-Words, -LastCh, +StopChars, +WordChars, +Case)
 %
 %   Work horse behind the readln predicates: collect the raw codes
 %   with rl_initread/3, drop leading blanks and split what is left
 %   into words and numbers.  The two cuts commit to the first way of
 %   reading and of splitting the input.
 rl_readln(Words, LastCh, StopChars, WordChars, Case) :-
     Options = options(WordChars, Case),
     rl_initread(Raw, LastCh, StopChars),
     rl_blanks(Raw, Trimmed),
     !,
     rl_words(Words, Trimmed, [], Options),
     !.
 131
 %   rl_initread(-Codes, -LastCh, +StopChars)
 %
 %   Fetch the first code from the current input and let
 %   rl_readrest/4 collect it together with everything that follows.
 rl_initread(Codes, LastCh, StopChars) :-
     get_code(First),
     rl_readrest(First, Codes, LastCh, StopChars).
 135
 %   rl_readrest(+Code, -Codes, -LastCh, +StopChars)
 %
 %   Code is the code that was just read from the current input.
 %   Codes is the list of codes collected from there on and LastCh
 %   tells what ended reading: the atom end_of_file when the input is
 %   exhausted, otherwise the stop character itself (which is also the
 %   last element of Codes).
 %
 %   A backslash escapes the next code: the backslash is dropped and
 %   the following code is taken literally, even if it is a stop
 %   character.  The escape clause is committed with a cut so that the
 %   backslash can never be re-read as an ordinary character, and a
 %   backslash directly followed by the end of input ends reading
 %   instead of storing -1 in Codes and reading past the end.
 rl_readrest(-1, [], end_of_file, _) :- !.
 rl_readrest(0'\\, Codes, EOF, StopChars) :- !,
     get_code(K1),                       % the escaped code
     (   K1 == -1                        % input ended after the backslash
     ->  Codes = [],
         EOF = end_of_file
     ;   Codes = [K1|R],
         get_code(K2),
         rl_readrest(K2, R, EOF, StopChars)
     ).
 rl_readrest(K, [K], K, StopChars) :-    % the stop char(s)
     member(K, StopChars), !.
 rl_readrest(K, [K|R], EOF, StopChars) :-        % the normal case
     get_code(K1),
     rl_readrest(K1, R, EOF, StopChars).
 146
 %   rl_words(-Words, +Codes, ?Rest, +Options)
 %
 %   Words is the list of words and numbers that can be taken from the
 %   front of Codes, with blanks between them skipped; Rest is what
 %   remains.  Once a word has been recognised the clause is committed.
 %   When no word can be read the result is the empty list and only
 %   blanks are consumed.
 rl_words([Word|Words], Codes0, Rest, Options) :-
     rl_word(Word, Codes0, Codes1, Options),
     !,
     rl_blanks(Codes1, Codes2),
     rl_words(Words, Codes2, Rest, Options).
 rl_words([], Codes, Rest, _) :-
     rl_blanks(Codes, Rest),
     !.
 rl_words([], Codes, Codes, _).
 154
 %   rl_word(-Word, +Codes, -Rest, +Options)
 %
 %   Take one item from the front of Codes.  The alternatives are
 %   tried in this order:
 %
 %     1. a dot directly followed by a digit: a number written without
 %        a leading zero, converted as if it started with "0.";
 %     2. a run of digits, optionally containing one dot that is
 %        followed by a digit;
 %     3. a run of word characters as accepted by rl_basic_char/4;
 %     4. any single code, converted on its own.
 %
 %   The codes are converted with name/2.
 rl_word(Number, [0'.|Codes0], Rest, _) :-
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Rest, dot),   % no second dot allowed
     name(Number, [0'0, 0'., Digit|Digits]).
 rl_word(Number, Codes0, Rest, _) :-
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Rest, _),
     name(Number, [Digit|Digits]).
 rl_word(Word, Codes0, Rest, Options) :-
     rl_basic_char(Char, Codes0, Codes1, Options),
     !,
     rl_basic_chars(Chars, Codes1, Rest, Options),
     name(Word, [Char|Chars]).
 rl_word(Item, [Code|Rest], Rest, _) :-
     name(Item, [Code]),
     !.
 169
 %   rl_basic_chars(-Chars, +Codes, -Rest, +Options)
 %
 %   Chars is the longest run of word characters at the front of
 %   Codes, each one mapped by rl_basic_char/4; Rest is what follows.
 %   The run may be empty.
 rl_basic_chars([Char|Chars], Codes0, Rest, Options) :-
     rl_basic_char(Char, Codes0, Codes1, Options),
     !,
     rl_basic_chars(Chars, Codes1, Rest, Options).
 rl_basic_chars([], Codes, Codes, _).
 174
 %   rl_basic_nums(-Digits, +Codes, -Rest, ?Dot)
 %
 %   Digits is the run of digit codes at the front of Codes, which may
 %   include a single dot provided the dot is followed by a digit.
 %   Dot is unbound as long as no dot has been accepted; callers pass
 %   the atom `dot` to forbid one, and the recursion does the same
 %   once a dot has been taken.
 rl_basic_nums([0'., Digit|Digits], [0'.|Codes0], Rest, Dot) :-
     var(Dot),                           % no dot seen so far
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Rest, dot).
 rl_basic_nums([Digit|Digits], Codes0, Rest, Dot) :-
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Rest, Dot).
 rl_basic_nums([], Codes, Codes, _).
 183
 %   rl_blanks(+Codes, -Rest)
 %
 %   Rest is Codes without its leading blank codes, where rl_blank/1
 %   decides what a blank is.
 rl_blanks(Codes, Rest) :-
     (   Codes = [Code|Tail],
         rl_blank(Code)
     ->  rl_blanks(Tail, Rest)
     ;   Rest = Codes
     ).
 188
 189/* Basic Character types that form rl_words together */
 190
 %   rl_basic_char(-Char, +Codes, -Rest, +Options)
 %
 %   Take the first code of Codes if rl_lc/4 accepts it as a word
 %   character; Char is that code after case mapping.  Options is
 %   options(WordChars, Case).
 rl_basic_char(Char, Codes, Rest, Options) :-
     Codes = [Code|Rest],
     Options = options(WordChars, Case),
     rl_lc(Code, Char, WordChars, Case).
 193
 %   rl_basic_num(-Digit, +Codes, -Rest)
 %
 %   True when Codes starts with a digit code; Digit is that code and
 %   Rest the remainder of the list.
 rl_basic_num(Digit, Codes, Rest) :-
     Codes = [Digit|Rest],
     code_type(Digit, digit).
 196
 %   rl_blank(+Code)
 %
 %   True when Code is classified as `space` by code_type/2.
 rl_blank(Code) :-
     code_type(Code, space).
 199
 %   rl_lc(+Code, -Mapped, +WordChars, +Case)
 %
 %   True when Code can be part of a word.  An uppercase letter is
 %   handed to rl_fix_case/3, which decides whether it is converted.
 %   A lowercase letter, or any code listed in WordChars, is accepted
 %   unchanged.
 rl_lc(Code, Mapped, _, Case) :-
     code_type(Code, upper),
     !,
     rl_fix_case(Case, Code, Mapped).
 rl_lc(Code, Code, WordChars, _) :-
     (   code_type(Code, lower)
     ;   memberchk(Code, WordChars)
     ).
 207
 %   rl_fix_case(+Case, +Code, -Mapped)
 %
 %   With Case equal to `lowercase`, Mapped is the lowercase letter
 %   whose uppercase is Code, as given by code_type/2.  For any other
 %   Case the code is returned untouched.
 rl_fix_case(Case, Code, Mapped) :-
     (   Case = lowercase
     ->  code_type(Mapped, lower(Code))
     ;   Mapped = Code
     ).