View source as raw
   1/*  $Id$
   2
   3    Read a sentence from the current input stream and convert it
   4    into a list of atoms and numbers.
   5    Letters(A-Z, a-z) are converted to atoms
   6    Digits (0-9) (and a '.' if a real number) are converted to numbers
   7	Some obscure 'rounding' is done, so you have most of the times
   8	only 6 significant digits with an exponent part. (This is caused
   9	by the system predicate 'name'. If you want looonnnggg numbers
  10	then define digits as parts of words).
  11	(N.B. reals work only if '.' is not defined as 'stop-char' but
  12		'escape' will work in this case)
  13
  14    The reader is >>flexible<<, you can define yourself:
  15	- the character on which reading will stop
  16		(this character is escapable with \
  17		 to read a \ type this character twice!!)
  18	- the character(s) that make up a word (except the
  19	  characters A-Z, a-z that always make up words!!
  20	  and (real)-numbers that always are grouped together!!)
  21	- whether you want conversion of uppercase letters to
  22	  lowercase letters.
  23
  24    readln/1
  25	The default setting for readln/1 is
  26		- read up till newline
  27		- see underscore('_') and numbers 0-9 as part of words
  28		- keep the case of letters (no conversion to lowercase)
  29
  30        - If nothing is read readln/1 succeeds with []
  31        - If an end_of_file is read readln/1 succeeds with [..|end_of_file]
  32
  33
  34    readln/5
  35	This predicate gives you the flexibility.
  36	It succeeds with arg1 = list of word&atoms
  37			 arg2 = Ascii code of last character
  38				(but 'end_of_file' in case of ^D).
  39	To change one or more of the defaults you have to
  40	instantiate argument3 and/or argument4 and/or argument5.
  41	 !! Uninstantiated arguments are defaulted !!
  42	- stop character(s):
  43		instantiate argument 3 with the list of ASCII code's
  44		of the desired stop characters (Note: you can also
  45		say: ".!?", what is equivalent to [46,33,63]).
  46	- word character(s):
  47		instantiate argument 4 with the list of ASCII code's
  48		of the desired word-part characters (Note: you can also
  49		say: "", what is equivalent to [] ; i.e. no extra
  50		characters).
  51	- lowercase conversion:
  52		instantiate argument 5 with lowercase
  53
  54
  55Main predicates provided:
  56
  57    readln(P)		- Read a sentence up till NewLine and
  58			  unify <P> with the list of atoms/numbers
  59			  (identical to:
  60				 readln(P, _, [10], "_0123456789", uppercase).)
  61    readln(P, LastCh)   - idem as above but the second argument is unified
  62			  with the last character read (the ascii-code for
  63			  the stop-character, or end_of_file at end of input)
  64    readln(P, LastCh, Arg1, Arg2, Arg3)
  65			- idem as above but the default setting is changed
  66			  for the instantiated args:
  67			  Arg1: List of stop characters
  68			  Arg2: List of word_part characters
  69			  Arg3: uppercase/lowercase conversion
  70
  71Examples:
  72	read_sentence(P,Case) :-
  73		readln(P,_,".!?","_0123456789",Case).
  74
  75	read_in(P) :-				% with numbers as separate
  76		readln(P,Eof,_,"", _).	% entities.
  77
  78	read_atom(A) :-			% stop on newline,
  79		readln(A,_,_," ",_).		% space is part of word
  80
  81
  82   Author: Wouter Jansweijer
  83   Date: 26 april 1985
  84
  85   Modified: Jan Wielemaker
  86   Date: 19 feb 2001
  87
  88   Modernised a bit and fixed some end_of_file/-1 issues.  As we have modules
  89   since a while I removed the ugly $ stuff :-)
  90******************************************************************************/
  91
  % Module header.  Exports the three public entry points; library(lists)
  % supplies append/3, member/2 and memberchk/2 used by the predicates
  % of this file.
  :- module(readln,
            [ readln/1,                   % -Line
              readln/2,                   % -Line, -LastCh
              readln/5                    % -Line, -LastCh, ?StopChars, ?WordChars, ?Case
            ]).
  :- use_module(library(lists)).
  :- license(swipl).
 %   readln(-Read)
 %
 %   Read the current input up to and including the next newline and
 %   unify Read with the list of atoms and numbers found there.
 %   Underscore and the digits 0-9 count as word characters.  The case
 %   argument passed on is `uppercase`, i.e. no lowercase conversion is
 %   requested.
 %
 %   When the input ends before a newline is seen, the atom end_of_file
 %   is appended as the last element of Read.
 readln(Read) :-
     % atom_codes/2 always yields a list of character codes, so the
     % word-character set does not depend on the double_quotes flag.
     atom_codes('_0123456789', WordChars),
     rl_readln(Line, LastCh, [10], WordChars, uppercase),
     % rl_readrest/4 reports end of input as the atom end_of_file (never
     % as -1), so that is the value to test for here.
     (   LastCh == end_of_file
     ->  append(Line, [end_of_file], Read)
     ;   Read = Line
     ).
 106
 %   readln(-Read, -LastCh)
 %
 %   As readln/1, but without appending an end marker to Read.  LastCh
 %   is unified with what terminated reading: the code of the stop
 %   character (10, newline) or the atom end_of_file.
 readln(Read, LastCh) :-
     % Built with atom_codes/2 so the word-character set is a code list
     % regardless of how double-quoted text is read.
     atom_codes('_0123456789', WordChars),
     rl_readln(Read, LastCh, [10], WordChars, uppercase).
 109
 %   readln(-P, -EOF, ?StopChars, ?WordChars, ?Case)
 %
 %   Configurable reader.  P is the list of atoms and numbers read; EOF
 %   is the code of the stop character that ended reading, or the atom
 %   end_of_file.  Each of the last three arguments is defaulted when it
 %   is unbound on entry:
 %
 %     StopChars  - codes that end reading           (default: [10])
 %     WordChars  - extra codes that are part of words
 %                  (default: the digits and underscore)
 %     Case       - `lowercase` converts uppercase letters, any other
 %                  value is passed on unchanged     (default: lowercase)
 %
 %   StopChars and WordChars may be given as a code list, an atom or a
 %   string; see rl_text_codes/2.
 readln(P, EOF, StopChars, WordChars, Case) :-
     (   var(StopChars)
     ->  Arg1 = [10]
     ;   rl_text_codes(StopChars, Arg1)
     ),
     (   var(WordChars)
     ->  atom_codes('01234567890_', Arg2)   % code list under any double_quotes setting
     ;   rl_text_codes(WordChars, Arg2)
     ),
     (   var(Case)
     ->  Arg3 = lowercase
     ;   Arg3 = Case
     ),
     rl_readln(P, EOF, Arg1, Arg2, Arg3).

 %   rl_text_codes(+Text, -Codes)
 %
 %   Normalise a character-set argument.  Proper lists (including [])
 %   are passed through untouched; atoms and strings are converted to
 %   their list of character codes, so that ".!?" works whether
 %   double-quoted text is read as a code list or as a string.  Anything
 %   else (numbers, partial lists, compound terms) is passed through
 %   unchanged.
 rl_text_codes(Text, Codes) :-
     (   is_list(Text)
     ->  Codes = Text
     ;   atomic(Text),
         \+ number(Text)
     ->  atom_codes(Text, Codes)
     ;   Codes = Text
     ).
 124
 %   rl_readln(-Words, -LastCh, +StopChars, +WordChars, +Case)
 %
 %   Worker behind the readln predicates: read the raw codes up to a
 %   stop character or end of input, drop leading blanks, and split the
 %   remainder into words.  The word splitter must consume the complete
 %   code list (its tail argument is []).
 rl_readln(Words, LastCh, StopChars, WordChars, Case) :-
     Options = options(WordChars, Case),
     rl_initread(Raw, LastCh, StopChars),
     rl_blanks(Raw, Trimmed),
     !,                                  % commit to the codes that were read
     rl_words(Words, Trimmed, [], Options),
     !.                                  % keep only the first tokenisation
 129
 %   rl_initread(-Codes, -LastCh, +StopChars)
 %
 %   Fetch the first code from the current input and hand over to
 %   rl_readrest/4, which collects the codes and determines LastCh.
 rl_initread(Codes, LastCh, StopChars) :-
     get0(First),
     rl_readrest(First, Codes, LastCh, StopChars).
 133
 %   rl_readrest(+Code, -Codes, -LastCh, +StopChars)
 %
 %   Code is the code most recently read from the current input.  Codes
 %   is unified with Code followed by everything read after it, up to
 %   and including the first stop character.  LastCh is that stop
 %   character, or the atom end_of_file when the input ran out first (in
 %   which case Codes holds no end marker).
 %
 %   A backslash escapes the next code: the backslash itself is dropped
 %   and the following code is taken literally, even if it is a stop
 %   character.
 %
 %   The outputs are unified after the clause has been selected, so a
 %   caller that passes an already bound LastCh or Codes simply gets
 %   failure on a mismatch instead of having the reader fall through to
 %   the general clause and continue past the stop character or the end
 %   of the input.
 rl_readrest(-1, Codes, LastCh, _) :-
     !,
     Codes = [],
     LastCh = end_of_file.
 rl_readrest(0'\\, Codes, LastCh, StopChars) :-
     !,                                  % an escape is never a stop or normal char
     get0(Escaped),
     (   Escaped == -1                   % input ended right after the escape:
     ->  Codes = [],                     % do not store -1 as if it were a character
         LastCh = end_of_file
     ;   Codes = [Escaped|Rest],
         get0(Next),
         rl_readrest(Next, Rest, LastCh, StopChars)
     ).
 rl_readrest(K, Codes, LastCh, StopChars) :-     % the stop char(s)
     member(K, StopChars),
     !,
     Codes = [K],
     LastCh = K.
 rl_readrest(K, [K|Rest], LastCh, StopChars) :-  % the normal case
     get0(Next),
     rl_readrest(Next, Rest, LastCh, StopChars).
 144
 %   rl_words(-Words, +Codes0, -Codes, +Options)
 %
 %   Split Codes0 into a list of words.  After each word the blanks that
 %   follow it are skipped.  When no further word can be read, Words
 %   ends and Codes is what remains of the input after skipping blanks
 %   (or, failing that, the input as it stands).
 rl_words([Word|Words], Codes0, Codes, Options) :-
     rl_word(Word, Codes0, Codes1, Options),
     !,
     rl_blanks(Codes1, Codes2),
     rl_words(Words, Codes2, Codes, Options).
 rl_words([], Codes0, Codes, _) :-
     rl_blanks(Codes0, Codes),
     !.
 rl_words([], Codes, Codes, _).
 152
 %   rl_word(-Word, +Codes0, -Codes, +Options)
 %
 %   Read a single word from the front of Codes0, leaving Codes.  The
 %   alternatives are tried in this order:
 %
 %     1. a dot directly followed by digits: converted as "0.<digits>";
 %     2. a run of digits, containing at most one dot that is followed
 %        by a digit;
 %     3. a run of word characters as accepted by rl_basic_char/4;
 %     4. any other single code, converted on its own.
 %
 %   All conversions from codes go through name/2.
 rl_word(Number, [0'.|Codes0], Codes, _) :-
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Codes, dot),    % the one dot is already used
     name(Number, [0'0, 0'., Digit|Digits]).
 rl_word(Number, Codes0, Codes, _) :-
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Codes, _),      % unbound flag: no dot seen yet
     name(Number, [Digit|Digits]).
 rl_word(Word, Codes0, Codes, Options) :-
     rl_basic_char(Char, Codes0, Codes1, Options),
     !,
     rl_basic_chars(Chars, Codes1, Codes, Options),
     name(Word, [Char|Chars]).
 rl_word(Punct, [Code|Codes], Codes, _) :-
     name(Punct, [Code]),
     !.
 167
 %   rl_basic_chars(-Chars, +Codes0, -Codes, +Options)
 %
 %   Collect the longest prefix of Codes0 whose codes are all accepted
 %   by rl_basic_char/4.  Chars may be empty; Codes is the remainder.
 rl_basic_chars([Char|Chars], Codes0, Codes, Options) :-
     rl_basic_char(Char, Codes0, Codes1, Options),
     !,
     rl_basic_chars(Chars, Codes1, Codes, Options).
 rl_basic_chars([], Codes, Codes, _).
 172
 %   rl_basic_nums(-Digits, +Codes0, -Codes, ?Dot)
 %
 %   Collect the remaining codes of a number.  Dot is unbound as long as
 %   no decimal dot has been accepted and is the atom `dot` afterwards.
 %   A dot is accepted only while Dot is still unbound and only when a
 %   digit follows it directly.
 rl_basic_nums([0'., Digit|Digits], [0'.|Codes0], Codes, Dot) :-
     var(Dot),                           % no dot accepted so far
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Codes, dot).
 rl_basic_nums([Digit|Digits], Codes0, Codes, Dot) :-
     rl_basic_num(Digit, Codes0, Codes1),
     !,
     rl_basic_nums(Digits, Codes1, Codes, Dot).
 rl_basic_nums([], Codes, Codes, _).
 181
 %   rl_blanks(+Codes0, -Codes)
 %
 %   Codes is Codes0 without its leading blanks, where a blank is any
 %   code accepted by rl_blank/1.
 rl_blanks([Code|Codes0], Codes) :-
     rl_blank(Code),
     !,
     rl_blanks(Codes0, Codes).
 rl_blanks(Codes, Codes).
 186
 187/* Basic Character types that form rl_words together */
 188
 %   rl_basic_char(-Char, +Codes0, -Codes, +Options)
 %
 %   Take one word character from the front of Codes0.  Options is the
 %   term options(WordChars, Case); the acceptance test and the case
 %   handling are delegated to rl_lc/4.
 rl_basic_char(Char, [Code|Codes], Codes, Options) :-
     Options = options(WordChars, Case),
     rl_lc(Code, Char, WordChars, Case).
 191
 %   rl_basic_num(-Digit, +Codes0, -Codes)
 %
 %   Succeeds when the first code of Codes0 is classified as a digit by
 %   code_type/2; Digit is that code and Codes is the rest of the list.
 rl_basic_num(Digit, Codes0, Codes) :-
     Codes0 = [Digit|Codes],
     code_type(Digit, digit).
 194
 %   rl_blank(+Code)
 %
 %   True when Code is classified as white space by code_type/2.
 rl_blank(Code) :-
     code_type(Code, space).
 197
 %   rl_lc(+Code, -Char, +WordChars, +Case)
 %
 %   Accept Code as a word character and unify Char with the code to
 %   store for it:
 %
 %     - an uppercase letter is passed through rl_fix_case/3, which
 %       decides on the conversion according to Case;
 %     - a lowercase letter is kept as it is;
 %     - any other code is accepted, unchanged, only if it occurs in
 %       WordChars.
 rl_lc(Code, Char, _, Case) :-
     code_type(Code, upper),
     !,
     rl_fix_case(Case, Code, Char).
 rl_lc(Code, Code, _, _) :-
     code_type(Code, lower).
 rl_lc(Code, Code, WordChars, _) :-
     memberchk(Code, WordChars).
 205
 206rl_fix_case(lowercase, U, L) :- !,
 207	code_type(L, lower(U)).
 208rl_fix_case(_, C, C)