View source with raw comments or as raw
   1/*  Part of SWI-Prolog
   2
   3    Author:        Wouter Jansweijer and Jan Wielemaker
   4    E-mail:        J.Wielemaker@vu.nl
   5    WWW:           http://www.swi-prolog.org
   6    Copyright (c)  1985-2013, University of Amsterdam
   7    All rights reserved.
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions
  11    are met:
  12
  13    1. Redistributions of source code must retain the above copyright
  14       notice, this list of conditions and the following disclaimer.
  15
  16    2. Redistributions in binary form must reproduce the above copyright
  17       notice, this list of conditions and the following disclaimer in
  18       the documentation and/or other materials provided with the
  19       distribution.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  32    POSSIBILITY OF SUCH DAMAGE.
  33*/
  34
  35:- module(readln,
  36          [ readln/1,                   % -Line
  37            readln/2,                   % -Line, +EOL
  38            readln/5                    % See above
  39          ]).
  40:- use_module(library(lists)).
  41
  42/** <module> Read line as list of tokens
  43
  44Read a sentence from the current input stream and convert it into a list
  45of atoms and numbers:
  46
  47    - Letters(A-Z, a-z) are converted to atoms
  48    - Digits (0-9) (and a '.' if a real number) are converted to numbers
  49        Some obscure 'rounding' is done, so you have most of the times
  50        only 6 significant digits with an exponent part. (This is caused
  51        by the system predicate 'name'. If you want looonnnggg numbers
  52        then define digits as parts of words).
  53        (N.B. reals work only if '.' is not defined as 'stop-char' but
  54                'escape' will work in this case)
  55
  56    The reader is _flexible_, you can define yourself:
  57
  58        - the character on which reading will stop
  59                (this character is escapable with \
  60                 to read a \ type this character twice!!)
  61        - the character(s) that make up a word (except the
  62          characters A-Z, a-z that always make up words!!
  63          and (real)-numbers that always are grouped together!!)
  64        - whether you want conversion of uppercase letters to
  65          lowercase letters.
  66
  67    readln/1
  68        The default setting for readln/1 is
  69                - read up till newline
  70                - see underscore('_') and numbers 0-9 as part of words
  71                - keep letter case as read (no lowercase conversion)
  72
  73        - If nothing is read readln/1 succeeds with []
  74        - If an end_of_file is read readln/1 succeeds with [..|end_of_file]
  75
  76
  77    readln/5
  78        This predicate gives you the flexibility.
  79        It succeeds with arg1 = list of word&atoms
  80                         arg2 = Ascii code of last character
  81                                (but the atom end_of_file in case of ^D).
  82        To change one or more of the defaults you have to
  83        instantiate argument3 and/or argument4 and/or argument5.
  84         !! Uninstantiated arguments are defaulted !!
  85        - stop character(s):
  86                instantiate argument 3 with the list of ASCII code's
  87                of the desired stop characters (Note: you can also
  88                say: ".!?", what is equivalent to [46,33,63]).
  89        - word character(s):
  90                instantiate argument 4 with the list of ASCII code's
  91                of the desired word-part characters (Note: you can also
  92                say: "", what is equivalent to [] ; i.e. no extra
  93                characters).
  94        - lowercase conversion:
  95                instantiate argument 5 with lowercase
  96
  97
  98Main predicates provided:
  99
 100    readln(P)           - Read a sentence up till NewLine and
 101                          unify <P> with the list of atoms/numbers
 102                          (identical to:
  103                                 readln(P, _, [10], "_0123456789", uppercase).)
 104    readln(P, LastCh)   - idem as above but the second argument is unified
 105                          with the last character read (the ascii-code for
  106                          the stop-character, or end_of_file at end of input)
 107    readln(P, LastCh, Arg1, Arg2, Arg3)
 108                        - idem as above but the default setting is changed
 109                          for the instantiated args:
 110                          Arg1: List of stop characters
 111                          Arg2: List of word_part characters
 112                          Arg3: uppercase/lowercase conversion
 113
 114Examples:
 115        read_sentence(P,Case) :-
 116                readln(P,_,".!?","_0123456789",Case).
 117
 118        read_in(P) :-                           % with numbers as separate
 119                readln(P,Eof,_,"", _).  % entities.
 120
 121        read_atom(A) :-                 % stop on newline,
 122                readln(A,_,_," ",_).            % space is part of word
 123
 124@deprecated Old code. Not maintained and probably not at the
  125        right level of abstraction.  No locale support.
 126@see    library(readutil), nlp package.
 127*/
 128
 129
 130readln(Read) :-                 % the default is read up to EOL
 131    string_codes("_0123456789", Arg2),
 132    rl_readln(Line, LastCh, [10], Arg2, uppercase),
 133    (   LastCh == -1
 134    ->  append(Line,[end_of_file], Read)
 135    ;   Read = Line
 136    ).
 137
 138readln(Read, LastCh):-
 139    string_codes("_0123456789", Arg2),
 140    rl_readln(Read, LastCh, [10], Arg2, uppercase).
 141
 142readln(P, EOF, StopChars, WordChars, Case) :-
 143    (   var(StopChars)
 144    ->  Arg1 = [10]
 145    ;   Arg1 = StopChars
 146    ),
 147    (   var(WordChars)
 148    ->  string_codes("01234567890_", Arg2)
 149    ;   Arg2 = WordChars
 150    ),
 151    (   var(Case)
 152    ->  Arg3 = lowercase
 153    ;   Arg3 = Case
 154    ),
 155    rl_readln(P, EOF, Arg1, Arg2, Arg3).
 156
 157rl_readln(P, EOF, StopChars, WordChars, Case) :-
 158    rl_initread(L, EOF, StopChars),
 159    rl_blanks(L, LL),
 160    !,
 161    rl_words(P, LL,[], options(WordChars, Case)),
 162    !.
 163
 164rl_initread(S, EOF, StopChars) :-
 165    get_code(K),
 166    rl_readrest(K, S, EOF, StopChars).
 167
 168rl_readrest(-1, [], end_of_file, _) :- !.
 169rl_readrest(0'\\, [K1|R], EOF, StopChars) :-
 170    get_code(K1),                   % skip it, take next char
 171    get_code(K2),
 172    rl_readrest(K2, R, EOF, StopChars).
 173rl_readrest(K, [K], K, StopChars) :-    % the stop char(s)
 174    member(K, StopChars),
 175    !.
 176rl_readrest(K, [K|R], EOF, StopChars) :-        % the normal case
 177    get_code(K1),
 178    rl_readrest(K1, R, EOF, StopChars).
 179
 180rl_words([W|Ws], S1, S4, Options) :-
 181    rl_word(W, S1, S2, Options),
 182    !,
 183    rl_blanks(S2, S3),
 184    rl_words(Ws, S3, S4, Options).
 185rl_words([], S1, S2, _) :-
 186    rl_blanks(S1, S2),
 187    !.
 188rl_words([], S, S, _).
 189
 190rl_word(N, [46|S1], S3, _) :-           % the dot can be in the beginning of
 191    rl_basic_num(N1, S1, S2),        % a real number.
 192    !,
 193    rl_basic_nums(Rest, S2, S3, dot),       % only ONE dot IN a number !!
 194    name(N,[48, 46, N1|Rest]).      % i.e '0.<number>'
 195rl_word(N, S0, S2, _) :-
 196    rl_basic_num(N1, S0, S1),
 197    !,
 198    rl_basic_nums(Rest, S1, S2, _),
 199    name(N,[N1|Rest]).
 200rl_word(W, S0, S2, Options) :-
 201    rl_basic_char(C1, S0, S1, Options),
 202    !,
 203    rl_basic_chars(Rest, S1, S2, Options),
 204    name(W, [C1|Rest]).
 205rl_word(P,[C|R], R, _) :-
 206    name(P, [C]),
 207    !.
 208
 209rl_basic_chars([A|As], S0, S2, Options) :-
 210    rl_basic_char(A, S0, S1, Options),
 211    !,
 212    rl_basic_chars(As, S1, S2, Options).
 213rl_basic_chars([], S, S, _).
 214
 215rl_basic_nums([46,N|As], [46|S1], S3, Dot) :- % a dot followed by >= one digit
 216    var(Dot),                       % but not found a dot already
 217    rl_basic_num(N, S1, S2),
 218    !,
 219    rl_basic_nums(As, S2, S3, dot).
 220rl_basic_nums([A|As], S0, S2, Dot) :-
 221    rl_basic_num(A, S0, S1),
 222    !,
 223    rl_basic_nums(As, S1, S2, Dot).
 224rl_basic_nums([], S, S, _).
 225
 226rl_blanks([C|S0], S1) :-
 227    rl_blank(C),
 228    !,
 229    rl_blanks(S0, S1).
 230rl_blanks(S, S).
 231
 232/* Basic Character types that form rl_words together */
 233
 234rl_basic_char(A, [C|S], S, options(WordChars, Case)) :-
 235    rl_lc(C, A, WordChars, Case).
 236
 237rl_basic_num(N, [N|R], R) :-
 238    code_type(N, digit).
 239
 240rl_blank(X) :-
 241    code_type(X, space).
 242
 243rl_lc(X, X1, _, Case) :-
 244    code_type(X, upper),
 245    !,
 246    rl_fix_case(Case, X, X1).
 247rl_lc(X, X, _, _) :-
 248    code_type(X, lower).
 249rl_lc(X, X, WordChars, _) :-
 250    memberchk(X, WordChars).
 251
 252rl_fix_case(lowercase, U, L) :-
 253    !,
 254    code_type(L, lower(U)).
 255rl_fix_case(_, C, C).