% Parses lines from STDIN.
% Requires SWI-Prolog (tested with v6.1.4).
%
% Usage:
%
% > echo "Mary is a friend of Mary ." | swipl -f parse.pl -g main -t halt -q
% OK: Mary  is  a  friend of  Mary  .
%
% Author: Kaarel Kaljurand
% Version: 2012-05-05

% The readutil-library provides 'read_line_to_codes'.
% According to the SWI-Prolog manual, the predicates of this library
% perform better if the clib-package is installed because in this case
% the C implementation of these predicates is used.
:- use_module(library(readutil)).

% The encoding of this file.
:- encoding(utf8).

% The default encoding of text-streams.
:- set_prolog_flag(encoding, utf8).

:- style_check(-singleton).
:- consult(grammar_dcg).
:- style_check(+singleton).

%% main
%
% Entry point. Sets the input prompt to the empty atom, so that nothing
% is echoed while lines are read, and then hands over to main_loop/0.
main :-
	prompt(_PreviousPrompt, ''),
	%set_stream_encoding(user_input, utf8),
	%set_stream_encoding(user_output, utf8),
	main_loop.

%% main_loop
%
% Reads lines from user_input until end of file. Every line is tokenized
% with tokens//1 and the resulting tokens are passed to parse/1, which
% reports OK or FAIL for the line.
%
% A line that cannot be tokenized is reported as FAIL, with the raw line
% as the only "token", and processing continues with the next line.
% Written as (A, B, C ; true), a tokenization failure used to end the
% whole loop silently, dropping all remaining input.
%
% The if-then-else commits to the first tokenization and leaves no
% choice point behind, so the recursive call is a proper last call.
main_loop :-
	read_line_to_codes(user_input, Codes),
	(
		Codes == end_of_file
	->
		true
	;
		(
			phrase(tokens(Tokens), Codes)
		->
			parse(Tokens)
		;
			% Not tokenizable: report the raw line and carry on.
			atom_codes(Line, Codes),
			format_tokens('FAIL', [Line])
		),
		main_loop
	).

%% parse(+Tokens)
%
% Checks whether Tokens can be parsed as text//3 and prints the token
% list tagged with 'OK' or 'FAIL' accordingly.
% (text//3 is not defined in this file; presumably it comes from the
% consulted grammar_dcg.)
%
% Only the first parse is considered: multiple parses are not expected,
% and they would not change the outcome anyway.
parse(Tokens) :-
	(
		phrase(text(_, _, []/_), Tokens)
	->
		Tag = 'OK'
	;
		Tag = 'FAIL'
	),
	format_tokens(Tag, Tokens).


%% tokens(-Tokens)//
%
% Splits a list of character codes into a list of tokens (atoms).
% Two neighbouring tokens must be separated by at least one whitespace
% code. Whitespace in front of a token is skipped only when that token
% is followed by another one.
tokens([]) -->
	[].
tokens([Only]) -->
	token(Only).
tokens([First, Second | Others]) -->
	ws(_),
	token(First),
	ws(1),
	% Commit to this token and separator once the separator is found.
	!,
	tokens([Second | Others]).


%% ws(?Flag)//
%
% Consumes whitespace codes. Flag is 1 if at least one whitespace code
% was consumed and 0 if nothing was consumed. The longest run of
% whitespace is tried first.
ws(1) -->
	[Code],
	{ code_type(Code, space) },
	ws(_).
ws(0) -->
	[].


%% token(?Token)//
%
% Recognizes a single token. The fixed multi-word tokens listed in
% multiword_token/1 are tried first, in the listed order; their text is
% exactly the characters of the token atom. Any other token is a word//1.
token(Token) -->
	{ multiword_token(Token), atom_codes(Token, Codes) },
	Codes.
token(Token) -->
	word(Token).

%% multiword_token(?Token)
%
% Table of the tokens that contain spaces.
multiword_token('asked by').
multiword_token('man of').
multiword_token('woman of').
multiword_token('friend of').
multiword_token('the man').
multiword_token('the woman').
multiword_token('the friend').
multiword_token('at least').
multiword_token('at most').
multiword_token('more than').
multiword_token('less than').
multiword_token('it is false that').
multiword_token('there is').
multiword_token('there are').
multiword_token('for every').
multiword_token('does not').
multiword_token('do not').
multiword_token('is not').
multiword_token('are not').

%% word(?Word)//
%
% Recognizes a non-empty run of non-whitespace codes and turns it into
% the atom Word.
word(Word) -->
	letters(Codes),
	{ Codes = [_ | _], atom_codes(Word, Codes) }.

%% letters(?Codes)//
%
% Consumes a (possibly empty) run of codes accepted by letter//1.
% The longest run is tried first; shorter ones are found on backtracking.
letters([Code | Codes]) -->
	letter(Code),
	letters(Codes).
letters([]) -->
	[].

%% letter(?Code)//
%
% Consumes exactly one code, which must not be a whitespace code.
letter(Code) -->
	[Code],
	{ \+ code_type(Code, space) }.


%% set_stream_encoding(+Stream, +Enc)
%
% Changes the encoding of Stream to Enc by setting the stream's
% encoding/1 property.
%
% @param Stream is the stream to modify
% @param Enc is in {utf8, ...}
%
set_stream_encoding(Stream, Enc) :-
	Property = encoding(Enc),
	set_stream(Stream, Property).


%% format_tokens(+Tag, +Tokens)
%
% Prints one line to the current output: Tag, a colon and a space,
% then the tokens joined by two spaces. Two spaces are used so that
% tokens which themselves contain a space remain distinguishable.
format_tokens(Tag, Tokens) :-
	atomic_list_concat(Tokens, '  ', Joined),
	format('~w: ~w~n', [Tag, Joined]).