:- ensure_loaded('$REGULUS/PrologLib/compatibility').

:- module(parse_xml_rec_result,
	[parse_xml_rec_result/2,
	 parse_xml_rec_result/3,
	 test_parse_xml_rec_result/1
	]).

%----------------------------------------------------------------------

:- use_module('$REGULUS/Prolog/strcat_semantics').
:- use_module('$REGULUS/Prolog/regulus_utilities').
:- use_module('$REGULUS/PrologLib/utilities').

:- use_module(library(lists)).
:- use_module(library(xml)).

%----------------------------------------------------------------------

test_parse_xml_rec_result(discard_sem) :-
	File = '$REGULUS/Examples/Calendar/doc/4-best.txt',
	absolute_file_name(File, AbsFile),
	read_file_to_string(AbsFile, String),
	atom_codes(Atom, String),
	format('~N~n~w~n~n', [Atom]),
	parse_xml_rec_result(String, Result),
	prettyprintq(Result).
test_parse_xml_rec_result(discard_sem_n9_1) :-
	File = '$DEME/doc/rec_result_1.txt',
	absolute_file_name(File, AbsFile),
	read_file_to_string(AbsFile, String),
	atom_codes(Atom, String),
	format('~N~n~w~n~n', [Atom]),
	parse_xml_rec_result(String, Result),
	prettyprintq(Result).
test_parse_xml_rec_result(discard_sem_n9_2) :-
	File = '$DEME/doc/rec_result_2.txt',
	absolute_file_name(File, AbsFile),
	read_file_to_string(AbsFile, String),
	atom_codes(Atom, String),
	format('~N~n~w~n~n', [Atom]),
	parse_xml_rec_result(String, Result),
	prettyprintq(Result).
test_parse_xml_rec_result(discard_sem_n9_2a) :-
	File = '$DEME/doc/rec_result_2a.txt',
	absolute_file_name(File, AbsFile),
	read_file_to_string(AbsFile, String),
	atom_codes(Atom, String),
	format('~N~n~w~n~n', [Atom]),
	parse_xml_rec_result(String, Result),
	prettyprintq(Result).
test_parse_xml_rec_result(n9_with_sem_1) :-
	File = '$DEME/doc/n9_with_sem_1.txt',
	absolute_file_name(File, AbsFile),
	read_file_to_string(AbsFile, String),
	atom_codes(Atom, String),
	format('~N~n~w~n~n', [Atom]),
	parse_xml_rec_result(String, keep_sem, Result),
	prettyprintq(Result).
test_parse_xml_rec_result(keep_sem) :-
	File = '$MED_SLT2/Runtime/Prolog/n_best_xml_example.txt',
	absolute_file_name(File, AbsFile),
	read_file_to_string(AbsFile, String),
	atom_codes(Atom, String),
	format('~N~n~w~n~n', [Atom]),
	parse_xml_rec_result(String, keep_sem, Result),
	prettyprintq(Result).
test_parse_xml_rec_result(slm) :-
	File = '$MED_SLT2/Runtime/Prolog/slm_xml_example.txt',
	absolute_file_name(File, AbsFile),
	read_file_to_string(AbsFile, String),
	atom_codes(Atom, String),
	format('~N~n~w~n~n', [Atom]),
	parse_xml_rec_result(String, Result),
	prettyprintq(Result).

%----------------------------------------------------------------------

parse_xml_rec_result(String, Result) :-
	parse_xml_rec_result(String, discard_sem, Result).

%----------------------------------------------------------------------

parse_xml_rec_result(String, SemTreatment, HypsList) :-
	xml_parse_or_complain(String, XMLTerm),
	replace_strings_with_atoms_in_xml(XMLTerm, XMLTerm1),
	xml_term_to_hyps_list(XMLTerm1, SemTreatment, HypsList),
	!.
parse_xml_rec_result(String, SemTreatment, Result) :-
	format2error('~N*** Error: bad call: ~w~n', [parse_xml_rec_result(String, SemTreatment, Result)]),
	fail.

xml_parse_or_complain(String, XMLTerm) :-
	xml_parse(String, XMLTerm),
	!.
xml_parse_or_complain(String, _XMLTerm) :-
	format2error('~N*** Error: unable to parse XML string to Prolog: ~s~n', [String]),
	fail.
	       
xml_term_to_hyps_list(XMLTerm, SemTreatment, HypsList) :-
	get_interpretation_list_from_result(XMLTerm, InterpretationList),
	interpretation_list_to_hyps_list(InterpretationList, SemTreatment, HypsList).

% Format has changed a couple of times. Try to handle all variants.
get_interpretation_list_from_result(XMLTerm, InterpretationList) :-
	XMLTerm = xml(_VersionInfo, 
		      [element(result,
			       _GrammarInfo,
			       InterpretationList)]),
	!.
get_interpretation_list_from_result(XMLTerm, InterpretationList) :-
	XMLTerm = xml([], 
		      [_VersionInfo, 
		       element(result, 
			       _GrammarInfo, 
			       InterpretationList)]),
	!.
get_interpretation_list_from_result(XMLTerm, _InterpretationList) :-
	format2error('~N*** Error: unable to find interpretation list in Prolog XML result:~n', []),
	prettyprint(XMLTerm),
	fail.

interpretation_list_to_hyps_list([], _SemTreatment, []).
interpretation_list_to_hyps_list([F | R], SemTreatment, [F1 | R1]) :-
	interpretation_to_hyp(F, SemTreatment, F1),
	!,
	interpretation_list_to_hyps_list(R, SemTreatment, R1).

/*
element(interpretation, [confidence=59], 
                      [element(instance, [], 
                               [element(value, [confidence=0], 
                                        ['\n                ((whq (apply (lambda x ((tense present) (existential there_is) (subj (x (duration ((spec next) (time_period week))))))) (\n            '])]),
                       element(input, 
                               [(mode)=speech, confidence=65, 
                                'timestamp-start'='2007-07-26T11:21:06.749', 
                                'timestamp-end'='2007-07-26T11:21:06.921'],
                               ['\n            what meetings are there next week\n            ',
                                element(extensions, [], 
                                        ['\n                ', 
                                         element('word-confidence',[],[' 58 64 20 77 67 70 ']),
                                         '\n            ']),
                                '\n        ']),
                       element(extensions, [], 
                               [element(probability,[],[' -3069 ']), 
                                element('nl-probability',[],[' 0 ']), 
                                element('necessary-word-confidence',[],[' 0 '])])]),
*/

% Format for dialogue server (Nuance 8.5)
interpretation_to_hyp(Interpretation, discard_sem, Hyp) :-
	Interpretation = element(interpretation, [confidence=Confidence], 
				 [_RecResultElement,
				  element(input, 
					  _ModeInfo,
					  [RawRecStringAtom | _Rest]) |
				  _OtherElements]),
	split_atom_into_words(RawRecStringAtom, Words),
	join_with_spaces(Words, RecStringAtom),
	Hyp = nbest_hyp(Confidence, RecStringAtom).
% Format for dialogue server (Nuance 9, discard semantics)
interpretation_to_hyp(Interpretation, discard_sem, Hyp) :-
	Interpretation = element(interpretation, [grammar=_Grammar, confidence=Confidence], 
				 [element(input, 
					  _ModeInfo,
					  [RawRecStringAtom0 | _Rest]) |
				 _OtherElements]),
	replace_double_quotes_with_single(RawRecStringAtom0, RawRecStringAtom),
	split_atom_into_words(RawRecStringAtom, Words),
	join_with_spaces(Words, RecStringAtom),
	Hyp = nbest_hyp(Confidence, RecStringAtom).
/*
[element(interpretation, 
        [(grammar = 'session:testgrammar@paideiacomputing.com'), (confidence=0.81)],
        [element(input,[(mode)=speech],['grab the gun']), 
         element(instance, [], 
                 [element('SWI_literal', [], 
                          ['grab the gun']),
                  element(value, [confidence=81], 
                          ['concat([], concat([[imp, concat([[tense, imperative], [subj, [[spec,pro], [head, you]]]], concat(concat(concat([[verb_type, trans]], [[verb, grab]]), [[obj, concat(concat([[spec, the_sing]], [[head, gun]]), [])]]), []))]], []))']),
                  element('SWI_grammarName', [], 
                          ['session:testgrammar@paideiacomputing.com']),
                  element('SWI_meaning', [], 
                          ['{value:concat([], concat([[imp, concat([[tense, imperative], [subj, [[spec, pro], [head, you]]]], concat(concat(concat([[verb_type, trans]], [[verb, grab]]), [[obj, concat(concat([[spec, the_sing]], [[head, gun]]), [])]]), []))]], []))}'])])]),
*/
% Format for dialogue server (Nuance 9, strcat semantics)
interpretation_to_hyp(Interpretation, keep_sem_strcat, Hyp) :-
	Interpretation = element(interpretation, [grammar=_Grammar, confidence=Confidence], 
				 [element(input, 
					  _ModeInfo,
					  [RawRecStringAtom0 | _Rest]) |
				 OtherElements]),
	using_strcat_semantics,
	replace_double_quotes_with_single(RawRecStringAtom0, RawRecStringAtom),
	split_atom_into_words(RawRecStringAtom, Words),
	join_with_spaces(Words, RecStringAtom),
	member(element(instance, _, InstanceElements), OtherElements),
	member(element('SWI_meaning', _, [RawSem]), InstanceElements),
	unpack_strcat_semantics_from_mrcp(RawSem, LF),
	Hyp = nbest_hyp(Confidence, string_and_lf(RecStringAtom, LF)).

% Format for translation server
interpretation_to_hyp(Interpretation, keep_sem, Hyp) :-
	Interpretation = element(interpretation, [confidence=Confidence], 
				 [element(instance, _, 
					  [element(value, _MoreConfInfo, [LispFormattedSemAtom])]),
				  element(input, 
					  _ModeInfo,
					  [RawRecStringAtom | _Rest]) |
				  _OtherElements]),
	split_atom_into_words(RawRecStringAtom, Words),
	join_with_spaces(Words, RecStringAtom),
	parse_lisp_formatted_sem_value(LispFormattedSemAtom, LF),
	Hyp = hyp(RecStringAtom, Confidence, LF).
	
%----------------------------------------------------------------------

replace_strings_with_atoms_in_xml(Atom, Atom) :-
	atomic(Atom),
	!.
replace_strings_with_atoms_in_xml(pcdata(String), Atom) :-
	is_prolog_string(String),
	(   safe_number_codes(Atom, String)
	;
	    atom_codes(Atom, String)
	),
	!.
replace_strings_with_atoms_in_xml(String, Atom) :-
	is_prolog_string(String),
	(   safe_number_codes(Atom, String)
	;
	    atom_codes(Atom, String)
	),
	!.
replace_strings_with_atoms_in_xml(Term, Term1) :-
	functor(Term, F, N),
	functor(Term1, F, N),
	replace_strings_with_atoms_in_xml_args(N, Term, Term1).

replace_strings_with_atoms_in_xml_args(I, _Term, _Term1) :-
	I =< 0,
	!.
replace_strings_with_atoms_in_xml_args(I, Term, Term1) :-
	I > 0,
	arg(I, Term, Arg),
	arg(I, Term1, Arg1),
	replace_strings_with_atoms_in_xml(Arg, Arg1),
	I1 is I - 1,
	!,
	replace_strings_with_atoms_in_xml_args(I1, Term, Term1).

%----------------------------------------------------------------------

parse_lisp_formatted_sem_value(LispFormattedSemAtom, LF) :-
	atom_codes(LispFormattedSemAtom, LispFormattedSemString),
	parse_s_expression(LispFormattedSemString, LF),
	!.
parse_lisp_formatted_sem_value(LispFormattedSemAtom, LF) :-
	format2error('~N*** Error: bad call: ~w~n',
		     [parse_lisp_formatted_sem_value(LispFormattedSemAtom, LF)]),
	fail.

parse_s_expression(LispFormattedString, LF) :-
	s_expression(LF, LispFormattedString, []),
	!.
parse_s_expression(LispFormattedString, _LF) :-
	format2error('~N*** Error: unable to treat "~s" as S-expression~n',
		     [LispFormattedString]),
	fail.

s_expression(S) -->
	optional_whitespaces,
	!,
	s_expression1(S),
	optional_whitespaces.

s_expression1(List) -->
	"(",
	!,
	s_expression_list(List),
	")".
s_expression1(Word) -->
	word(Word).

word(Word) -->
	non_special_char_sequence(Chars),
	{Chars \== []},
	{atom_codes(Word, Chars)},
	!.

s_expression_list([F | R]) -->
	s_expression(F),
	!,
	s_expression_list(R).
s_expression_list([]) -->
	optional_whitespaces,
	!.

optional_whitespaces -->
	[F],
	{whitespace_char(F)},
	!,
	optional_whitespaces.
optional_whitespaces -->
	[].

non_special_char_sequence([F | R]) -->
	[F],
	{non_special_char(F)},
	!,
	non_special_char_sequence(R).
non_special_char_sequence([]) -->
	[].

non_special_char(X) :-
	\+ special_char(X).

special_char(0'() :-
	!.
special_char(0')) :-
	!.
special_char(X) :-
	whitespace_char(X).

replace_double_quotes_with_single(Atom, Atom1) :-
	atom_codes(Atom, Str),
	replace_double_quotes_with_single_str(Str, Str1),
	atom_codes(Atom1, Str1).

replace_double_quotes_with_single_str([], []).
replace_double_quotes_with_single_str([F | R], [F1 | R1]) :-
	(   F = 0'" ->
	    F1 = 0'\'
	;
	    F1 = F
	),
	!,
	replace_double_quotes_with_single_str(R, R1).