:- ensure_loaded('$REGULUS/PrologLib/compatibility').

%======================================================================

:- module(amt_utilities,
	  [amt_csv_file_to_list_of_alists/2,
	   amt_csv_file_to_list_of_alists/4,
	   amt_csv_file_to_list_of_alists/5,
	   reorganise_cvs_file/5,
	   reorganise_cvs_file/6,

	   check_csv_file_for_duplicate_lines/3]
    ).

:- use_module('$REGULUS/PrologLib/utilities').

:- use_module(library(lists)).

%======================================================================

% amt_csv_file_to_list_of_alists(+InFile, -FinalContents)
%
% Convenience entry point: delegates to amt_csv_file_to_list_of_alists/4
% using the double-quote character as the field delimiter and the comma
% as the field separator.
amt_csv_file_to_list_of_alists(InFile, FinalContents) :-
	DoubleQuote = 0'", %" To keep Emacs happy
	Comma = 0',,
	amt_csv_file_to_list_of_alists(InFile, DoubleQuote, Comma, FinalContents).

% amt_csv_file_to_list_of_alists(+InFile, +DelimiterChar, +SeparatorChar, -FinalContents)
%
% Same as amt_csv_file_to_list_of_alists/5, with the encoding argument
% fixed to the atom default_encoding.
amt_csv_file_to_list_of_alists(InFile, DelimiterChar, SeparatorChar, FinalContents) :-
	Encoding = default_encoding,
	amt_csv_file_to_list_of_alists(InFile, DelimiterChar, SeparatorChar, Encoding, FinalContents).

% amt_csv_file_to_list_of_alists(+InFile, +DelimiterChar, +SeparatorChar, +Encoding, -FinalContents)
%
% Reads InFile, treats its first record as the header and turns every
% following record into a list of Key-Value pairs, one pair per header
% column. Keys are input(Name) / answer(Name) for "Input.Name" and
% "Answer.Name" columns, and the unchanged header element otherwise.
% Fails if the file cannot be read or has no header record; commits to
% the first solution.
amt_csv_file_to_list_of_alists(InFile, DelimiterChar, SeparatorChar, Encoding, FinalContents) :-
	read_batch_file(InFile, DelimiterChar, SeparatorChar, Encoding, RawHeader, RawRecords),
	parse_header(RawHeader, ColumnKeys),
	tag_contents(RawRecords, ColumnKeys, FinalContents),
	!.

%======================================================================

% reorganise_cvs_file(+InFile, +NCopiesPerLine, +Header, +Filler, -OutFile)
%
% Same as reorganise_cvs_file/6, with the encoding argument fixed to the
% atom default_encoding.
reorganise_cvs_file(InFile, NCopiesPerLine, Header, Filler, OutFile) :-
	Encoding = default_encoding,
	reorganise_cvs_file(InFile, Encoding, NCopiesPerLine, Header, Filler, OutFile).

% reorganise_cvs_file(+InFile, +Encoding, +NCopiesPerLine, +Header, +Filler, +OutFile)
%
% Reads the CSV file InFile (double-quote delimiter, comma separator) and
% writes OutFile, in which every output record is built from
% NCopiesPerLine consecutive input records. All input records are treated
% as data; none is skipped as a header.
%
% Header and Filler must each be a list of atoms with as many elements as
% the first input record. The output header consists of NCopiesPerLine
% numbered copies of Header (suffix 1, 2, ...). Filler stands in for the
% missing records when the final group is incomplete.
%
% Progress messages are printed with format/2. If any step fails, the
% second clause prints a "bad call" error and the predicate fails.
reorganise_cvs_file(InFile, Encoding, NCopiesPerLine, Header, Filler, OutFile) :-
	safe_absolute_file_name(InFile, SourcePath),
	safe_absolute_file_name(OutFile, TargetPath),

	csv_file_to_list_of_lists(SourcePath, Encoding, 0'", 0',, Records), %" To keep Emacs happy
	length(Records, RecordCount),
	format('~N--- Read CSV file (~d records) ~w~n', [RecordCount, SourcePath]),

	% Both checks print their own error message before failing.
	warn_if_bad_header_or_filler(Records, Header, header),
	warn_if_bad_header_or_filler(Records, Filler, filler),

	% The header is built as a difference list closed off with [].
	full_header(NCopiesPerLine, 1, Header, NumberedHeader-[]),

	reorganise_cvs_list(Records, NCopiesPerLine, Filler, Rows),

	length(Rows, RowCount),
	list_of_lists_to_csv_file([NumberedHeader | Rows], TargetPath, Encoding),
	format('~N--- Written CSV file (~d records, ~d items per line) ~w~n', [RowCount, NCopiesPerLine, TargetPath]),
	!.
reorganise_cvs_file(InFile, Encoding, NCopiesPerLine, Header, Filler, OutFile) :-
	BadCall = reorganise_cvs_file(InFile, Encoding, NCopiesPerLine, Header, Filler, OutFile),
	format('~N*** Error: bad call: ~w~n',
	       [BadCall]),
	fail.

% warn_if_bad_header_or_filler(+Records, +Value, +Kind)
%
% Checks that Value (a header or filler, as named by Kind) is a list of
% atoms with the same number of elements as the first record in Records.
% An empty Records list is accepted without any check. On a bad Value an
% error message is printed and the predicate fails.
warn_if_bad_header_or_filler([], _Value, _Kind).
warn_if_bad_header_or_filler([FirstRecord | _], Value, Kind) :-
	length(FirstRecord, Width),
	(   is_list_of_atoms(Value), length(Value, Width) ->
	    true
	;
	    format('~N*** Error: bad value for ~w "~w". Should be list with ~d atomic elements', [Kind, Value, Width]),
	    fail
	).

% full_header(+Remaining, +Index, +Header, ?Front-Back)
%
% Builds, as the difference list Front-Back, Remaining numbered copies of
% Header. The first copy is numbered Index, the next Index+1, and so on.
% A Remaining of zero or less yields the empty difference list.
full_header(Remaining, _Index, _Header, Tail-Tail) :-
	Remaining =< 0,
	!.
full_header(Remaining, Index, Header, Front-Back) :-
	add_number_to_header(Header, Index, Front-Middle),
	RemainingAfter is Remaining - 1,
	NextIndex is Index + 1,
	!,
	full_header(RemainingAfter, NextIndex, Header, Middle-Back).

% add_number_to_header(+Names, +Index, ?Front-Back)
%
% Adds one element per name in Names to the difference list Front-Back.
% Each element is produced by format_to_atom/3 with the format '~w~d'
% applied to the name and Index (presumably the name with Index appended;
% format_to_atom/3 is defined outside this file).
add_number_to_header([], _Index, Tail-Tail).
add_number_to_header([Name | Names], Index, Front-Back) :-
	format_to_atom('~w~d', [Name, Index], NumberedName),
	Front = [NumberedName | Middle],
	!,
	add_number_to_header(Names, Index, Middle-Back).	

% reorganise_cvs_list(+InList, +NCopiesPerLine, +Filler, -OutList)
%
% Groups the records of InList into consecutive segments of
% NCopiesPerLine records and turns each segment into one output record
% with append_list/2 (presumably concatenation of the segment's records;
% append_list/2 is defined outside this file). If the last segment is
% short, it is completed with copies of Filler before being combined.
%
% NCopiesPerLine must be a positive integer. Any other value, when InList
% is non-empty, ends in the final clause, which prints a "bad call" error
% and fails.
%
% The cut commits to the empty-list case. Without it, backtracking into
% this predicate would reach the padding clause below and produce a
% spurious record consisting only of fillers.
reorganise_cvs_list([], _NCopiesPerLine, _Filler, []) :-
	!.
reorganise_cvs_list(InList, NCopiesPerLine, Filler, OutList) :-
	% Guard against a zero segment size: an empty segment could be split
	% off InList forever without consuming any record.
	NCopiesPerLine > 0,
	% append/3 fails when InList has fewer than NCopiesPerLine records,
	% so no separate length check on InList is needed here.
	length(InitialSegment, NCopiesPerLine),
	append(InitialSegment, InRest, InList),
	append_list(InitialSegment, NewRecord),
	OutList = [NewRecord | OutRest],
	!,
	reorganise_cvs_list(InRest, NCopiesPerLine, Filler, OutRest).
reorganise_cvs_list(InList, NCopiesPerLine, Filler, [LastRecord]) :-
	% Fewer records than a full segment remain: pad with fillers.
	length(InList, N),
	N < NCopiesPerLine,
	NMissing is NCopiesPerLine - N,
	append_n_copies(NMissing, [Filler], Missing),
	append(InList, Missing, CompletedSegment),
	append_list(CompletedSegment, LastRecord),
	!.
reorganise_cvs_list(InList, NCopiesPerLine, Filler, OutList) :-
	format('~N*** Error: bad call: ~w~n', [reorganise_cvs_list(InList, NCopiesPerLine, Filler, OutList)]),
	fail.	

%======================================================================

% read_batch_file(+InFile, +DelimiterChar, +SeparatorChar, +Encoding, -Header, -Contents)
%
% Reads InFile as a list of records and splits it into its first record
% (Header) and the remaining records (Contents). Prints the number of
% remaining records. Fails if the file yields no records at all.
read_batch_file(InFile, DelimiterChar, SeparatorChar, Encoding, Header, Contents) :-
	safe_absolute_file_name(InFile, Path),
	csv_file_to_list_of_lists(Path, Encoding, DelimiterChar, SeparatorChar, AllRecords),
	AllRecords = [Header | Contents],
	length(Contents, RecordCount),
	format('~N--- Read file (~d records) ~w~n', [RecordCount, Path]),
	!.

%======================================================================

% parse_header(+RawColumns, -Keys)
%
% Maps every element of the header record to its key form using
% parse_header_element/2, preserving order.
parse_header([], []).
parse_header([RawColumn | RawColumns], [Key | Keys]) :-
	parse_header_element(RawColumn, Key),
	!,
	parse_header(RawColumns, Keys).

% parse_header_element(+Elt, -Key)
%
% Key is input(Name) or answer(Name) when the text of Elt matches the
% header_element//1 grammar ("Input.Name" / "Answer.Name"). Otherwise Key
% is Elt itself. The last clause prints an error and is only reached
% when neither of the first two clauses can unify with the call.
parse_header_element(Elt, Key) :-
	atom_codes(Elt, Codes),
	% The grammar must consume the whole code list.
	header_element(Key, Codes, []),
	!.
parse_header_element(Elt, Elt) :-
	!.
parse_header_element(Elt, Key) :-
	BadCall = parse_header_element(Elt, Key),
	format('~N*** Error: bad call: ~w~n', [BadCall]),
	fail.

% parse_header_element1(+Codes, -Key)
%
% Succeeds when the complete code list Codes is parsed by
% header_element//1, yielding Key.
parse_header_element1(Codes, Key) :-
	Remainder = [],
	header_element(Key, Codes, Remainder).

%----------------------------------------------------------------------

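/*

Sample header line from an AMT batch results file. Column names starting
with "Input." or "Answer." are parsed by header_element//1 below;
parse_header_element/2 keeps every other column name unchanged.
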
HITId	HITTypeId	Title	Description	Keywords	Reward	CreationTime	MaxAssignments	RequesterAnnotation	AssignmentDurationInSeconds	AutoApprovalDelayInSeconds	Expiration	NumberOfSimilarHITs	LifetimeInSeconds	AssignmentId	WorkerId	AssignmentStatus	AcceptTime	SubmitTime	AutoApprovalTime	ApprovalTime	RejectionTime	RequesterFeedback	WorkTimeInSeconds	LifetimeApprovalRate	Last30DaysApprovalRate	Last7DaysApprovalRate	Input.Command1	Input.Command2	Input.Command3	Input.Command4	Input.Command5	Input.Command6	Input.Command7	Input.Command8	Input.Command9	Input.Command10	Input.Command11	Input.Command12	Input.Command13	Input.Command14	Input.Command15	Input.Command16	Input.Command17	Input.Command18	Input.Command19	Input.Command20	Input.Command21	Input.Command22	Input.Command23	Input.Command24	Input.Command25	Answer.Command4	Answer.Comments	Answer.Command16	Answer.Command5	Answer.Gender	Answer.Command17	Answer.Command6	Answer.Command18	Answer.Location	Answer.Command7	Answer.Command19	Answer.Command20	Answer.Command8	Answer.Command21	Answer.Command9	Answer.Command10	Answer.Command22	Answer.StartTime	Answer.Command11	Answer.Command23	Answer.Command12	Answer.Command24	Answer.Command1	Answer.Command13	Answer.Command25	Answer.Command2	Answer.Command14	Answer.Command3	Answer.EndTime	Answer.Command15	Approve	Reject

*/

% header_element(-Key)//
%
% Recognises the two specially treated kinds of column name:
%   "Input.<Name>"   gives input(Name)
%   "Answer.<Name>"  gives answer(Name)
% where Name is the atom made of everything following the prefix.
header_element(input(Name)) -->
	"Input.",
	command_name(Name).

header_element(answer(Name)) -->
	"Answer.",
	command_name(Name).

% command_name(-Name)//
%
% Consumes the codes matched by char_sequence//1 and converts them to
% the atom Name.
command_name(Name) -->
	char_sequence(Codes),
	{ atom_codes(Name, Codes) }.

% char_sequence(-Codes)//
%
% Greedily consumes input one item at a time. The cut commits to taking
% each available item, so the empty sequence is only chosen once no
% further item can be taken.
char_sequence([Code | Codes]) -->
	[Code],
	!,
	char_sequence(Codes).
char_sequence([]) -->
	[].

%----------------------------------------------------------------------

% tag_contents(+Records, +Keys, -TaggedRecords)
%
% Converts every record in Records into a list of Key-Value pairs by
% pairing its fields with Keys (see tag_contents_line/3). Fails as soon
% as one record cannot be tagged.
tag_contents([], _Keys, []).
tag_contents([Record | Records], Keys, [Tagged | TaggedRecords]) :-
	tag_contents_line(Record, Keys, Tagged),
	!,
	tag_contents(Records, Keys, TaggedRecords).

% tag_contents_line(+Fields, +Keys, -Pairs)
%
% Pairs the fields of one record with the header keys, position by
% position, giving a list of Key-Field terms.
%
% - If the record is shorter than the header, the missing trailing
%   fields are filled in with the empty atom ''.
% - If the record is longer than the header, the surplus fields cannot
%   be tagged. This is reported with an error message before failing,
%   instead of failing silently.
tag_contents_line([], [], []).
tag_contents_line([F | R], [HF | HR], [HF-F | R1]) :-
	!,
	tag_contents_line(R, HR, R1).
% There may be missing fields at the end of the line. Fill them in with null values
tag_contents_line([], [HF | HR], [HF-'' | R1]) :-
	!,
	tag_contents_line([], HR, R1).
% More fields than header columns: report the leftover fields and fail.
tag_contents_line([F | R], [], _Tagged) :-
	format('~N*** Error: line has more fields than the header; surplus fields: ~w~n', [[F | R]]),
	fail.

%----------------------------------------------------------------------

% check_csv_file_for_duplicate_lines(+InFile, +Encoding, +Separator)
%
% Reads InFile as CSV (double-quote delimiter, the given Separator),
% prints the number of records read, and then prints every record that
% occurs more than once together with its multiplicity.
check_csv_file_for_duplicate_lines(InFile, Encoding, Separator) :-
	safe_absolute_file_name(InFile, Path),
	csv_file_to_list_of_lists(Path, Encoding, 0'", Separator, Records), %" To keep Emacs happy
	length(Records, RecordCount),
	format('~N--- Read CSV file (~d records) ~w~n', [RecordCount, Path]),
	% The multiset is consumed below as a list of Count-Record pairs.
	list_to_ordered_multiset(Records, CountedRecords),
	print_duplicate_lines_in_multiset(CountedRecords),
	!.

% print_duplicate_lines_in_multiset(+CountedRecords)
%
% Walks a list of Count-Record pairs and prints every record whose
% Count is greater than 1. Other records are skipped silently.
print_duplicate_lines_in_multiset([]).
print_duplicate_lines_in_multiset([Count-Record | Rest]) :-
	(   Count > 1 ->
	    print_line_from_multiset(Count, Record)
	;
	    otherwise ->
	    true
	),
	!,
	print_duplicate_lines_in_multiset(Rest).

% print_line_from_multiset(+Count, +Record)
%
% Prints a "Multiplicity" heading with Count, followed by the elements
% of Record, one per line.
print_line_from_multiset(Count, Record) :-
	format('~N~nMultiplicity: ~d~n', [Count]),
	print_lines_from_multiset1(Record).

% print_lines_from_multiset1(+Fields)
%
% Prints every element of Fields on a line of its own.
print_lines_from_multiset1([]).
print_lines_from_multiset1([Field | Fields]) :-
	format('~N~w~n', [Field]),
	!,
	print_lines_from_multiset1(Fields).