#!/usr/bin/env swipl 

:- initialization main.

:- use_module(library(main)).
:- use_module(library(optparse)).
%:- use_module(library(semweb/rdf_db)).
:- use_module(library(option)).

:- use_module(library(semweb/rdf_library)).
:- use_module(library(semweb/rdf_http_plugin)).
:- use_module(library(semweb/rdf_cache)).
:- use_module(library(semweb/rdf_zlib_plugin)).
:- use_module(library(semweb/rdf11)).
:- use_module(library(semweb/rdfs)).
:- use_module(library(semweb/rdf_turtle)).
:- use_module(library(semweb/rdf_ntriples)).
:- use_module(library(sparqlprog)).
:- use_module(library(sparqlprog/labelutils)).
:- use_module(library(sparqlprog/ontologies/owl), []).
:- use_module(library(sparqlprog/owl_util)).
:- use_module(library(sparqlprog/owl_search_viz)).
:- use_module(library(sparqlprog/search_util)).

% Configure the RDF cache (library(semweb/rdf_cache)) at load time: the
% global cache directory is the relative path 'RDF-Cache', and
% create_global_directory(true) asks for it to be created when missing.
% TODO: make configurable
:- rdf_set_cache_options([ global_directory('RDF-Cache'),
                           create_global_directory(true)
                         ]).



% main(+Argv)
%
% Command-line entry point.
%
%  1. Parses Argv against the option specification below.  Repeated flags
%     are all kept (duplicated_flags(keepall)); positional arguments end up
%     in QueryAtoms.
%  2. Applies side-effecting options (handle_opts/1).
%  3. Optionally enters the sparqlprog REPL (--interactive) and/or a plain
%     Prolog toplevel (--prolog).
%  4. Hands the positional arguments to run/2.  Any exception raised by
%     run/2 is printed on user_error and the process exits with status 1;
%     otherwise the process halts normally.
main(Argv) :-
        Spec =
        [
         [opt(output), type(atom),
          longflags(['output']),
          shortflags([o]),
          help('Outfile')
         ],
         % The help text lists every format understood by write_result/2
         % and csv_format_separator/2.
         [opt(format), type(atom),
          longflags(['format']),
          shortflags([f]),
          help('Output format: csv, tsv, psv or prolog')
         ],
         [opt(input), type(atom),
          longflags(['input']),
          shortflags([i]),
          help('Input RDF file (use in combo with -x)')
         ],
         [opt(goal), type(term),
          longflags([goal]),
          shortflags([g]),
          help('Prolog goal to call')
         ],
         [opt(consult), type(atom),
          longflags([consult]),
          shortflags([c]),  % Stopped working in swipl8? interferes with command line flag?
          help('Prolog program to load/consult')
         ],
         [opt(use), type(atom),
          longflags([use]),
          shortflags([u]),
          help('Prolog module to use')
         ],
         [opt(use_no_import), type(atom),
          longflags([use_no_import]),
          shortflags(['U']),
          help('Prolog module to use, do not import all')
         ],
         [opt(debug), type(term),
          longflags([debug]),
          shortflags([d]),
          help('term passed to debug/1')
         ],
         [opt(attach), type(atom),
          longflags([attach]),
          shortflags(['A']),
          help('rdf_attach_library - path to void.ttl')
         ],
         [opt(assign), type(atom),
          longflags([assign]),
          shortflags([a]),
          help('assign a setting')
         ],
         [opt(service), type(atom),
          longflags([service]),
          shortflags([s]),
          help('name of remote service to query')
         ],
         [opt(limit),
          type(integer),
          longflags([limit]),
          help('SELECT limit. Note you may want to combine this with --no-autopage')
         ],
         [opt(header),
          type(atom),
          longflags([header]),
          help('comma-separated list of column headers, output as first row')
         ],
         [opt(autopage),
          type(boolean),
          default(true),
          longflags([autopage]),
          help('If true, then iterate multiple SPARQL queries with increasing offset')
         ],
         [opt(inject_labels),
          type(boolean),
          default(false),
          longflags([label]),
          shortflags([l]),
          help('Inject query for rdfs labels into query')
         ],
         [opt(label_predicate),
          type(term),
          default(rdfpred(rdfs:label)),
          longflags([label_predicate]),
          shortflags(['L']),
          help('predicate to use when looking up labels')
         ],
         [opt(show),
          type(boolean),
          default(false),
          longflags([show]),
          shortflags(['S']),
          help('Show SPARQL query')
         ],
         [opt(distinct),
          type(boolean),
          default(false),
          longflags([distinct]),
          shortflags(['Q']),
          help('UniQue results - forces SELECT DISTINCT')
         ],
         [opt(interactive),
          type(boolean),
          default(false),
          longflags([interactive]),
          shortflags(['I']),
          help('sparqlprog REPL')
         ],
         [opt(prolog),
          type(boolean),
          default(false),
          longflags([prolog]),
          shortflags(['P']),
          help('Interactive prolog')
         ],
         [opt(compile),
          type(boolean),
          default(false),
          longflags([compile]),
          shortflags(['C']),
          help('Compile Prolog to SPARQL (no execution)')
         ],
         [opt(construct),
          type(boolean),
          default(false),
          longflags([construct]),
          help('Compile Prolog to SPARQL CONSTRUCT (no execution)')
         ],
         [opt(verbose),
          type(boolean),
          default(false),
          longflags([verbose]),
          shortflags([v]),
          help('Same as --debug sparqlprog')
         ],
         [opt(stacktrace),
          type(boolean),
          default(false),
          longflags([stacktrace]),
          shortflags(['T']),
          help('Shows stack trace on error')
         ],
         [opt(prefix_mapping), type(atom),
          longflags(['prefix_mapping']),
          shortflags([m]),
          help('Declare a prefix mapping. The value should be of the form "PREFIX=URIBase"')
         ],
         [opt(execute),
          type(boolean),
          default(false),
          longflags([execute]),
          shortflags([e]),
          help('Executes query directly in prolog')
         ],
         [opt(index), type(term),
          longflags([index]),
          help('materialize index')
         ],
         [opt(query), type(term),
          longflags([query]),
          shortflags([q]),
          help('Prolog query')
         ]
        ],
        opt_parse(Spec, Argv, Opts, QueryAtoms, [duplicated_flags(keepall)]),
        handle_opts(Opts),
        declare_shacl_prefixes,
        opt_if_call(interactive,sparqlprog_shell(Opts),Opts),
        opt_if_call(prolog,prolog_shell(Opts),Opts),
        debug(pl2sparql,'Opts=~q',[Opts]),
        % Report any error from query execution and exit non-zero.
        catch(run(QueryAtoms, Opts),
              E,
              (   format(user_error,'~q~n',[E]),
                  halt(1))),
        halt.


% add_prefix(+Mapping)
%
% Registers an RDF prefix given an atom of the form 'PREFIX=URIBase'.
% The atom is split at the FIRST '=' only, so a URI base that itself
% contains '=' (e.g. 'http://example.org/q?id=') is kept intact.
%
% Throws error(domain_error(prefix_mapping, Mapping), _) when Mapping
% contains no '=' at all, rather than failing silently.
add_prefix(X) :-
        (   sub_atom(X, PreLen, 1, BaseLen, '=')   % leftmost '=' is found first
        ->  sub_atom(X, 0, PreLen, _, Pre),
            sub_atom(X, _, BaseLen, 0, Base)
        ;   throw(error(domain_error(prefix_mapping, X), _))
        ),
        debug(sparqlprog,'Registering: ~w = ~w',[Pre,Base]),
        rdf_register_prefix(Pre,Base).

% handle_opts(+Opts)
%
% Applies every side-effecting command-line option before a query is run.
%
%  - Boolean switches are dispatched through opt_if_call/3.
%  - Repeatable options are dispatched through opt_forall/3, which only
%    fires for ground option values.
%  - -g/--goal terms are called once each.  They are tested with \+var/1
%    rather than ground/1 because a goal may legitimately contain
%    variables.
%
% Fails if any of the dispatched goals fails.
handle_opts(Opts) :-
        opt_if_call(verbose,debug(sparqlprog),Opts),
        opt_if_call(distinct,force_distinct,Opts),
        opt_if_call(stacktrace,use_module(library(sparqlprog/stacktrace)),Opts),
        opt_if_call(execute,ensure_loaded(library(sparqlprog/emulate_builtins)),Opts),
        opt_forall(attach(X),rdf_attach_library(X),Opts),
        opt_forall(debug(X),debug(X),Opts),
        opt_forall(prefix_mapping(X),add_prefix(X),Opts),
        opt_forall(use(X),use_module(library(X)),Opts),
        opt_forall(use_no_import(X),use_module(library(X),[]),Opts),
        opt_forall(consult(X),consult(X),Opts),
        opt_forall(input(X),rdf_load_wrap(X),Opts),
        % load the index library first (once per --index), then materialize
        opt_forall(index(_),ensure_loaded(library(index_util)),Opts),
        opt_forall(index(X),materialize_index(X),Opts),
        % Run each user goal exactly once.  A second pass over goal(X) via
        % opt_forall/3 would call every ground goal a second time.
        forall((member(goal(X),Opts),\+var(X)), X),
        opt_forall(assign(X),assign_setting(X),Opts).

% force_distinct
%
% Makes distinct(true) part of the sparqlprog:select_options setting.
% Every existing distinct(_) entry is dropped first, so the result holds
% exactly one distinct/1 option.  Succeeds even when the current setting
% has no distinct/1 entry.
force_distinct :-
        setting(sparqlprog:select_options, Opts),
        delete(Opts, distinct(_), Opts2),
        set_setting(sparqlprog:select_options, [distinct(true)|Opts2]).


% assign_setting(+Assignment)
%
% Applies a command-line setting assignment of the form
% 'Module:Name=Value'.
%
%  - The atom is split at the FIRST '=', so the value may contain '='.
%  - The left-hand side must be exactly 'Module:Name'.
%  - The textual value is converted according to the declared type of the
%    setting: float settings get a float (so '2' becomes 2.0), other
%    numeric settings get a number, everything else is passed through as
%    the atom that was typed.
%
% Throws error(domain_error(setting_assignment, Assignment), _) when the
% atom does not have the expected shape.  Fails if a numeric setting is
% given non-numeric text.
assign_setting(A) :-
        (   sub_atom(A, KeyLen, 1, ValLen, '='),
            sub_atom(A, 0, KeyLen, _, P),
            sub_atom(A, _, ValLen, 0, V1),
            atomic_list_concat([M,Pred], ':', P)
        ->  true
        ;   throw(error(domain_error(setting_assignment, A), _))
        ),
        (   setting_property(M:Pred,type(Type))
        ->  setting_value_from_text(Type, V1, V)
        ;   V=V1),      % unknown setting: let set_setting/2 report it
        set_setting(M:Pred,V).

% setting_value_from_text(+Type, +Text, -Value)
%
% Converts the atom Text to a value suitable for a setting of Type.
setting_value_from_text(float, Text, Value) :-
        !,
        atom_number(Text, Number),
        Value is float(Number).
setting_value_from_text(Type, Text, Value) :-
        memberchk(Type, [integer,number,nonneg,positive_integer]),
        !,
        atom_number(Text, Value).
setting_value_from_text(_, Text, Text).


% rdf_load_wrap(+Source)
%
% Loads Source with rdf_load/1.  If that raises any exception, the
% exception is discarded and Source is instead passed to
% rdf_load_library/1.
rdf_load_wrap(Source) :-
        catch(rdf_load(Source),
              _,
              rdf_load_library(Source)).



% a2t(+Atom, -TermText)
%
% Translates one argument of the '/ Pred Args' shorthand into the text of
% a Prolog term.
%
%  - An atom made only of dots yields '_' once per dot on backtracking,
%    so '..' stands for two anonymous variables.  The empty atom fails.
%  - An atom starting with '_' or an upper-case ASCII letter yields '_'.
%  - Anything else yields the quoted (~q) form of the atom.
a2t(Atom,Text) :-
        atom_chars(Atom,Chars),
        \+ ( member(Ch,Chars),
             Ch \== '.' ),
        !,
        % one solution per dot
        member('.',Chars),
        Text = '_'.

a2t(Atom,Text) :-
        atom_chars(Atom,[First|_]),
        (   First \== '_',
            \+ ( First @>= 'A', First @=< 'Z' )
        ->  sformat(Text,'~q',[Atom])
        ;   Text = '_'
        ).

% atom_uri(+Atom, -URI)
%
% Expands a 'prefix:local' atom to a full IRI when Atom contains exactly
% one ':' and the part before it is a registered RDF prefix.  In every
% other case URI is Atom itself.
atom_uri(A,U) :-
        (   atom(A),
            concat_atom([Prefix,Local],':',A),
            rdf_current_prefix(Prefix,_),
            rdf_global_id(Prefix:Local,U)
        ->  true
        ;   U = A
        ).

% run([describe_all|Nodes], +Opts)
%
% Describes several nodes at once: each argument is expanded with
% atom_uri/2, then rdf/4 is queried with the subject ranging over the
% expanded list.  The select term is the rdf/4 goal itself.
run([describe_all|Nodes], Opts) :-
        !,
        maplist(atom_uri,Nodes,IRIs),
        Quad=rdf(S,P,O,G),
        exec_query(Quad, (member(S,IRIs),Quad), ['P'=P,'O'=O,'G'=G], Opts).

% run([describe,Node], +Opts)
%
% Queries all triples that have Node (expanded via atom_uri/2) as subject.
run([describe,Node], Opts) :-
        !,
        atom_uri(Node,IRI),
        Triple=rdf(IRI,Pred,Obj),
        exec_query(Triple, Triple, ['P'=Pred,'O'=Obj], Opts).
% run([in,Node], +Opts)
%
% Queries all triples that have Node (expanded via atom_uri/2) as object.
run([in,Node], Opts) :-
        !,
        atom_uri(Node,IRI),
        Triple=rdf(Subj,Pred,IRI),
        exec_query(Triple, Triple, ['S'=Subj,'P'=Pred], Opts).

% run([inout,Node], +Opts)
%
% Queries triples in which Node (expanded via atom_uri/2) is either the
% subject or the object.  Each branch of the disjunction binds Subject and
% Object with bind/2 so both directions report through the same columns.
run([inout,X], Opts) :-
        !,
        atom_uri(X,Node),
        Outgoing=(rdf(Node,P,Other),
                  bind(Node, Subject),
                  bind(Other, Object)),
        Incoming=(rdf(Other,P,Node),
                  bind(Other, Subject),
                  bind(Node, Object)),
        exec_query((Subject,P,Object),
                   (Outgoing;Incoming),
                   ['Subject'=Subject,'P'=P,'Object'=Object],
                   Opts).

% run([owl_node_info|Nodes], +Opts)
%
% Like describe_all, but queries owl_node_info/4 instead of rdf/4 for each
% node in the (atom_uri/2-expanded) argument list.
run([owl_node_info|Nodes], Opts) :-
        !,
        maplist(atom_uri,Nodes,IRIs),
        Info=owl_node_info(S,P,O,G),
        exec_query(Info, (member(S,IRIs),Info), ['P'=P,'O'=O,'G'=G], Opts).
        %forall((member(X,Xs2),Q=owl_node_info(X,P,O,G)),
        %       exec_query(Q, Q, ['P'=P,'O'=O,'G'=G], Opts)).

% run([q|Args], +Opts)
%
% Ontology search front-end; every form delegates to
% owl_search_and_display.  The forms are told apart by the number of
% arguments after 'q':
%
%   q Search                           -> owl_search_and_display/2
%   q Search Disp                      -> predicate l, post term '.', no relations
%   q Search Rels Disp                 -> predicate l, post term '.'
%   q Search Post Rels Disp ...        -> predicate left unbound
%   q Search Pred Post Rels Disp ...   -> all given; extra arguments ignored
%
% The fixed-length forms cannot match the open-ended ones, so listing them
% first does not change which clause fires.  The six-or-more form must stay
% ahead of the five-or-more form.
run([q,Search], Opts) :-
        !,
        owl_search_and_display([Search], Opts).
run([q,Search,Disp], Opts) :-
        !,
        owl_search_and_display(Search, l, '.', [], Disp, _, Opts).
run([q,Search,Rels,Disp], Opts) :-
        !,
        owl_search_and_display(Search, l, '.', Rels, Disp, _, Opts).
run([q,Search,Pred,Post,Rels,Disp|_], Opts) :-
        !,
        owl_search_and_display(Search, Pred, Post, Rels, Disp, _, Opts).
run([q,Search,Post,Rels,Disp|_], Opts) :-
        !,
        owl_search_and_display(Search, _, Post, Rels, Disp, _, Opts).

% run([report,Name|_], +Opts)
%
% Writes the dataframe called Name as CSV via dataframe_to_csv/2.
% The cut commits to this command: without it, a caller that backtracks
% into run/2 (as the interactive shell does) would re-interpret
% 'report Name' as a query/select pair.
run([report, Name|_], _Opts) :-
        !,
        ensure_loaded(library(sparqlprog/dataframe)),
        dataframe_to_csv(Name,[]).

% run([labelify,File|_], +Opts)
%
% Reads File as CSV (rows as row/N terms), passes every row through
% row_labelify/2 and writes the result with write_result/2.
run([labelify, File|_Keys], Opts) :-
        !,
        csv_read_file(File, Rows, [functor(row)]),
        forall(member(Row,Rows),
               (   row_labelify(Row,Row2),
                   write_result(Row2,Opts))).


% run([save,File|_], +Opts)
%
% Saves the in-memory RDF database to File as Turtle.
run([save, F|_], _Opts) :-
        !,
        ensure_loaded(library(semweb/rdf_turtle)),
        rdf_turtle:rdf_save_turtle(F, []).

% run(['/',Pred|Args], +Opts)
%
% Shorthand syntax for terms: / Pred Args
% Each Arg is translated by a2t/2: a variable name (upper case or '_')
% and each dot of an all-dots atom become '_'; any other atom is quoted.
% The pieces are joined into the text 'Pred(A1, A2, ...)', which is then
% run as an ordinary single-atom query.
run(['/',Pred|Args], Opts) :-
        !,
        % findall/3 collects every a2t/2 solution, so '..' adds two args
        findall(ArgText,
               (   member(Arg,Args),
                   a2t(Arg,ArgText)),
               ArgTexts),
        concat_atom(ArgTexts, ', ', ArgList),
        sformat(QueryText,'~w(~w)',[Pred,ArgList]),
        debug(sparqlprog, 'Q=~q', [QueryText]),
        run([QueryText],Opts).
% run([QueryAtom,SelectAtom], +Opts)
%
% The user supplies both the query and the select term.  Both texts are
% parsed together, as one term of the form (Select)-(Query), so that a
% variable name used in both refers to the same variable.
run([QueryAtom,SelectAtom], Opts) :-
        !,
        format(atom(Combined),'(~w)-(~w)',[SelectAtom,QueryAtom]),
        atom_to_term(Combined,Select-Query,Bindings),
        exec_query(Select, Query, Bindings, Opts).

% run([QueryAtom], +Opts)
%
% A single positional argument.  Three forms are tried in order; each
% clause re-parses QueryAtom with atom_to_term/3 and commits with a cut
% once its form is recognised.
run([QueryAtom],Opts) :-
        % Form 1: Pred/Arity shortcut, e.g. 'subClassOf/2'.
        % A most-general query Pred(_,...,_) is built with functor/3.
        % todo bindings
        atom_to_term(QueryAtom,Pred/Arity,Bindings),
        !,
        functor(Query,Pred,Arity),
        create_default_select(Query,Select),
        exec_query(Select, Query, Bindings, Opts).
run([QueryAtom],Opts) :-
        % Form 2: bare predicate name, e.g. 'subClassOf'.
        % The arity is guessed by find_arity/2 and the column names come
        % from a matching srule/4 entry, if any (bindings_from_srule/2).
        % todo bindings
        atom_to_term(QueryAtom,Pred,_),
        atom(Pred),
        !,
        find_arity(Pred,Arity),
        functor(Query,Pred,Arity),
        create_default_select(Query,Select),
        bindings_from_srule(Query,Bindings),
        exec_query(Select, Query, Bindings, Opts).
run([QueryAtom],Opts) :-
        % Form 3 (default): an arbitrary query term; the select term is
        % derived from the query by create_default_select/2.
        !,
        atom_to_term(QueryAtom,Query,Bindings),
        create_default_select(Query,Select),
        exec_query(Select, Query, Bindings, Opts).

% bindings_from_srule(+Query, -Bindings)
%
% Derives Name=Var bindings for the arguments of Query from the first
% srule/4 entry for the same predicate whose column list has as many
% elements as Query has arguments.  Bindings is [] when there is no such
% entry.
bindings_from_srule(Query,Bindings) :-
        Query =.. [Pred|Args],
        length(Args,N),
        (   srule(Pred,Cols,_,_),
            length(Cols,N)
        ->  make_bindings(Args,Cols,Bindings)
        ;   Bindings = []
        ).

% make_bindings(+Args, +Cols, -Bindings)
%
% Pairs up two lists of equal length into Col=Arg terms, in order.
% Fails when the lists differ in length.
make_bindings(Args,Cols,Bindings) :-
        maplist(make_binding,Args,Cols,Bindings).

% make_binding(?Arg, ?Col, ?Binding): Binding is Col=Arg.
make_binding(Arg,Col,Col=Arg).


% opt_show_query(+Select, +Query, +Bindings, +Opts)
%
% Prints the generated SPARQL when one of the display options is set.
% The first matching option wins:
%
%   construct(true)  print a CONSTRUCT query, then halt
%   compile(true)    print a SELECT query, then halt
%   show(true)       print a SELECT query as a '# SPARQL:' comment line
%                    and return
%
% Does nothing when none of these options is set.
opt_show_query(Select, Query, Bindings, Opts) :-
        (   option(construct(true),Opts)
        ->  tr_opts(Opts, _),
            format('# SELECT=~q QUERY=~q BINDINGS=~q~n',[Select,Query,Bindings]),
            create_sparql_construct(Select, Query, Construct, []),
            format('~w~n',[Construct]),
            halt
        ;   option(compile(true),Opts)
        ->  tr_opts(Opts, CompileOpts),
            format('# SELECT=~q QUERY=~q BINDINGS=~q~n',[Select,Query,Bindings]),
            create_sparql_select(Select, Query, Compiled, [bindings(Bindings)|CompileOpts]),
            format('~w~n',[Compiled]),
            halt
        ;   option(show(true),Opts)
        ->  tr_opts(Opts, ShowOpts),
            format('# SELECT=~q QUERY=~q BINDINGS=~q~n',[Select,Query,Bindings]),
            create_sparql_select(Select, Query, Shown, [bindings(Bindings)|ShowOpts]),
            format('# SPARQL: ~w~n',[Shown])
        ;   true
        ).


% exec_query(+Select, +Query, +Bindings, +Opts)
%
% Common entry point used by all run/2 clauses: logs the select term,
% query and bindings on the sparqlprog debug topic, then delegates to
% exec_query_direct/4.
exec_query(Select, Query, Bindings, Opts) :-
        debug(sparqlprog,'SELECT=~q QUERY=~q BINDINGS=~q',[Select,Query,Bindings]),
        exec_query_direct(Select, Query, Bindings, Opts).

% exec_query_direct(+Select, +Query, +Bindings, +Opts)
%
% Runs or prints a query.  Three modes are tried in order:
%
%  1. execute(true): Query is called directly in Prolog and Select is
%     written for every solution.  With inject_labels(true) the rows are
%     labelled dynamically (dynlabel(true) is added to the options passed
%     to write_result/2).
%  2. service(S) with S bound: Query is sent to the named endpoint via
%     '??'/4.  If S is not a known endpoint, the default endpoints are
%     loaded and the lookup retried; no_such_endpoint(S) is thrown if it
%     is still unknown.
%  3. Otherwise: the query is only translated to SPARQL and printed.
exec_query_direct(Select, Query, _Bindings, Opts) :-
        option(execute(true),Opts),
        % execute in prolog
        !,
        ensure_loaded(library(sparqlprog/emulate_builtins)),
        (   option(inject_labels(true),Opts)
        ->  Opts2=[dynlabel(true)|Opts]
        ;   Opts2=Opts),
        forall(Query,
               write_result(Select,Opts2)),
        nl.

exec_query_direct(Select, Query, Bindings, Opts) :-
        option(service(Service),Opts),
        nonvar(Service),
        % execute on remote service
        !,
        (   sparql_endpoint_url(Service,_)
        ->  true
        ;   debug(sparqlprog,'~w not found - loading default endpoints...',[Service]),
            load_default_endpoints,
            (   sparql_endpoint_url(Service,_)
            ->  true
            ;   throw(no_such_endpoint(Service)))),
        opt_show_query(Select, Query, Bindings, Opts),
        (   option(inject_labels(true),Opts)
        ->  inject_label_query(Select,Query,Select2,Query2,Opts)
        ;   Select2=Select, Query2=Query),
        % only limit/autopage are forwarded to the remote query
        include(is_select_option, Opts, SelectOpts),
        forall('??'(Service,Query2,Select2, SelectOpts),
               write_result(Select2,Opts)),
        nl.


exec_query_direct(Select, Query, Bindings, Opts) :-
        % translate to SPARQL
        tr_opts(Opts, Opts2),
        create_sparql_select(Select, Query, SPARQL, [bindings(Bindings)|Opts2]),
        opt_show_query(Select, Query, Bindings, Opts),
        % Print the query as data.  Using it as the format string would
        % misinterpret any '~' it contains (e.g. inside an IRI).
        format('~w~n',[SPARQL]).

% is_select_option(+Option)
%
% True for limit/1 and autopage/1 options whose value is ground; these
% are the options exec_query_direct/4 forwards to a remote query.
is_select_option(Option) :-
        select_option_value(Option,Value),
        ground(Value).

% select_option_value(?Option, ?Value): Value is the argument of a
% forwardable option.
select_option_value(limit(Value),Value).
select_option_value(autopage(Value),Value).

% opt_forall(+Template, :Goal, +Opts)
%
% Calls Goal once for every member of Opts that unifies with Template and
% is ground after unification.  Succeeds if Goal succeeds for all of them
% and leaves no bindings behind.
opt_forall(Template,Goal,Opts) :-
        debug(sparqlprog,'Running ~q for all ground ~q in ~q',[Goal,Template,Opts]),
        % double negation: there is no ground match for which Goal fails
        \+ ( member(Template,Opts),
             ground(Template),
             \+ Goal ).

% opt_if_call(+OptName, :Goal, +Opts)
%
% Calls Goal if Opts contains OptName(true).  Succeeds without calling
% Goal when the option is absent, false or unbound.  When the option is
% set and Goal fails, opt_if_call/3 fails as well.
opt_if_call(Opt,Goal,Opts) :-
        Probe =.. [Opt,Value],
        (   member(Probe,Opts),
            Value == true
        ->  Goal
        ;   true
        ).

% opt_if(?Option, +Opts)
%
% True if Opts has a member that unifies with Option and is ground
% afterwards.  Commits to the first such member.
opt_if(Option,Opts) :-
        once(( member(Option,Opts),
               ground(Option) )).

% opt_if(?Option, +Opts, -Rest)
%
% As opt_if/2, but also returns Opts with that one member removed.
opt_if(Option,Opts,Rest) :-
        once(( select(Option,Opts,Rest),
               ground(Option) )).


% tr_opts(+Opts, -PassThru)
%
% Filters the command-line options down to those whose functor name is
% declared by is_pass_thru/1, keeping their order.
tr_opts([],[]).
tr_opts([Opt|Rest],Out) :-
        (   Out = [Opt|Out1],
            Opt =.. [Name|_],
            is_pass_thru(Name)
        ->  tr_opts(Rest,Out1)
        ;   tr_opts(Rest,Out)
        ).

% is_pass_thru(?OptionName)
%
% Some options are intercepted by pl2sparql; others are passed through
% to sparqlprog.  This table names the pass-through ones; tr_opts/2 keeps
% only options whose functor name is listed here.
is_pass_thru(inject_labels).
is_pass_thru(label_predicate).
is_pass_thru(distinct).


% write_result(+Term, +Opts)
%
% Writes one result row to current output.  The clauses are tried in
% order:
%
%  1. format(prolog): the term is written quoted, followed by '.'.
%  2. dynlabel(true) present: the row is passed through row_labelify/2 and
%     written again with that option removed.
%  3. A format/1 option with a known separator (see
%     csv_format_separator/2): an optional header row, then the row as
%     delimited text.
%  4. Fallback: write_canonical/1 followed by '.'.
write_result(Term,Opts) :-
        option(format(Fmt),Opts),
        % ==/2 rather than =/2, so an unbound format value is not bound here
        Fmt == prolog,
        !,
        format('~q.~n',[Term]).
        
write_result(Term,Opts) :-
        % test if we want to add labels dynamically
        % (via queries over in-memory rdf db, rather than inject into sparql)
        opt_if(dynlabel(true),Opts,Opts2),
        !,
        row_labelify(Term,Term2),
        write_result(Term2,Opts2).
write_result(Term,Opts) :-
        member(format(Fmt),Opts),
        % NOTE(review): if Fmt is still unbound here, the first
        % csv_format_separator/2 fact binds it to csv - presumably this is
        % how CSV becomes the default format; confirm against opt_parse.
        csv_format_separator(Fmt,Sep),
        write_header(Opts,Sep),
        term_saferow(Term,Term2),
        debug(row,'ROW: ~q',[Term2]),
        csv_write_stream(current_output, [Term2], [separator(Sep)]),
        % the cut comes last: if any step above fails, the fallback clause
        % below is still tried
        !.
write_result(Term,_Opts) :-
        write_canonical(Term),
        writeln('.').

% written_header is asserted once the header row has been emitted.
:- dynamic written_header/0.

% write_header(+Opts, +Sep)
%
% Writes the header row the first time it is called with a bound
% header/1 option.  The option value is a comma-separated list of column
% names; they are written as one row using separator Sep.  Does nothing
% once the header has been written or when no header is given.
write_header(Opts,Sep) :-
        (   \+ written_header,
            member(header(Names),Opts),
            nonvar(Names),
            concat_atom(Columns,',',Names),
            Columns=[_|_],
            HeaderRow =.. [row|Columns]
        ->  csv_write_stream(current_output, [HeaderRow], [separator(Sep)]),
            assert(written_header)
        ;   true
        ).

% term_saferow(+Term, -SafeTerm)
%
% Translates a Prolog term into an object that is suitable to send to
% csv_write_stream:
%  - an unbound variable becomes '?'
%  - literals (Value^^Type, Value@Lang, literal(...)) become their value;
%    for the ^^ and @ forms the value is converted to an atom
%  - lists are flattened into one comma-separated atom
%  - each argument of a compound term is translated and rendered as an atom
%  - an IRI with a known prefix is abbreviated to 'prefix:local'
%  - anything else is returned unchanged
term_saferow(T,'?') :- var(T),!.
term_saferow(T^^_,A) :- !, atom_string(A,T).
term_saferow(T@_, A) :- !, atom_string(A,T).
term_saferow(literal(type(_,A)), A) :- !.
term_saferow(literal(lang(_,A)), A) :- !.
term_saferow(literal(A), A) :- !.
term_saferow(L,A) :-
        is_list(L),
        !,
        maplist(term_saferow,L,L2),
        atomic_list_concat(L2,',',A).
term_saferow(T,T2) :-
        T =.. [P|Args],
        Args = [_|_],
        !,
        maplist(term_saferow_arg,Args,Args2),
        T2 =.. [P|Args2].
term_saferow(T,T2) :-
        rdf_global_id(Pre:Id,T),
        !,
        atomic_list_concat([Pre,Id],:,T2).
term_saferow(T,T).

% term_saferow_arg(+Arg, -Atom)
%
% Translates one argument of a row term and renders the result as an
% atom, so that every cell handed to the CSV writer is atomic.
term_saferow_arg(Arg,Atom) :-
        term_saferow(Arg,Safe),
        format(atom(Atom),'~w',[Safe]).


% csv_format_separator(?Format, ?SeparatorCode)
%
% Maps an output format name to the character code of its field
% separator: comma for csv, tab for tsv, vertical bar for psv.
csv_format_separator(csv,0',).
csv_format_separator(tsv,0'\t).
csv_format_separator(psv,0'|).

% create_default_select(+Query, -Select)
%
% Chooses the term to report for each solution when the user gave no
% explicit select:
%  - a query none of whose arguments is compound is its own select
%  - aggregate_group(_,GroupBys,_,Value) yields row(Value|GroupBys)
%  - any other query yields row/N over all of its variables
create_default_select(Q,S) :-
        S = Q,
        Q =.. [_|Args],
        \+ ( member(Arg,Args),
             compound(Arg) ),
        !.
create_default_select(aggregate_group(_,GroupBys,_,Value),S) :-
        !,
        debug(sparqlprog,'showing results for value: ~q and groups: ~q',[Value,GroupBys]),
        S =.. [row,Value|GroupBys].
create_default_select(Q,S) :-
        term_variables(Q,Vars),
        S =.. [row|Vars].


% find_arity(+Pred, -Arity)
%
% Guesses the arity of predicate name Pred: the smallest arity from 1
% upwards for which clause/2 finds a clause.  Errors raised by clause/2
% count as "no clause".  If nothing is found up to arity 100, arity 0 is
% assumed.
find_arity(Pred,Arity) :-
        find_arity(Pred,Arity,1).

% find_arity(+Pred, -Arity, +N): search from candidate arity N upwards.
% (An earlier variant threw no_clause_with_pred(Pred) instead of assuming
% arity zero.)
find_arity(Pred,Arity,N) :-
        (   Arity = N,
            functor(Head,Pred,N),
            catch(clause(Head,_),
                  _,
                  fail)
        ->  true
        ;   Arity = 0,
            N > 99
        ->  debug(sparqlprog, 'Assuming ~w has arity zero',[Pred])
        ;   Next is N+1,
            find_arity(Pred,Arity,Next)
        ).

        
% load_default_endpoints
%
% Loads library(sparqlprog/endpoints).  Called by exec_query_direct/4 when
% a --service name is not yet known, so presumably that library declares
% the default endpoint names - confirm against the library itself.
load_default_endpoints :-
        use_module(library(sparqlprog/endpoints)).


% sparqlprog_shell(+Opts)
%
% Minimal read-eval loop.  Each input line is split on single spaces and
% handed to run/2 exactly as if the words had been given on the command
% line.  History is read from and written to '.sparqlprog_history' in the
% current directory.  The loop ends at end of input.
%
% Each command is committed to its first solution: the loop is driven by
% failure, and backtracking into run/2 would otherwise re-run the same
% command through a later run/2 clause.  Errors are printed and the loop
% continues; a command that simply fails is skipped silently.
sparqlprog_shell(Opts):-
        format('% Starting pl2sparql shell~n'),
        current_input(IO),
        HFile='.sparqlprog_history',
        (   exists_file(HFile)
        ->  rl_read_history(HFile)
        ;   true),
        repeat,
        read_line_to_codes(IO,Codes),
        (   Codes=end_of_file
        ->  !
        ;   atom_codes(A,Codes),
            rl_add_history(A),
            format('Cmd: ~w~n',[A]),
            concat_atom(L,' ',A),
            (   catch(run(L,Opts),
                      E,
                      (   format('ERROR:~n~w~n',[E]),fail))
            ->  format('SUCCESS!~n'),
                rl_write_history(HFile)
            ;   true),
            % back to repeat/0 for the next line
            fail).

% prolog_shell(+Opts)
%
% Starts a nested Prolog toplevel.  History is loaded from '.plhistory'
% (if that file exists) before the toplevel starts and saved back after
% it returns; the process then halts.
prolog_shell(_Opts):-
        format('% Starting prolog shell~n'),
        History='.plhistory',
        (   \+ exists_file(History)
        ->  true
        ;   rl_read_history(History)),
        prolog,
        format('% Bye!~n'),
        rl_write_history(History),
        halt.




/*

pl2sparql -u sparqlprog/ontologies/owl  -q 'subClassOf(X,A),subClassOf(Y,A),X\=Y -> A,X'

pl2sparql -u sparqlprog/owl_util -u sparqlprog/ontologies/owl  -q 'subClassOf(X,R),owl_some(R,P,V)'

pl2sparql -u obo_ro/ro -u sparqlprog/owl_util -u sparqlprog/ontologies/owl  -q 'occurs_in(A,B)'  

pl2sparql -u obo_ro/ro -u sparqlprog/owl_util -u sparqlprog/ontologies/owl  -q 'equivalentClass(X,R),intersectionOf(R,L),rdf_member(X,L)'

pl2sparql -U obo_ro/ro -u sparqlprog/ontologies/owl -v  -i tests/go_nucleus.ttl 'subClassOf(A,R),owl_some(R,P,B),ro:part_of_iri(P)'.  
*/
