:- set_prolog_flag(stack_limit, 80 000 000 000).

:- use_module(library(csv)).    % csv_read_file/3.
:- use_module(library(apply)).  % maplist/2.
:- use_module(library(lists)).  % member/2.

% if library(lib) is missing, install via pack_install(lib).
%
:- use_module( library(lib) ).

% external code, lib knows how to deal with these (will install if missing)
:- lib(mtx).
:- lib(os_lib).
:- lib(by_unix).
:- lib(stoics_lib:map_succ_list/3).


% also sets lib alias to that dir
:- ensure_loaded('../../lib/bio_db_build_aliases').  % /1.

% load necessary data that has already been generated
% :- ensure_loaded( hgnc:bio_db_build_downloads('hgnc/maps/map_hgnc_symb_hgnc') ).

% local libs & sources
% :- lib(go_id/2). -> go_obo_id/2 from below
:- lib(go_obo/2).  % 
:- lib(link_to_bio_sub/2).
:- lib(bio_db_dnt_times/3).
:- lib(bio_db_add_infos/1).                  % bio_db_add_infos_to/2
:- lib(build_dnload_loc/3).
:- lib(bio_db_source_url/3).
:- lib(url_file_local_date_mirror/3).

% :- debuc(std_maps_go).

% std_maps_gont_defaults( -Defs ).
%
% Default option values for std_maps_gont/1; each option is described in the
% comments of that predicate.
%
std_maps_gont_defaults( [ db(gont),
                          debug(true),
                          debug_fetch(true),
                          debug_url(false),
                          goa_base(gont_goa),
                          goa_file('goa_human.gaf.gz'),
                          iactive(true),
                          obo_base(gont_obo),
                          obo_file('go.obo'),
                          org(human)
                        ] ).

% std_maps_gont( Opts ).
% 
% Set up some standard maps for gene ontology (GO, gont) data.
%
% Source:  http://geneontology.org/gene-associations/gene_association.goa_human.gz
% 
% Currently this sets up the following files, under maps/ of the download directory
%  * gont_homs_gont_symb( GoTerm, Evidence, Symb ).
%  * gont_homs_symb_gont( Symb, Evidence, GoTerm ).
%  * gont_homs_gont_gonm( GoTerm, GoName ).
%
% The first two are built from the GOA annotation file (columns 5, 7 and 11),
% the third is built from the terms of the OBO file.
% The working directory is changed during the build and restored to the starting
% one when the predicate completes successfully.
%
% Opts
%  * db(Db=gont)
%    source database
%  * debug(Dbg=true)
%    informational, progress messages
%  * debug_fetch(Fbg=true)
%    whether to debug the fetching of the url (via url_file_local_date_mirror/3)
%  * debug_url(Ubg=false)
%    whether to debug the concatenation of the url (via bio_db_source_url/3)
%  * goa_base(GoaB=gont_goa)
%    bio_db_source_base_url/2, token or url to download from
%  * goa_file(GoaF='goa_human.gaf.gz')
%    the file name for the download (appended to Ufx@bio_db_source_base_url(gont_goa,Ufx))
%  * iactive(Iact=true)
%    whether the session is interactive, otherwise wget gets --no-verbose
%  * obo_base(OboB=gont_obo)
%    the url base for the obo download
%  * obo_file(OboF='go.obo')
%    the file name for the obo download
%  * org(Org=human)
%    organism token; not read directly here, it is passed along within Opts
%
% @author  nicos angelopoulos
% @version 0.1 2015/3/26
% @version 0.2 2023/9/22, moved download location to options
% @see bio_db_source_base_url/2, bio_db_source_url/3.
%
std_maps_gont( Args ) :-
    Self = std_maps_gont,
    options_append( Self, Args, Opts ),
    bio_db_build_aliases( Opts ),
    % load necessary data that has already been generated (looked up by hgnc_symb/2)
    ensure_loaded( hgnc:bio_db_build_downloads('hgnc/maps/hgnc_homs_symb_hgnc') ),
    build_dnload_loc( Self, DnDir, Opts ),
    % Here is restored at the very end
    working_directory( Here, DnDir ),

    % --- fetch and read the GOA annotations
    SrcRnms = [debug_url-debug,goa_base-url_base,goa_file-url_file], 
    bio_db_source_url( Url, SrcRnms, Opts ),
    options( debug_fetch(Fbg), Opts ),
    url_file_local_date_mirror( Url, DnDir, [dnld_file(GoaF),debug(Fbg)|Opts] ),
    % -k keeps the .gz (its name is used for the download times below)
    @ gunzip( --force, -k, GoaF ),
    file_name_extension( GoaHs, gz, GoaF ),
    csv_read_file( GoaHs, MtxPrv, [separator(0'\t),match_arity(false),convert(false)] ),
    % drop the leading single-column rows
    clense_goa_hs( MtxPrv, Mtx ),
    debuc( Self, 'loaded data...', true ),

    % --- GO term -> symbol map
    make_directory_path( maps ),
    gaf_gont_symb( Mtx, Self, NonSymbs, MultSymbs, NewRows ),
    % fixme: make the messages more informative
    debuc( Self, length, [non_symboled,multi_symboled]/[NonSymbs,MultSymbs] ),
    sort( NewRows, OrdRows ),
    mtx( 'maps/gont_homs_gont_symb.csv', OrdRows ),
    GSopts = [predicate_name(gont_homs_gont_symb)],
    mtx_prolog( OrdRows, 'maps/gont_homs_gont_symb.pl', GSopts ),
    working_directory( AtHere, AtHere ),
    debuc( Self, 'Currently at: ~p', AtHere ),
    bio_db_dnt_times( GoaF, UrlDntSt, _DntEnd ),
    AddOpts = [source(Url),datetime(UrlDntSt)],
    bio_db_add_infos_to( [header(row('GO Term','Evidence','HGNC Symbol'))|AddOpts],
                         'maps/gont_homs_gont_symb.pl' ),

    % --- symbol -> GO term map (same rows, with first and last columns swapped)
    findall( row(Symb,Evid,Gont), member(row(Gont,Evid,Symb),OrdRows), SGRows ),
    sort( SGRows, OrdSGRows ),
    SGopts = [predicate_name(gont_homs_symb_gont)],
    mtx_prolog( OrdSGRows, 'maps/gont_homs_symb_gont.pl', SGopts ),
    bio_db_add_infos_to( [header(row('HGNC Symbol','Evidence','GO Term'))|AddOpts],
                         'maps/gont_homs_symb_gont.pl' ),

    % --- GO term -> GO name map, from the OBO file
    debuc( Self, 'Building term to name map', true ),
    % debug_url (not debug_fetch) controls bio_db_source_url/3, as for the GOA url above
    OboRnms = [debug_url-debug,obo_base-url_base,obo_file-url_file], 
    bio_db_source_url( OboUrl, OboRnms, Opts ),
    % DnDir is already bound, so this only succeeds if the alias resolves to the same directory
    absolute_file_name( bio_db_build_downloads(gont), DnDir ),
    url_file_local_date_mirror( OboUrl, DnDir, [dnld_file(OboF),debug(Fbg)|Opts] ),
    debuc( Self, 'Dnload done: ~w', [DnDir] ),
    go_obo( OboF, GoObo),
    % NOTE(review): presumably removes obsolete terms - confirm against go_obo_non_obs/2
    go_obo_non_obs( GoObo, GoOboCurr ),
    GoOboCurr = obo(_,OboTerms),
    findall( gont_homs_gont_gonm(Gont,Gonm),
             member(obo_term(Gont,Gonm,_Nspc,_Obs,_Props),OboTerms),
             GontGonms ),
    sort( GontGonms, OrdGontGonms ),
    % use the file that was actually downloaded, rather than a hard-coded 'go.obo',
    % so that a non-default obo_file() option is honoured
    bio_db_dnt_times( OboF, DnDt, _DnEnd ),
    portray_clauses( OrdGontGonms, file('maps/gont_homs_gont_gonm.pl') ),
    InfoOpts = [header(row('GO Term','GO Name')),source(OboUrl),datetime(DnDt)],
    bio_db_add_infos_to( InfoOpts, 'maps/gont_homs_gont_gonm.pl' ),
    
    % --- link the outputs, clean up and go back to where we started
    working_directory( First, maps ),
    OutFs = ['gont_homs_gont_symb.pl','gont_homs_symb_gont.pl','gont_homs_gont_gonm.pl'],
    maplist( link_to_bio_sub(gont), OutFs ),

    working_directory( _, First ),
    % remove the unzipped copy, the .gz is kept
    delete_file( GoaHs ),
    working_directory( _, Here ).

% gaf_gont_symb( +Rows, +Self, -NonSymbs, -MultSymbs, -NewRows ).
%
% Walk the annotation Rows, producing a row(GoTerm,Evid,Symb) in NewRows for each
% input row for which go_bared_symbol/7 finds a symbol.
% GoTerm comes from column 5 (via go_obo_id/2), Evid from column 7 and
% the text that is searched for a symbol from column 11.
% NonSymbs and MultSymbs collect what go_bared_symbol/7 reports for rows
% leading to no symbol and to multiple symbols, respectively.
%
% fixme: parse Go through grammar
gaf_gont_symb( [], _Self, [], [], [] ).
gaf_gont_symb( [Row|Rows], Self, NonSymbs, MultSymbs, NewRows ) :-
    arg( 5, Row, FullTerm ),
    go_obo_id( FullTerm, GoTerm ),
    arg( 11, Row, Bared ),
    arg( 7, Row, Evid ),
    go_bared_symbol( Bared, Symb, Self, NonSymbs, MultSymbs, RemNonSymbs, RemMultSymbs ),
    (   Symb \== []
    ->  NewRows = [row(GoTerm,Evid,Symb)|RemRows]
    ;   % no symbol was found, the row contributes nothing
        NewRows = RemRows
    ),
    gaf_gont_symb( Rows, Self, RemNonSymbs, RemMultSymbs, RemRows ).

% go_term( +GoTermFull, -GoTerm ).
%
% Strip the 'GO:' prefix from GoTermFull and convert what follows to the number GoTerm.
% Throws no_go(GoTermFull) if the prefix is missing.
% Fails if the prefix is there but the remainder is not a number.
%
go_term( GoTermFull, GoTerm ) :-
    atom_concat( 'GO:', Digits, GoTermFull ),
    !,
    atom_number( Digits, GoTerm ).
go_term( GoTermFull, _GoTerm ) :-
    throw( no_go(GoTermFull) ).

% go_bared_symbol( +Bared, -Symb, +Self, -NonSymbs, -MultSymbs, ?TNonSymbs, ?TMultSymbs ).
%
% Split Bared at '|' and keep the symbols hgnc_symb/2 finds for the parts
% (parts for which it fails are dropped by map_succ_list/3).
% The flattened candidates are handed to go_bared_symbol_single/8, which picks Symb
% and threads the NonSymbs/TNonSymbs and MultSymbs/TMultSymbs open lists.
%
go_bared_symbol( Bared, Symb, Self, NonSymbs, MultSymbs, TNonSymbs, TMultSymbs ) :-
    atomic_list_concat( BarParts, '|', Bared ),
    map_succ_list( user:hgnc_symb, BarParts, Nested ),
    flatten( Nested, Candidates ),
    go_bared_symbol_single( Candidates, Bared, Symb, Self,
                            NonSymbs, MultSymbs, TNonSymbs, TMultSymbs ).

% hgnc_symb( +Part, -Symbs ).
%
% If Part is a first argument of hgnc:hgnc_homs_symb_hgnc/2, Symbs is Part itself.
% Otherwise Part is split at ':' and Symbs is the list of results for those
% sub-parts on which this predicate succeeds (so it may be nested).
% Fails when Part is not in the table and contains no ':'.
%
hgnc_symb( Part, Symbs ) :-
    (   Symbs = Part,
        hgnc:hgnc_homs_symb_hgnc( Part, _ )
    ->  true
    ;   atomic_list_concat( Subs, ':', Part ),
        % a Part with no ':' splits to itself, recursing on it would never end
        Subs \= [Part],
        map_succ_list( hgnc_symb, Subs, Symbs )
    ).

% go_bared_symbol_single( +Symbs, +Bared, -Symb, +Self, -NonSymbs, -MultSymbs, ?TNonSymbs, ?TMultSymbs ).
%
% Select a single Symb from the candidate list Symbs that was derived from Bared.
%  * no candidates:  Symb = [] and Bared is added at the front of NonSymbs
%  * one candidate:  Symb is that candidate, both accumulators are passed through
%  * two or more:    Symb is the first candidate (when unbound at call time)
%                    and Bared-Symbs is added at the front of MultSymbs
% TNonSymbs and TMultSymbs are the tails of the respective accumulators.
% Self is not used; the debug topic is fixed to std_maps_gont(details).
%
% This is a generic pattern...
go_bared_symbol_single( [], Bared, Symb, _Self, NonSymbs, MultSymbs, TNonSymbs, TMultSymbs ) :-
    debuc( std_maps_gont(details), 'GO bared:~w did not lead to a symbol...', Bared ),
    Symb = [],
    TMultSymbs = MultSymbs,
    NonSymbs = [Bared|TNonSymbs].
go_bared_symbol_single( [Symb], _Bared, Symb, _Self, NonSymbs, MultSymbs, TNonSymbs, TMultSymbs ) :-
    !,
    MultSymbs = TMultSymbs,
    NonSymbs = TNonSymbs.
go_bared_symbol_single( [S1,S2|Sail], Bared, Symb, _Self, NonSymbs, MultSymbs, TNonSymbs, TMultSymbs ) :-
    Found = [S1,S2|Sail],
    debuc( std_maps_gont(details), 'GO bared:~w did led to multiple symbols...,~w', [Bared,Found] ),
    memberchk( Symb, Found ),
    MultSymbs = [Bared-Found|TMultSymbs],
    NonSymbs = TNonSymbs.

% clense_goa_hs( +Rows, -Mtx ).
%
% Mtx is Rows without its leading run of terms that have exactly one argument.
% Only the front of the list is cleaned: single-argument terms that appear
% after the first wider row are kept.
%
clense_goa_hs( Rows, Mtx ) :-
    (   Rows = [First|Rest],
        functor( First, _, 1 )
    ->  clense_goa_hs( Rest, Mtx )
    ;   Mtx = Rows
    ).