:- use_module(library(filesex)).

% if library(lib) is missing, install via pack_install(lib).
%
:- use_module( library(lib) ).

% external code, lib knows how to deal with these (will install if missing)
% :- lib(bio_db).
:- lib(os_lib).
:- lib(by_unix).
:- lib(options).
:- lib(debug_call).
:- lib(stoics_lib:at_con/3).
:- lib(stoics_lib:arg_add/4).
:- lib(stoics_lib:portray_clauses/2).

% also sets lib alias that dir
:- ensure_loaded('../../lib/bio_db_build_aliases').  % /1.

% local libs & sources
:- lib(de_semi/3).
:- lib(mtx_map/4).
:- lib(bio_db_dnt_times/3).
:- lib(link_to_bio_sub/2).
:- lib(build_dnload_loc/3).
:- lib(bio_db_source_url/3).
:- lib(url_file_local_date_mirror/3).

% Default option values for std_multi_maps_vgnc/1.
% Each term pairs an option name with the value used when the caller does not supply one.
std_multi_maps_vgnc_defaults( [ db(vgnc),
                                debug(true),
                                debug_fetch(true),
                                debug_url(false),
                                download(true),
                                iactive(true),
                                vgnc_genes_file('vgnc_gene_set_All.txt.gz'),
                                maps_sub_dir(maps),
                                org(multi)
                              ] ).
/** std_multi_maps_vgnc(+Opts).

Create some maps from VGNC's "complete" data file.

Opts
  * db(Db=vgnc)
    source database
  * debug(Dbg=true)
    debugging, informational messages
  * debug_fetch(Fbg=true)
    whether to debug the fetching of the url (via url_file_local_date_mirror/3)
  * debug_url(Ubg=false)
    whether to debug the concatenation of the url (via bio_db_source_url/3)
  * download(Dn=true)
    set to false to skip downloading a fresh copy of the VGNC file(s)
  * iactive(Iact=true)
    whether the session is interactive, otherwise wget gets --no-verbose
  * maps_sub_dir(MsubD=maps)
    relative name for generated maps within downloads directory
  * org(Org=multi)
    organism, multi covers the case of relations over multiple organisms
  * vgnc_genes_file(VgncF='vgnc_gene_set_All.txt.gz')
    the file name for the URL download

==
?- std_multi_maps_vgnc.
==

@author nicos angelopoulos
@version  0.1 2023/8/16,  based on hgnc support- but modernised

*/
std_multi_maps_vgnc :-
    % No caller options: every option takes its default value.
    NoOpts = [],
    std_multi_maps_vgnc( NoOpts ).

% std_multi_maps_vgnc( +Args ).
%
% Build the VGNC maps: resolve the options, mirror the source URL into the
% download directory, then unpack the file and generate and link the maps
% from within that directory.
%
% The working directory is switched to the download directory only for the
% duration of the in-directory work. setup_call_cleanup/3 guarantees that the
% caller's directory is restored whether that work succeeds, fails or throws.
%
std_multi_maps_vgnc( Args ) :-
     Self = std_multi_maps_vgnc,
     options_append( Self, Args, Opts ),
     bio_db_build_aliases( Opts ),
     build_dnload_loc( Self, DnlD, Opts ),
     bio_db_source_url( SrcUrl, [vgnc_genes_file-url_file,debug_url-debug], Opts ),
     options( debug_fetch(Fbg), Opts ),
     url_file_local_date_mirror( SrcUrl, DnlD, [debug(Fbg),dnld_file(GzF)|Opts] ),
     % once/1 commits to the first solution so that the cleanup runs as soon as
     % the build step finishes, rather than being delayed by open choice points.
     setup_call_cleanup( working_directory(Old, DnlD),
                         once(std_multi_maps_vgnc_in_dnload_dir(Self, GzF, SrcUrl, Opts)),
                         working_directory(_, Old)
                       ).

% std_multi_maps_vgnc_in_dnload_dir( +Self, +GzF, +SrcUrl, +Opts ).
%
% Unpack GzF, correct its header, and create and link the maps.
% Expects the current working directory to be the download directory,
% as GzF and the maps sub directory are used as given.
%
std_multi_maps_vgnc_in_dnload_dir( Self, GzF, SrcUrl, Opts ) :-
     % -k keeps the .gz file, -f overwrites any previously unpacked copy
     @ gunzip( -f, -k, GzF ),
     bio_db_dnt_times( GzF, DnDt, _DnEnd ),
     options( maps_sub_dir(MapsD), Opts ),
     os_make_path( MapsD ),
     % options_propagate( map_prefix, Opts, StdOT, true ),
     MOpts = [mtx(_Mtx),sep(tab),db(vgnc),org(multi),odir(MapsD),
              datetime(DnDt),source(SrcUrl)|Opts],
     os_ext( gz, TxtF, GzF ),
     std_multi_maps_vgnc_fix_dnload( Self, TxtF ),
     mtx_map( TxtF, [vgnc_id:vgnc:de_semi('VGNC'),taxon_id,symbol], VgncSymbF, MOpts ),
     mtx_map( TxtF, [vgnc_id:vgnc:de_semi('VGNC'),taxon_id,name], VgncNameF, MOpts ),
     % debuc( Self, 'doing links...', [] ),
     Files = [VgncSymbF,VgncNameF],
     link_to_bio_sub( vgnc, Files, [org(multi),type(maps)] ).
     % The unpacked TxtF is deliberately left in place (not deleted).

% std_multi_maps_vgnc_fix_dnload( +Self, +TxtF ).
%
% Correct the header row of the tab separated file TxtF, if it needs correcting.
%
% The file is read with mtx/3 and the arity of the header row is compared to
% the arity of the first data row. When the two differ, an empty column name
% is added to the header with arg_add/4 (position -2; presumably counted from
% the end, confirm against stoics_lib) and the file is re-written in place with
% all of its data rows. When the arities agree the file is left untouched.
%
% Self is passed as the first argument of debuc/3 for the progress messages.
%
% The faulty upstream header was reported in September, with no response.
%
std_multi_maps_vgnc_fix_dnload( Self, TxtF ) :-
     mtx( TxtF, Mtx, [match(false),sep(tab)] ),
     Mtx = [Hdr,Row|Rows],
     functor( Hdr, _, Harity ),
     functor( Row, _, Rarity ),
     ( Harity =:= Rarity ->
          % Currently unexpected, as the caller unpacks the download afresh,
          % but it keeps a corrected upstream file from being damaged.
          debuc( Self, 'File: ~p had already been corrected.', [TxtF] )
          ;
          debuc( Self, 'Correcting header of file: ~p. Arities were ~d/~d .', [TxtF,Harity,Rarity] ),
          arg_add( -2, Hdr, '', FixedHdr ),
          % Row is written back too, so the first data row is not lost.
          mtx( TxtF, [FixedHdr,Row|Rows], sep(tab) )
     ).