:- lib(suggests(bioc("GOstats"))). :- lib(suggests(bioc("GSEABase"))). % installed with GOstats :- lib(suggests(bioc("GO.db"))). % installed with GOstats :- lib(suggests(bioc("Category"))). % installed with GOstats :- lib(stoics_lib:kv_decompose/3). exp_go_over_defaults( Defs ) :- Defs = [ go('BP'), go_over_pv_cut(0.05), org(hs), stem(go_over), to_file(false), universe(go_exp) ]. /** exp_go_over( +CsvF, -GoOver, +Opts ). Perform gene ontology over-representation analysis. For experimental data in CsvF select de-regulated genes and on
those perform over representation analysis in gene ontology.
Results in GoOver are either as a values list of a csv file. Opts * go(GoSec='BP') gene ontology section, in: =|[BP,MF,CC]|= * go_over_pv_cut(PvCut=0.05) p value filter for the results * org(Org=hs) one of bio_db_organism/2 first argument values (hs or mouse for now) * stem(Stem=false) stem for output csv file. when false use basename of CsvF * to_file(ToF=false) when GoOver is unbound, this controls whether the output goes to a file or a values list * universe(Univ=go_exp) Univ in : =|[genome,go_exp,experiment]|= Options are also passed to exp_diffex/4. == ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, GoOver, [] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', GoOver = [row('GOBPID', 'Pvalue', adj.pvalue, ...), row('GO:0061387', 0.000187, ...)|...]. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, GoOver, [stem(here),to_file(true)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', GoOver = here_gontBP_p0.05_univExp.csv. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, 'a_file.csv', [stem(here),to_file(true)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', ?- lib(by_unix). ?- @wc(-l,'a_file.csv'). 183 a_file.csv ?- @wc(-l,'here_gontBP_p0.05_univExp.csv'). 183 here_gontBP_p0.05_univExp.csv true. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, OverF, [to_file(true)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', OverF = go_over_gontBP_p0.05_univExp.csv. ?- absolute_file_name( pack('bio_analytics/data/silac/bt.csv'), CsvF ), exp_go_over( CsvF, OverF, [to_file(true),stem(false)] ). CsvF = '.../swipl/pack/bio_analytics/data/silac/bt.csv', OverF = '.../swipl/pack/bio_analytics/data/silac/bt_gontBP_p0.05_univExp.csv'. == @author nicos angelopoulos @version 0.1 2019/5/2 @see go_over_universe/5 */ exp_go_over( CsvF, GoOver, Args ) :- Self = exp_go_over, options_append( Self, Args, Opts ), debug_call( exp_go_over, options, Self/Opts ), exp_diffex( CsvF, DEPrs, NDEPrs, Opts ), options( org(OrgPrv), Opts ), bio_db_organism( OrgPrv, Org ), kv_decompose( DEPrs, DEGenes, _ ), debug_call( exp_go_over, length, de_pairs/DEPrs ), sort( DEGenes, DEGenesSet ), debug_call( exp_go_over, length, de_genes_set/DEGenesSet ), org_symb_go_over_gene_ids( Org, DEGenesSet, Gids ), debug_call( exp_go_over, length, gids/Gids ), go_over_frame( Org, goFrameData, GofOrg ), goFrame <- 'GOFrame'(goFrameData, organism= +GofOrg), goAllFrame <- 'GOAllFrame'(goFrame), gsc <- 'GeneSetCollection'(goAllFrame, setType = 'GOCollection'() ), genes <- Gids, options( universe(UnivOpt), Opts ), go_over_universe( UnivOpt, Org, Gids, NDEPrs, Univ ), debug_call( exp_go_over, length, universe/Univ ), options( go(GoAspect), Opts ), universe <- Univ, universe <- 'as.character'(universe), genes <- 'as.character'(genes), options( go_over_pv_cut(PvCut), Opts ), ( debugging(Self) -> <- print(paste("length of universe: ", length(universe))) ; true ), gGparams <- 'GSEAGOHyperGParams'( name="bio_analytics_gont", geneSetCollection=gsc, geneIds = genes, universeGeneIds = universe, ontology = + GoAspect, pvalueCutoff = PvCut, conditional = 'FALSE', testDirection = "over" ), over <- hyperGTest(gGparams), dfOver <- 'data.frame'( summary(over) ), dfOver$'adj.pvalue' <- 'p.adjust'( dfOver$'Pvalue', method=+'BH' ), dfOver <- dfOver[*,c(1,2,8,3,4,5,6,7)], Use = use(Self,GoAspect,UnivOpt,PvCut), exp_go_over_return( GoOver, dfOver, CsvF, Use, Opts ). exp_go_over_return( GoOver, DfOveR, _CsvF, _Use, Opts ) :- var( GoOver ), options( to_file(false), Opts ), !, Rlist <- 'as.list'(DfOveR), findall( List, (member(Head=Tail,Rlist),List=[Head|Tail]), Lists), mtx_lists( GoOver, Lists ). exp_go_over_return( GoOver, DfOveR, CsvF, Use, Opts ) :- Use = use(Self,GoAspect,UnivOpt,PvCut), SubSep = '', options( stem(Stem), Opts ), atomic_list_concat( [gont,GoAspect], SubSep, GontAspTkn ), capit( UnivOpt, ShUniv ), atomic_list_concat( [univ,ShUniv], SubSep, UnivTkn ), (Stem == false -> TempF = CsvF; os_ext(csv,Stem,TempF)), atom_concat( p, PvCut, PvTkn ), Postfixes = [GontAspTkn,PvTkn,UnivTkn], (var(GoOver) -> os_postfix(Postfixes, TempF, GoOver) ; true), <- 'write.csv'(DfOveR, file=+GoOver, 'row.names'='FALSE'), % <- print( warnings() ), debug( Self, 'Wrote: ~p', GoOver ). %% go_over_universe( +Token, +Org, +DEGenes, +NDEPrs, -Universe ) % % Universe is the list of gene identifiers to be used as universe/background for GOstats. % % In human (=|Org=hs|=), this is a list of Entrez ids, and in =|Org=mouse|=, a list of Mgim identifiers.
% DEGenes is a list of deferentially expressed gene identifiers, and NDEPrs is a list of non-differential
% expressed Symbol-Pvalue pairs. % go_over_universe( experiment, Org, DeGids, NDEPrs, Univ ) :- go_over_universe_exp( Org, DeGids, NDEPrs, Univ ). go_over_universe( genome, Org, _DEGids, _NDEPrs, Univ ) :- go_over_universe_genome( Org, Univ ). go_over_universe( go_exp, Org, DEGids, NDEPrs, Univ ) :- go_over_universe_go_exp( Org, DEGids, NDEPrs, Univ ). go_over_universe_genome( hs, Univ ) :- findall( Entz, map_hgnc_symb_entz(_Symb,Entz), Entzs ), sort( Entzs, Univ ). go_over_universe_genome( mouse, Univ ) :- findall( Mgim, map_mgim_mouse_mgim_symb(Mgim,_Symb), Mgims ), sort( Mgims, Univ ). go_over_universe_exp( hs, DeGids, NDEPrs, Univ ) :- findall( Entz, (member(Symb-_,NDEPrs),map_hgnc_symb_entz(Symb,Entz)), NDEEntzs ), % findall( Entz1, (member(Symb,DEGenes),map_hgnc_symb_entz(Symb,Entz1)), DEEntzs ), % append( DEEntzs, NDEEntzs, Entzs ), append( DeGids, NDEEntzs, Entzs ), sort( Entzs, Univ ). go_over_universe_exp( mouse, DeGids, NDEPrs, Univ ) :- findall( Mgim, (member(Symb-_,NDEPrs),map_mgim_mouse_mgim_symb(Mgim,Symb)), NDEMgims ), append( DeGids, NDEMgims, Mgims ), sort( Mgims, Univ ). go_over_universe_go_exp( hs, DEGids, NDEPrs, Univ ) :- findall( Entz, (member(Symb-_,NDEPrs),once(map_gont_gont_symb(_,_,Symb)),map_hgnc_symb_entz(Symb,Entz)), NDEMgims ), findall( Entz, (member(Entz,DEGids),map_hgnc_symb_entz(Symb,Entz),once(map_gont_gont_symb(_,_,Symb))), DEMgims ), append( DEMgims, NDEMgims, Mgims ), sort( Mgims, Univ ). go_over_universe_go_exp( mouse, DEGids, NDEPrs, Univ ) :- findall( Mgim, (member(Symb-_,NDEPrs),once(map_gont_mouse_gont_symb(_,_,Symb)),map_mgim_mouse_mgim_symb(Mgim,Symb)), NDEMgims ), findall( Mgim, (member(Mgim,DEGids),map_mgim_mouse_mgim_symb(Mgim,Symb),once(map_gont_mouse_gont_symb(_,_,Symb))), DEMgims ), append( DEMgims, NDEMgims, Mgims ), sort( Mgims, Univ ). org_symb_go_over_gene_ids( hs, Set, Gids ) :- findall( Entz, (member(Symb,Set),map_hgnc_symb_entz(Symb,Entz)), Entzs ), sort( Entzs, Gids ). org_symb_go_over_gene_ids( mouse, Set, Gids ) :- findall( Mgim, (member(Symb,Set),map_mgim_mouse_mgim_symb(Mgim,Symb)), Mgims ), sort( Mgims, Gids ). go_over_frame( mouse, GoFra, GofOrg ) :- !, findall( row(Gid,E,M), ( map_gont_mouse_mgim_gont(M,E,G), go_id(Gid,G) ), Rows ), go_mtx_df( [row(go_id,evidence,gene_id)|Rows], GoFra, [] ), GofOrg = "Mus musculus". go_over_frame( hs, GoFra, GofOrg ) :- !, findall( row(Gid,E,Entz), ( map_gont_gont_symb(G,E,S), go_id(Gid,G), map_hgnc_symb_entz(S,Entz) ), Rows ), go_mtx_df( [row(go_id,'Evidence',gene_id)|Rows], GoFra, [] ), GofOrg = "Homo sapiens". capit( Atom, Capit ) :- atom_codes( Atom, [A,B,C|_] ), atom_codes( Aat, [A] ), atom_codes( BCat, [B,C] ), upcase_atom( Aat, CapA ), downcase_atom( BCat, DwnBC ), atom_concat( CapA, DwnBC, Capit ). go_mtx_df_defaults([check_names(false)]). go_mtx_df( Csv, Df, Args ) :- options_append( go_mtx_df, Args, Opts ), mtx_lists( Csv, Lists ), findall( Head=Tail, ( member(List,Lists), List=[Head|Tail] ), Pairs ), Df <- Pairs, ( options(check_names(true),Opts) -> Check = 'T'; Check = 'F' ), Df <- 'data.frame'(Df,'check.names'=Check).