1:- module( pubmed, [ pubmed_cited_by/2, pubmed_cited_by/3,
    2                     pubmed_cites/2, pubmed_cites/3,
    3                     pubmed_summary_info/3,
    4                     pubmed_search/2, pubmed_search/3,
    5                     pubmed_summary_display/1, pubmed_summary_display/2, pubmed_summary_display/3,
    6                     pubmed_version/2
    7                   ]
    8         ).

connect to publication services at pubmed

A simple library for communicating with pubmed publications. Currently allows

It requires the curl executable to be in the path. Only tested on Linux. It is being developed on SWI-Prolog 6.1.8 but should also work on Yap Prolog.

author
- Nicos Angelopoulos
version
- 0.0.4, 2013/11/2
See also
- http://stoics.org.uk/~nicos/sware/pubmed
- http://www.ncbi.nlm.nih.gov/books/NBK25500/
- examples.pl file in the packs directory
- sources at http://stoics.org.uk/~nicos/sware/pubmed/pubmed-0.0.4.tgz @tbd this public version lags significantly behind my private sources. people interested in recent developments should feel free to email me. */
   31:- ensure_loaded( library(sgml) ).   32
   33% Section: defaults, shortcuts.
   34
   35eutils( 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/' ).
   36
   37default_names( Names ) :- 
   38     Names = ['Author','Title','Source','Pages','PubDate',
   39              'Volume','Issue','ISSN','PmcRefCount',
   40              'PubType','FullJournalName'].
   41
   42pubmed_search_defaults( [verbose(false),retmax(100),tmp_keep(false)] ).
   43
   44pubmed_summary_display_defaults( [display(['Title','Author']),names(Names)] ) :-
   45     default_names( Names ).
   46     
   47% Section: interface predicates
 pubmed_version(+Version, +Date)
Get version information and date of publication. */
   53pubmed_version( 0:0:4, date(2013,11,2) ).
 pubmed_search(+STerm, -Ids)
Short form of pubmed_search( +STerm, -Ids, [] ).

*/

   60pubmed_search( STerm, Ids ) :-
   61     pubmed_search( STerm, Ids, [] ).
 pubmed_search(+STerm, -Ids, Options)
Search in pubmed for terms in the search term STerm. In this, conjunction is marked by , (comma) and disjunction by ; (semi-column). '-' pair terms are considered as Key-Value and interpreted as Value[Key] in the query. List are thought to be flat conjoint search terms with no pair values in them which are interpreted by pubmed also as OR operations. (See example below.) Known keys are : journal, pdat. au, All Fields The predicate constructs a query that is posted via the http API provided by NCBI (http://www.ncbi.nlm.nih.gov/books/NBK25500/). Options can be a single term or list of terms from :
?-
    St = (journal=science,[breast,cancer],pdat=2008),
    pubmed_search( St, Ids, [verbose(true),qtranslation(QTrans)] ),
    length( Ids, Len ), write( number_of:Len ), nl.

http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmax=100&term=science\[journal\]+AND+breast+cancer+AND+2008\[pdat\]
process_create(path(curl),[-o,/tmp/pl_13858_1,http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmax=100&term=science\[journal\]+AND+breast+cancer+AND+2008\[pdat\]],[])
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3008    0  3008    0     0   3585      0 --:--:-- --:--:-- --:--:--  4641
tmp_file(/tmp/pl_13858_1)
number_of:6
St = (journal=science, [breast, cancer], pdat=2008),
Ids = ['19008416', '18927361', '18787170', '18487186', '18239126', '18239125'],
QTrans = ['("Science"[Journal] OR "Science (80- )"[Journal] OR "J Zhejiang Univ Sci"[Journal]) AND ("breast neoplasms"[MeSH Terms] OR ("breast"[All Fields] AND "neoplasms"[All Fields]) OR "breast neoplasms"[All Fields] OR ("breast"[All Fields] AND "cancer"[All Fields]) OR "breast cancer"[All Fields]) AND 2008[pdat]'],
Len = 6.


?-
    date(Date), pubmed_search( prolog, Ids ), length( Ids, Len ), write( number_of:Len ), nl.

number_of:100
Date = date(2012, 7, 10),
Ids = ['22586414', '22462194', '22215819', '21980276', '21499053', '21353661', '20123506', '20123505', '19408879'|...],
Len = 100.

?-
    date(Date), pubmed_search( prolog, Ids, retmax(200) ),
    length( Ids, Len ), write( number_of:Len ), nl.

number_of:120
Date = date(2012, 7, 10),
Ids = ['22586414', '22462194', '22215819', '21980276', '21499053', '21353661', '20123506', '20123505', '19408879'|...],
Len = 120.

*/

  130pubmed_search( Sterm, Ids, OptsIn ) :-
  131     to_list( OptsIn, Opts ),
  132     pubmed_search_defaults( Defs ),
  133     append( Opts, Defs, All ),
  134     eutils( Eutils ),
  135     ( ground(Sterm) -> true; type_error(ground,Sterm) ),
  136     search_term_to_query( Sterm, Query ),
  137     memberchk( retmax(Ret), All ), 
  138     atomic_list_concat( [Eutils,'esearch.fcgi?db=pubmed&retmax=',Ret,'&term=',Query], Url ),
  139     memberchk_optional( tmp_file(Tmp), All ),
  140     memberchk( verbose(Verb), All ),
  141     true_writes( Verb, Url ),
  142     get_url_in_tmp( Url, Verb, Tmp ),
  143     true_writes( Verb, tmp_file(Tmp) ),
  144     load_xml_file( Tmp, Xml ),
  145     ( (memberchk(qtranslation(QTrans),All),
  146        QT = 'QueryTranslation',
  147        search_element_in_list(Xml,QT,[],element(_,_,QTrans))) -> true; true
  148     ),
  149     all_subs_in_xml_single( Xml, 'IdList', 'Id', NastyIds ),
  150     flatten( NastyIds, Ids ),
  151     memberchk_optional( tmp_keep(Keep), All),
  152     true_atom_keeps_file( Keep, Tmp ).
 pubmed_summary_display(+Ids)
Short for pubmed_summary_display( Ids, _Summary, [] ).

*/

  159pubmed_summary_display( Ids ) :-
  160     pubmed_summary_display( Ids, _Summary, [] ).
 pubmed_summary_display(+Ids, -Summary)
Short for pubmed_summary_display( Ids, Summary, [] ). */
  166pubmed_summary_display( Ids, Summary ) :-
  167     pubmed_summary_display( Ids, Summary, [] ).
 pubmed_summary_display(+Ids, -Summary, +Opts)
A wrapper around pubmed_summary_info/3. It call this predicate with same arguments before displaying the Summary information. Opts can be a single term option or a list of such terms. In addition to pubmed_summary_info/3 options this wrapper also recognises the term :
?-
     date(Date), pubmed_search((programming,'Prolog'), Ids), Ids = [A,B,C|_], pubmed_summary_display( [A,B,C] ).

----
0:22215819
[Evaluating bacterial gene-finding HMM structures as probabilistic logic programs.]
[Mørk S,Holmes I]
----
1:21980276
[War of ontology worlds: mathematics, computer code, or Esperanto?]
[Rzhetsky A,Evans JA]
----
2:15360781
[Medical expert systems developed in j.MD, a Java based expert system shell: application in clinical laboratories.]
[Van Hoof V,Wormek A,Schleutermann S,Schumacher T,Lothaire O,Trendelenburg C]
----
Date = date(2012, 7, 10),
Ids = ['22215819', '21980276', '15360781', '11809317', '9783213', '9293715', '9390313', '8996790', '15048396'|...],
A = '22215819',
B = '21980276',
C = '15360781'.
?-
     pubmed_cited_by( 20195494, These ), pubmed_summary_display( These, _, [display(['Title','Author','PubDate'])] ).

*/

  209pubmed_summary_display( Ids, Summary, OptsIn ) :-
  210     to_list( OptsIn, Opts ),
  211     pubmed_summary_display_defaults( Defs ),
  212     append( Opts, Defs, All ),
  213     memberchk( display(Disp), All ),
  214     pubmed_summary_info( Ids, Summary, Opts ),
  215     pubmed_summary_display_info( Summary, Disp ).
  216
  217pubmed_summary_display_info( Summary, Disp ) :-
  218     write( '----' ), nl,
  219     nth0( N, Summary, Id-Rec ),
  220     write( N:Id ), nl,
  221     findall( _, (member(D,Disp),member(D-Val,Rec),write(Val),nl), _ ), 
  222     write( '----' ), nl, fail.
  223pubmed_summary_display_info( _Summary, _Disp ).
 pubmed_cited_by(+Id, -Ids)
Redirects to pubmed_cited_by( Id, Ids, [] ).
  230pubmed_cited_by( Id, Ids) :-
  231     pubmed_cited_by( Id, Ids, [] ).
 pubmed_cited_by(+Id, -Ids, +Options)
Ids is the list of pubmed ids that cite Id. Options is a term option or list of terms from the following;
?-
     date(D), pubmed_cited_by( 12075665, By ).

D = date(2012, 7, 9),
By = ['19497389'].

*/

  248pubmed_cited_by( Id, Ids, OptsIn ) :-
  249     to_list( OptsIn, Opts ),
  250     ( memberchk(verbose(Verb),Opts) -> true; Verb = false ),
  251     eutils( Eutils ),
  252     Query = 'elink.fcgi?report=xml&mode=text&tool=curl&db=PubMed&cmd=neighbor&linkname=pubmed_pubmed_citedin&id=',
  253     atomic_list_concat( [Eutils,Query,Id], Url ),
  254     get_url_in_tmp( Url, Verb, Tmp ),
  255     load_xml_file( Tmp, Xml ),
  256     once( search_element_in_list( Xml, 'LinkSetDb', [], element(_,_,LXml) ) ),
  257     findall( CId, search_element_in_list(LXml,'Id',[],element(_,_,[CId])),Ids ),
  258     delete_file( Tmp ).
 pubmed_cites(+Id, -Ids)
Redirects to pubmed_cites( Id, Ids, [] ).
  264pubmed_cites( Id, Ids ) :-
  265     pubmed_cites( Id, Ids, [] ).
 pubmed_cites(+Id, -Ids)
 pubmed_cites(+Id, -Ids, +Options)
Ids is the list of pubmed Ids that are cited by Id. Options is a term option or list of terms from the following;
?-
     date(D), pubmed_cites( 20195494, Ids ), length( Ids, Len ), write( D:Len ), nl, fail.

date(2012,8,15):35

*/

  284pubmed_cites( Id, Ids, OptsIn ) :-
  285     to_list( OptsIn, Opts ),
  286     ( memberchk(verbose(Verb),Opts) -> true; Verb = false ),
  287     eutils( Eutils ),
  288     % Query = 'elink.fcgi?report=xml&mode=text&tool=curl&db=pmc&DbFrom=pubmed&Cmd=link&linkname=pubmed_pmc_refs&id=',
  289     Query = 'elink.fcgi?report=xml&mode=text&tool=curl&db=PubMed&Cmd=neighbor&linkname=pubmed_pubmed_refs&id=',
  290     atomic_list_concat( [Eutils,Query,Id], Url ),
  291     get_url_in_tmp( Url, Verb, Tmp ),
  292     load_xml_file( Tmp, Xml ),
  293     once( search_element_in_list( Xml, 'LinkSetDb', [], element(_,_,LXml) ) ),
  294     findall( CId, search_element_in_list(LXml,'Id',[],element(_,_,[CId])),Ids ),
  295     delete_file( Tmp ).
 pubmed_summary_info(+Id, -Results, +Opts)
Results are the summary information for pubmed id Id. The predicate communicates with pubmed via the http interface with curl. Results are deposited in xml files which are subsequently parsed to produce the termed Results. Id can also be a list of Ids in which case the result is a list of Id-Results pairs.

Options is a single term, or list of the following terms:

?-
  date(Date),  Opts = names(['Author','PmcRefCount','Title']),
  pubmed_summary_info( 12075665, Results, Opts ),
  write( date:Date ), nl, member( R, Results ), write( R ), nl, fail.

date:date(2012,7,9)
Author-[Kemp GJ,Angelopoulos N,Gray PM]
Title-[Architecture of a mediator for a bioinformatics database federation.]
Source-[IEEE Trans Inf Technol Biomed]
Pages-[116-22]
PubDate-[2002 Jun]
Volume-[6]
Issue-[2]
ISSN-[1089-7771]
PmcRefCount-[1]
PubType-[Journal Article]
FullJournalName-[IEEE transactions on information technology in biomedicine : a publication of the IEEE Engineering in Medicine and Biology Society]
false.

*/

  339pubmed_summary_info( Ids, Results, Opts ) :-
  340     % fixme use _defaults
  341     ( memberchk(names(Names),Opts) -> true; default_names(Names) ),
  342     ( memberchk(tmp_file(Tmp),Opts) -> true; true ),
  343     ( is_list(Ids) -> 
  344          findall( Id-Res, (member(Id,Ids),summary_info(Tmp,Id,Names,Res,Opts)), Results )
  345          ;
  346          summary_info( Tmp, Ids, Names, Results, Opts )
  347     ).
  348
  349% Section: non-interface predicates...
  350summary_info( Tmp, Id, WhichIn, Results, Opts ) :-
  351     to_list( WhichIn, Which ),
  352     eutils( Eutils ),
  353     ( memberchk(retmax(RMax),Opts) -> true; RMax = 100 ),
  354     atomic_list_concat( ['esummary.fcgi?report=xml&mode=text&tool=wget&retmax=',RMax,'&db=PubMed&id='], Query ),
  355     atomic_list_concat( [Eutils,Query,Id], Url ),
  356     get_url_in_tmp( Url, false, Tmp ), % fixme
  357     load_xml_file( Tmp, Xml ),
  358     Elem = element(_,_,[Entry]),
  359     findall( Name-Info, ( member(Name,Which),
  360                           findall( Entry,
  361                             search_element_in_list(Xml, 'Item', ['Name'=Name], Elem ),
  362                                   Info )
  363                         ), Results ),
  364     ( memberchk(tmp_keep(true),Opts) -> true; delete_file(Tmp) ).
  365
  366% Section: auxiliaries
 search_term_to_query(Sterm, Query)
Convert a pubmed search term to an atomic query that can be passed through http.
  372search_term_to_query( (A,B), Query ) :-
  373     !,
  374     search_term_to_query( A, Aq ),
  375     search_term_to_query( B, Bq ),
  376     atomic_list_concat( [Aq,'+AND+',Bq], Query ).
  377search_term_to_query( (A;B), Query ) :-
  378     !,
  379     search_term_to_query( A, Aq ),
  380     search_term_to_query( B, Bq ),
  381     atomic_list_concat( [Aq,'OR',Bq], '+', Query ).
  382search_term_to_query( (A=B), Query ) :-
  383     !,
  384     atomic_list_concat( [B,'\\[',A,'\\]'], Query ).
  385search_term_to_query( C, Query ) :-
  386     to_list( C, Clist ),
  387     atomic_list_concat( Clist, '+', Query ).
 memberchk_optional(Elem, List)
Unifies Elem with the first matching term in List if one exists. The predicate always succeeds exactly once.
  394memberchk_optional( Elem, List ) :-
  395     memberchk( Elem, List ),
  396     !.
  397memberchk_optional( _Elem, _List ).
  398     
  399true_atom_keeps_file( Keep, _File ) :-
  400     Keep == true,
  401     !.
  402true_atom_keeps_file( _Keep, File ) :-
  403     delete_file( File ).
  404
  405to_list( Either, List ) :-
  406	( (var(Either);(Either\=[_H|_T],Either\==[]) ) ->
  407		List = [Either]
  408		;
  409		List = Either
  410	).
  411
  412true_writes( true, Report ) :-
  413     !,
  414     write( Report ), nl.
  415true_writes( _Opts, _Report ).
 get_url_in_tmp(+URL, +Verbose, -In)
From SWI-Prolog's doc files (July 2012). tmp_file_stream/3.
  421get_url_in_tmp(URL, Verb, File) :-
  422        ( var(File) ->
  423               tmp_file_stream(text, File, Stream),
  424               close(Stream)
  425               ;
  426               true
  427        ),
  428        ( Verb==true -> Args = ['-o',File,URL] ; Args = ['-s','-o',File,URL] ),
  429        true_writes( Verb, process_create(path(curl),Args,[]) ),
  430        process_create( path(curl), Args, [] ),
  431        exists_file( File ).
  432
  433/*
  434http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=science[journal]+AND+breast+cancer+AND+2008[pdat]
  435        
  436        */
  437
  438all_subs_in_xml_single( Xml, Single, SubSel, Subs ) :-
  439     once( search_element_in_list( Xml, Single, [], element(_,_,Nest) ) ),
  440     findall( Sub, 
  441               search_element_in_list(Nest,SubSel,[],element(_,_,Sub)), 
  442              Subs ).
 search_element_in_list(+Term, +Name, +Attrs, -Elem)
Find an element in an sgml file that have specific Attrs. Got from hostip.pl .
  449search_element_in_list([Content|MoreContent], Name, ListAttributes, Element) :-
  450	(   search_element(Content, Name, ListAttributes, Element)
  451	;   search_element_in_list(MoreContent, Name, ListAttributes, Element)
  452	).
  453
  454search_element(HTML, Name, ListAttributes, HTML) :-
  455	arg(1, HTML, Name),
  456	arg(2, HTML, HTML_Attributi),
  457	forall(member(Attribute, ListAttributes),
  458	       memberchk(Attribute, HTML_Attributi)).
  459search_element(HTML, Name, ListAttributes, Element) :-
  460	arg(3, HTML, Contents),
  461	search_element_in_list(Contents, Name, ListAttributes, Element)