1/* Part of sparkle
    2	Copyright 2014-2015 Samer Abdallah (UCL)
    3	 
    4	This program is free software; you can redistribute it and/or
    5	modify it under the terms of the GNU Lesser General Public License
    6	as published by the Free Software Foundation; either version 2
    7	of the License, or (at your option) any later version.
    8
    9	This program is distributed in the hope that it will be useful,
   10	but WITHOUT ANY WARRANTY; without even the implied warranty of
   11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12	GNU Lesser General Public License for more details.
   13
   14	You should have received a copy of the GNU Lesser General Public
   15	License along with this library; if not, write to the Free Software
   16	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
   17*/
   18
   19:- module(sparkle,[
   20      sparql_endpoint/2
   21   ,  sparql_endpoint/3
   22   ,  current_sparql_endpoint/5
   23   ,  query_goal/3     % Endpoint, Context, Opts
   24   ,  query_phrase/3   % Endpoint, QueryPhrase, Result
   25   ,  query_sparql/3 % Endpoint,QueryText,Result
   26   ,  (??)/1
   27   ,  (??)/2
   28   ,  op(1150,fx,??)
   29   ,  op(1150,xfy,??)
   30	]).

Query to SPARQL endpoints with a more Prolog-like syntax

Samer Abdallah, Dept. of Computer Science, UCL (2014) Based on Yves Raimond's swic package, but completely re-written.

This module provides a little language for expressing SPARQL queries and a database of known SPARQL endpoints. Queries can be executed across multiple endpoints in parallel. When using auto-paging, multiple queries are made automatically to fetch new bindings as they are needed. For example,

EP ?? rdf(A,B,C).

will retrieve all triples from all endpoints in parallel, fetching 100 bindings at a time from each endpoint (assuming the setting sparkle:limit takes it's default value of 100). */

   50:- use_module(library(sandbox)).   51:- use_module(library(settings)).   52:- use_module(library(semweb/sparql_client)).   53:- use_module(library(dcg_core)).   54:- use_module(library(dcg_codes)).   55:- use_module(sparql_dcg).   56:- use_module(concurrency).   57
   58
   59:- dynamic sparql_endpoint/5.   60:- multifile sparql_endpoint/5.   61:- set_prolog_flag(double_quotes, codes).   62
   63:- setting(limit,integer,100,'Default SPARQL SELECT limit').   64:- setting(select_options,list,[distinct(true)],'Default select options').   65
   66:- meta_predicate query_phrase(+,//,-).   67
   68sandbox:safe_meta(sparql_dcg:phrase_to_sparql(Phr,_),[Phr]).
   69sandbox:safe_primitive(sparql_dcg:select(_,_,_,_,_)).
   70sandbox:safe_primitive(sparql_dcg:describe(_,_,_,_)).
   71sandbox:safe_primitive(sparql_dcg:describe(_,_,_)).
   72sandbox:safe_primitive(sparql_dcg:ask(_,_,_)).
 ??(+Goal:sparql_goal) is nondet
Equivalent to _ ?? Goal. Will query all endpoints in parallel. Identical bindings may be returned multiple times. See query_goal/3 for details.
   78??(Spec) :- ??(_,Spec).
 ??(EP, +Goal:sparql_goal) is nondet
Equivalent to query_goal(EP,Goal,Opts) where Opts is the value of the setting sparkle:select_options. See query_goal/3 for details. IF EP is unbound on entry, it is bound to the endpoint from which the current bindings were obtained.
   85??(EP,Spec) :- 
   86   spec_goal_opts(Spec,Goal,Opts),
   87   setting(select_options,Opts0),
   88   merge_options(Opts,Opts0,Opts1),
   89   query_goal(EP,Goal,Opts1).
   90
   91spec_goal_opts(Opts ?? Goal, Goal, Opts) :- !.
   92spec_goal_opts(Goal,Goal,[]).
   93
   94/*
   95 * Assert/declare a new sparql end point
   96 */
 sparql_endpoint(+EP:ground, +URL:atom, +Options) is det
 sparql_endpoint(+EP:ground, +URL:atom) is det
Declares EP as a short name for a SPARQL endpoint with the given URL. No options are defined at the moment.
  103sparql_endpoint(EP,Url) :- sparql_endpoint(EP,Url,[]).
  104sparql_endpoint(EP,Url,Options) :-
  105   url_endpoint(Url,Host,Port,Path), 
  106   (  sparql_endpoint(EP,Host,Port,Path,_)
  107   -> format('% WARNING: Updating already registered SPARQL end point ~w.\n',[Url]),
  108      retractall(sparql_endpoint(EP,Host,Port,Path,_))
  109   ),
  110   debug(sparkle,'Asserting SPARQL end point ~w: ~w ~w ~w ~w.',[EP,Host,Port,Path,Options]),
  111   assert(sparql_endpoint(EP,Host,Port,Path,Options)).
  112
  113user:term_expansion(:-(sparql_endpoint(EP,Url)), Expanded) :- 
  114   endpoint_declaration(EP,Url,[],Expanded).
  115user:term_expansion(:-(sparql_endpoint(EP,Url,Options)), Expanded) :- 
  116   endpoint_declaration(EP,Url,Options,Expanded).
  117
  118endpoint_declaration(EP,Url,Options, sparkle:sparql_endpoint(EP,Host,Port,Path,Options)) :-
  119	debug(sparkle,'Declaring SPARQL end point ~w: ~w ~w ~w ~w.',[EP,Host,Port,Path,Options]),
  120   url_endpoint(Url,Host,Port,Path).
  121
  122url_endpoint(Url,Host,Port,Path) :-
  123	parse_url(Url,Parsed),
  124	member(host(Host),Parsed),
  125	member(path(Path),Parsed),
  126	(member(port(Port),Parsed);Port=80).
 current_sparql_endpoint(-EP:ground, -Host:atom, -Port:natural, -Path:atom, -Options:list) is nondet
Succeeds once for each known endpoint.
  132current_sparql_endpoint(EP,Host,Port,Path,Options) :-
  133   sparql_endpoint(EP,Host,Port,Path,Options).
  134
  135
  136% ----------------------------------------------------
  137% Goal-based queries 
  138% These get translated into phrase-based queries.
 query_goal(+EP, +Goal:sparql_goal, +Opts) is nondet
query_goal(-EP, +Goal:sparql_goal, +Opts) is nondet
Runs a SPARQL query against one or more SPARLQ endpoints. Goal is converted into a textual SPARQL query using the DCG defined in sparql_dcg.pl.

If EP is ground on entry, the query is run against the specified endpoint. If EP is unbound on entry, the query is run agains all endpoints in parallel, possibly returning multiple results from each.

(The following applies only to queries that return bindings, not to simple boolean questions, which return only true or false.) Options are as follows:

limit(L:natural)
At-most this many bindings will be returned per SPARQL call.
offset(O:natural)
Begin returning bindings from the Oth result on.
autopage(Auto:bool)
If false, a single SPARQL call is made using any limit and offset options if supplied. If true, the the offset option is ignored and multiple SPARQL queries are made as necessary to supply results, using the limit option to determine the number of results retrieved from the endpoint at a time. Other options are passed to phrase_to_sparql/2.
  166query_goal(EP,Goal,Opts) :- 
  167   findall(EP,sparql_endpoint(EP,_,_,_,_),EPs),
  168   term_variables(Goal,Vars),
  169   (  Vars = [] % if no variables, do an ASK query, otherwise, SELECT
  170   -> phrase_to_sparql(ask(Goal),SPARQL),
  171      parallel_query(simple_query(SPARQL),EPs,EP-true)
  172   ;  Result =.. [row|Vars],
  173      setting(limit,DefaultLimit),
  174      call_dcg((  option_default_select(limit(Limit),DefaultLimit),
  175                  option_default_select(autopage(Auto),true),
  176                  (  {Auto=true}
  177                  -> {Query = autopage_query(Limit,SPARQL)},
  178                     option_default_select(offset(_),_)
  179                  ;  {Query = simple_query(SPARQL)},
  180                     cons(limit(Limit))
  181                  ) 
  182               ), Opts, Opts1),
  183      phrase_to_sparql(select(Vars,Goal,Opts1),SPARQL),
  184      parallel_query(Query,EPs,EP-Result)
  185   ).
  186
  187cons(X,T,[X|T]).
  188option_default_select(Opt,Def,O1,O2) :- select_option(Opt,O1,O2,Def).
  189simple_query(SPARQL,EP,EP-Result) :- query_sparql(EP,SPARQL,Result).
  190autopage_query(Limit,SPARQL,EP,EP-Result) :- autopage(EP,SPARQL,Limit,0,Result).
  191
  192autopage(EP,SPARQL,Limit,Offset,Result) :-
  193   format(string(Q),'~s LIMIT ~d OFFSET ~d',[SPARQL,Limit,Offset]),
  194   findall(R,query_sparql(EP,Q,R),Results),
  195   (  member(Result,Results)
  196   ;  length(Results,Limit),     % no next page if length(Results) < Limit
  197      Offset1 is Offset + Limit, % next batch of results
  198      autopage(EP,SPARQL,Limit,Offset1,Result)
  199   ).
  200
  201parallel_query(_,[],_) :- !, fail.
  202parallel_query(P,[X],Y) :- !, call(P,X,Y).
  203parallel_query(P,Xs,Y) :-
  204   maplist(par_goal(P,Y),Xs,Goals),
  205   concurrent_or(Y,Goals,[on_error(continue)]).
  206
  207par_goal(P,Y,X,call(P,X,Y)).
 query_phrase(+EP, +Q:sparqle_phrase(R), R) is nondet
query_phrase(-EP, +Q:sparqle_phrase(R), R) is nondet
Phrase-based queries using the DCG defined in sparql_dcg.pl. The return type depends on the query:
select(V:list(var), sparql_goal, options) :: sparql_phrase(row(N)) :- length(V,N).
describe(resource,sparql_goal)            :: sparql_phrase(rdf).
describe(resource)                        :: sparql_phrase(rdf).
ask(sparql_goal)                          :: sparql_phrase(bool).

rdf  ---> rdf(resource,resource,object).
bool ---> true; false.

row(N) is the type of terms of functor row/N.

  227query_phrase(EP,Phrase,Result) :- 
  228   phrase_to_sparql(Phrase,SPARQL),
  229   query_sparql(EP,SPARQL,Result).
  230
  231
  232phrase_to_sparql(Phrase,SPARQL) :-
  233   term_variables(Phrase,Vars),
  234   copy_term(t(Vars,Phrase),t(Vars1,Phrase1)),
  235   numbervars(Vars1,0,_),
  236   (  phrase(Phrase1,Codes) -> true
  237   ;  throw(unrecognised_query(Phrase))
  238   ),
  239   string_codes(SPARQL,Codes),
  240   debug(sparkle,'SPARQL query: ~s',[SPARQL]).
  241
  242% ----------------------------------------------------
  243% In the end, everything comes through this.
 query_sparql(?EP, SPARQL, -Result) is nondet
Runs textual SPARQL query against an endpoint, exactly as with sparql_query/3. If EP is unbound on entry, all known endpoints will be tried sequentially.
  250query_sparql(EP,SPARQL,Result) :-
  251   sparql_endpoint(EP,Host,Port,Path,EPOpts),
  252   debug(sparkle,'Querying endpoint http://~w:~w~w',[Host,Port,Path]),
  253   sparql_query(SPARQL,Result,[host(Host),port(Port),path(Path)|EPOpts])