View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              SWI-Prolog Solutions b.v.
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(sparql_client,
   38          [ sparql_query/3,             % +Query, -Row, +Options
   39            sparql_set_server/1,        % +Options
   40            sparql_read_xml_result/2,   % +Stream, -Result
   41            sparql_read_json_result/2   % +Input, -Result
   42          ]).   43:- autoload(library(apply), [maplist/3, maplist/4, partition/4]).   44:- autoload(library(gensym), [gensym/2]).   45:- autoload(library(lists), [member/2]).   46:- autoload(library(option), [select_option/3, select_option/4, merge_options/3]).   47:- autoload(library(rdf), [load_rdf/2]).   48:- autoload(library(readutil), [read_stream_to_codes/2]).   49:- autoload(library(sgml), [load_structure/3]).   50:- autoload(library(uri),
   51            [ uri_components/2,
   52              uri_data/3,
   53              uri_authority_components/2,
   54              uri_authority_data/3
   55            ]).   56:- autoload(library(http/http_open), [http_open/3]).   57:- autoload(library(http/json), [json_read/2]).   58:- autoload(library(semweb/turtle), [rdf_read_turtle/3]).   59
   60/** <module> SPARQL client library
   61
   62This module provides a SPARQL client.  For example:
   63
   64```
   65?- sparql_query('select * where { ?x rdfs:label "Amsterdam" }', Row,
   66                [ host('dbpedia.org'), path('/sparql/')]).
   67
   68Row = row('http://www.ontologyportal.org/WordNet#WN30-108949737') ;
   69false.
   70```
   71
   72Or, querying a local server using an ``ASK`` query:
   73
   74```
   75?- sparql_query('ask { owl:Class rdfs:label "Class" }', Row,
   76                [ host('localhost'), port(3020), path('/sparql/')]).
   77Row = true.
   78```
   79
   80HTTPS servers are supported using the scheme(https) option:
   81
   82```
   83?- sparql_query('select * where { ?x rdfs:label "Amsterdam"@nl }',
   84                Row,
   85                [ scheme(https),
   86                  host('query.wikidata.org'),
   87                  path('/sparql')
   88                ]).
   89```
   90*/
   91
   92
   93%!  sparql_query(+Query, -Result, +Options) is nondet.
   94%
   95%   Execute a SPARQL query on an HTTP  SPARQL endpoint. Query is an atom
   96%   that denotes the query. Result is unified   to a term rdf(S,P,O) for
   97%   ``CONSTRUCT`` and ``DESCRIBE``  queries,   row(...)  for  ``SELECT``
   98%   queries and `true` or `false` for ``ASK`` queries. Options are
   99%
  100%   Variables that are unbound in SPARQL   (e.g., due to SPARQL optional
  101%   clauses), are bound in Prolog to the atom `'$null$'`.
  102%
  103%	- endpoint(+URL)
  104%	  May be used as alternative to Scheme, Host, Port and Path
  105%	  to specify the endpoint in a single option.
  106%       - host(+Host)
  107%       - port(+Port)
  108%       - path(+Path)
  109%       - scheme(+Scheme)
  110%         The above four options set the location of the server.
  111%       - search(+ListOfParams)
  112%         Provide additional query parameters, such as the graph.
  113%       - variable_names(-ListOfNames)
  114%         Unifies ListOfNames with a list of atoms that describe the
  115%         names of the variables in a =SELECT= query.
  116%
  117%   Remaining options are passed to http_open/3.  The defaults for Host,
  118%   Port and Path can be  set   using  sparql_set_server/1.  The initial
  119%   default for port is 80 and path is `/sparql/`.
  120%
  121%   For  example,  the  ClioPatria  server   understands  the  parameter
  122%   `entailment`.  The  code  below  queries    for  all  triples  using
  123%   _rdfs_entailment.
  124%
  125%   ```
  126%   ?- sparql_query('select * where { ?s ?p ?o }',
  127%                   Row,
  128%                   [ search([entailment=rdfs])
  129%                   ]).
  130%   ```
  131%
  132%   Another useful option is the   `request_header`  which, for example,
  133%   may be used to trick force  a   server  to  reply using a particular
  134%   document format:
  135%
  136%   ```
  137%   ?- sparql_query(
  138%          'select * where { ?s ?p ?o }',
  139%           Row,
  140%           [ host('integbio.jp'),
  141%             path('/rdf/sparql'),
  142%             request_header('Accept' =
  143%                            'application/sparql-results+xml')
  144%           ]).
  145%   ```
  146
  147sparql_query(Query, Row, Options) :-
  148    (   select_option(endpoint(URL), Options, Options5)
  149    ->  uri_components(URL, Components),
  150        uri_data(scheme, Components, Scheme),
  151        uri_data(authority, Components, Auth),
  152        uri_data(path, Components, Path),
  153        uri_data(search, Components, Extra),
  154        ignore(Extra = []),
  155        uri_authority_components(Auth, AComp),
  156        uri_authority_data(host, AComp, Host),
  157        uri_authority_data(port, AComp, Port),
  158        (   var(Port)
  159        ->  sparql_port(Scheme, Port, _, _)
  160        ;   true
  161        )
  162    ;   sparql_param(scheme(Scheme), Options,  Options1),
  163        sparql_port(Scheme, Port,    Options1, Options2),
  164        sparql_param(host(Host),     Options2, Options3),
  165        sparql_param(path(Path),     Options3, Options4),
  166        select_option(search(Extra), Options4, Options5, [])
  167    ),
  168    select_option(variable_names(VarNames), Options5, Options6, _),
  169    partition(is_url_option, Options6, UrlOptions, HTTPOptions),
  170    sparql_extra_headers(HTTPOptions0),
  171    merge_options(HTTPOptions, HTTPOptions0, HTTPOptions1),
  172    http_open([ scheme(Scheme),
  173                host(Host),
  174                port(Port),
  175                path(Path),
  176                search([ query = Query
  177                       | Extra
  178                       ])
  179              | UrlOptions
  180              ], In,
  181              [ header(content_type, ContentType),
  182                status_code(Status)
  183              | HTTPOptions1
  184              ]),
  185    plain_content_type(ContentType, CleanType),
  186    read_reply(Status, CleanType, In, VarNames, Row).
  187
  188url_option(scheme).
  189url_option(user).
  190url_option(password).
  191url_option(host).
  192url_option(port).
  193url_option(path).
  194url_option(query_string).
  195url_option(search).
  196url_option(cert_verify_hook).
  197url_option(certificate_file).
  198url_option(key_file).
  199url_option(certificate_key_pairs).
  200url_option(pem_password_hook).
  201url_option(crl).
  202url_option(cacert_file).
  203url_option(cacerts).
  204url_option(cipher_list).
  205url_option(ecdh_curve).
  206url_option(min_protocol_version).
  207url_option(max_protocol_version).
  208url_option(disable_ssl_methods).
  209url_option(ssl_method).
  210
  211is_url_option(Name = _Value) :-
  212    url_option(Name),
  213    !.
  214is_url_option(Opt) :-
  215    compound(Opt),
  216    functor(Opt, Name, 1),
  217    url_option(Name).
  218
  219%!  sparql_extra_headers(-List)
  220%
  221%   Send extra headers with the request. Note that, although we also
  222%   process RDF embedded in HTML, we do  not explicitely ask for it.
  223%   Doing so causes some   (e.g., http://w3.org/2004/02/skos/core to
  224%   reply with the HTML description rather than the RDF).
  225
  226sparql_extra_headers(
  227        [ request_header('Accept' = 'application/sparql-results+xml, \c
  228                                     application/n-triples, \c
  229                                     application/x-turtle; q=0.9, \c
  230                                     application/turtle; q=0.9, \c
  231                                     text/turtle, \c
  232                                     application/sparql-results+json, \c
  233                                     application/rdf+xml, \c
  234                                     text/rdf+xml; q=0.8, \c
  235                                     */*; q=0.1')
  236        ]).
  237
  238%!  read_reply(+Status, +ContentType, +In, -Close, -Row)
  239
  240read_reply(200, ContentType, In, Close, Row) :-
  241    !,
  242    read_reply(ContentType, In, Close, Row).
  243read_reply(Status, _ContentType, In, _Close, _Row) :-
  244    call_cleanup(read_string(In, _, Reply),
  245                 close(In, [force(true)])),
  246    throw(error(sparql_error(Status, Reply), _)).
  247
  248read_reply('application/rdf+xml', In, _, Row) :-
  249    !,
  250    call_cleanup(load_rdf(stream(In), RDF), close(In)),
  251    member(Row, RDF).
  252read_reply(MIME, In, _, Row) :-
  253    turtle_media_type(MIME),
  254    !,
  255    call_cleanup(rdf_read_turtle(stream(In), RDF, []), close(In)),
  256    member(Row, RDF).
  257read_reply(MIME, In, VarNames, Row) :-
  258    sparql_result_mime(MIME),
  259    !,
  260    call_cleanup(sparql_read_xml_result(stream(In), Result),
  261                 close(In)),
  262    varnames(Result, VarNames),
  263    xml_result(Result, Row).
  264read_reply(MIME, In, VarNames, Row) :-
  265    json_result_mime(MIME),
  266    !,
  267    call_cleanup(sparql_read_json_result(stream(In), Result),
  268                 close(In)),
  269    (   Result = select(VarNames, Rows)
  270    ->  member(Row, Rows)
  271    ;   Result = ask(True)
  272    ->  Row = True,
  273        VarNames = []
  274    ).
  275read_reply(Type, In, _, _) :-
  276    read_stream_to_codes(In, Codes),
  277    string_codes(Reply, Codes),
  278    close(In),
  279    throw(error(domain_error(sparql_result_document, Type),
  280                context(_, Reply))).
  281
  282turtle_media_type('application/x-turtle').
  283turtle_media_type('application/turtle').
  284turtle_media_type('application/n-triples').
  285turtle_media_type('text/rdf+n3').
  286turtle_media_type('text/turtle').
  287
  288sparql_result_mime('application/sparql-results+xml'). % official
  289sparql_result_mime('application/sparql-result+xml').
  290
  291json_result_mime('application/sparql-results+json').
  292
  293
  294plain_content_type(Type, Plain) :-
  295    sub_atom(Type, B, _, _, (;)),
  296    !,
  297    sub_string(Type, 0, B, _, Main),
  298    normalize_space(atom(Plain), Main).
  299plain_content_type(Type, Type).
  300
  301xml_result(ask(Bool), Result) :-
  302    !,
  303    Result = Bool.
  304xml_result(select(_VarNames, Rows), Result) :-
  305    member(Result, Rows).
  306
  307varnames(ask(_), _).
  308varnames(select(VarTerm, _Rows), VarNames) :-
  309    VarTerm =.. [_|VarNames].
  310
  311
  312                 /*******************************
  313                 *            SETTINGS          *
  314                 *******************************/
  315
  316:- dynamic
  317    sparql_setting/1.  318
  319sparql_setting(scheme(http)).
  320sparql_setting(path('/sparql/')).
  321
  322sparql_param(Param, Options0, Options) :-
  323    select_option(Param, Options0, Options),
  324    !.
  325sparql_param(Param, Options, Options) :-
  326    sparql_setting(Param),
  327    !.
  328sparql_param(Param, Options, Options) :-
  329    functor(Param, Name, _),
  330    throw(error(existence_error(option, Name), _)).
  331
  332sparql_port(_Scheme, Port, Options0, Options) :-
  333    select_option(port(Port), Options0, Options),
  334    !.
  335sparql_port(_Scheme, Port, Options, Options) :-
  336    sparql_setting(port(Port)),
  337    !.
  338sparql_port(http, 80, Options, Options) :-
  339    !.
  340sparql_port(https, 443, Options, Options) :-
  341    !.
  342
  343
  344%!  sparql_set_server(+OptionOrList)
  345%
  346%   Set sparql server default options.  Provided defaults are:
  347%   host, port and repository.  For example:
  348%
  349%       ==
  350%           sparql_set_server([ host(localhost),
  351%                               port(8080)
  352%                               path(world)
  353%                             ])
  354%       ==
  355%
  356%   The default for port is 80 and path is =|/sparql/|=.
  357
  358sparql_set_server([]) :- !.
  359sparql_set_server([H|T]) :-
  360    !,
  361    sparql_set_server(H),
  362    sparql_set_server(T).
  363sparql_set_server(Term) :-
  364    functor(Term, Name, Arity),
  365    functor(Unbound, Name, Arity),
  366    retractall(sparql_setting(Unbound)),
  367    assert(sparql_setting(Term)).
  368
  369
  370                 /*******************************
  371                 *             RESULT           *
  372                 *******************************/
  373
  374ns(sparql, 'http://www.w3.org/2005/sparql-results#').
  375
  376/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  377Read    the    SPARQL    XML    result     format    as    defined    in
  378http://www.w3.org/TR/rdf-sparql-XMLres/, version 6 April 2006.
  379- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  380
  381                 /*******************************
  382                 *        MACRO HANDLING        *
  383                 *******************************/
  384
  385%       substitute 'sparql' by the namespace   defined  above for better
  386%       readability of the remainder of the code.
  387
  388term_subst(V, _, _, V) :-
  389    var(V),
  390    !.
  391term_subst(F, F, T, T) :- !.
  392term_subst(C, F, T, C2) :-
  393    compound(C),
  394    !,
  395    functor(C, Name, Arity),
  396    functor(C2, Name, Arity),
  397    term_subst(0, Arity, C, F, T, C2).
  398term_subst(T, _, _, T).
  399
  400term_subst(A, A, _, _, _, _) :- !.
  401term_subst(I0, Arity, C0, F, T, C) :-
  402    I is I0 + 1,
  403    arg(I, C0, A0),
  404    term_subst(A0, F, T, A),
  405    arg(I, C, A),
  406    term_subst(I, Arity, C0, F, T, C).
  407
  408term_expansion(T0, T) :-
  409    ns(sparql, NS),
  410    term_subst(T0, sparql, NS, T).
  411
  412
  413                 /*******************************
  414                 *           READING            *
  415                 *******************************/
  416
  417%!  sparql_read_xml_result(+Input, -Result)
  418%
  419%   Specs from http://www.w3.org/TR/rdf-sparql-XMLres/.  The returned
  420%   Result term is of the format:
  421%
  422%           * select(VarNames, Rows)
  423%           Where VarNames is a term v(Name, ...) and Rows is a
  424%           list of row(....) containing the column values in the
  425%           same order as the variable names.
  426%
  427%           * ask(Bool)
  428%           Where Bool is either =true= or =false=
  429
  430:- thread_local
  431    bnode_map/2.  432
  433sparql_read_xml_result(Input, Result) :-
  434    load_structure(Input, DOM,
  435                   [ dialect(xmlns)
  436                   ]),
  437    call_cleanup(dom_to_result(DOM, Result),
  438                 retractall(bnode_map(_,_))).
  439
  440dom_to_result(DOM, Result) :-
  441    (   sub_element(DOM, sparql:head, _HAtt, Content)
  442    ->  variables(Content, Vars)
  443    ;   Vars = []
  444    ),
  445    (   Vars == [],
  446        sub_element(DOM, sparql:boolean, _, [TrueFalse])
  447    ->  Result = ask(TrueFalse)
  448    ;   VarTerm =.. [v|Vars],
  449        Result = select(VarTerm, Rows),
  450        sub_element(DOM, sparql:results, _RAtt, RContent)
  451    ->  rows(RContent, Vars, Rows)
  452    ),
  453    !.                                   % Guarantee finalization
  454
  455%!  variables(+DOM, -Varnames)
  456%
  457%   Deals with <variable name=Name>.  Head   also  may contain <link
  458%   href="..."/>. This points to additional   meta-data.  Not really
  459%   clear what we can do with that.
  460
  461variables([], []).
  462variables([element(sparql:variable, Att, [])|T0], [Name|T]) :-
  463    !,
  464    memberchk(name=Name, Att),
  465    variables(T0, T).
  466variables([element(sparql:link, _, _)|T0], T) :-
  467    !,
  468    variables(T0, T).
  469variables([CDATA|T0], T) :-
  470    atomic(CDATA),
  471    variables(T0, T).
  472
  473
  474rows([], _, []).
  475rows([R|T0], Vars, [Row|T]) :-
  476    R = element(sparql:result, _, _),
  477    !,
  478    row_values(Vars, R, Values),
  479    Row =.. [row|Values],
  480    rows(T0, Vars, T).
  481rows([CDATA|T0], Vars, T) :-
  482    atomic(CDATA),
  483    rows(T0, Vars, T).
  484
  485row_values([], _, []).
  486row_values([Var|VarT], DOM, [Value|ValueT]) :-
  487    (   sub_element(DOM, sparql:binding, Att, Content),
  488        memberchk(name=Var, Att)
  489    ->  value(Content, Value)
  490    ;   Value = '$null$'
  491    ),
  492    row_values(VarT, DOM, ValueT).
  493
  494value([element(sparql:literal, Att, Content)|Rest], literal(Lit)) :-
  495    !,
  496    white(Rest),
  497    lit_value(Content, Value),
  498    (   memberchk(datatype=Type, Att)
  499    ->  Lit = type(Type, Value)
  500    ;   memberchk(xml:lang=Lang, Att)
  501    ->  Lit = lang(Lang, Value)
  502    ;   Lit = Value
  503    ).
  504value([element(sparql:uri, [], [URI])|Rest], URI) :- !,
  505    white(Rest).
  506value([element(sparql:bnode, [], [NodeID])|Rest], URI) :-
  507    !,
  508    white(Rest),
  509    bnode(NodeID, URI).
  510value([element(sparql:unbound, [], [])|Rest], '$null$') :-
  511    !,
  512    white(Rest).
  513value([CDATA|Rest], Value) :-
  514    atomic(CDATA),
  515    value(Rest, Value).
  516
  517
  518white([]).
  519white([CDATA|T]) :-
  520    atomic(CDATA),
  521    white(T).
  522
  523lit_value([], '').
  524lit_value([Value], Value).
  525
  526
  527%!  sub_element(+DOM, +Name, -Atttribs, -Content)
  528
  529sub_element(element(Name, Att, Content), Name, Att, Content).
  530sub_element(element(_, _, List), Name, Att, Content) :-
  531    sub_element(List, Name, Att, Content).
  532sub_element([H|T], Name, Att, Content) :-
  533    (   sub_element(H, Name, Att, Content)
  534    ;   sub_element(T, Name, Att, Content)
  535    ).
  536
  537
  538bnode(Name, URI) :-
  539    bnode_map(Name, URI),
  540    !.
  541bnode(Name, URI) :-
  542    gensym('__bnode', URI0),
  543    assertz(bnode_map(Name, URI0)),
  544    URI = URI0.
  545
  546
  547%!  sparql_read_json_result(+Input, -Result) is det.
  548%
  549%   The returned Result term is of the format:
  550%
  551%           * select(VarNames, Rows)
  552%           Where VarNames is a term v(Name, ...) and Rows is a
  553%           list of row(....) containing the column values in the
  554%           same order as the variable names.
  555%
  556%           * ask(Bool)
  557%           Where Bool is either =true= or =false=
  558%
  559%   @see http://www.w3.org/TR/rdf-sparql-json-res/
  560
  561sparql_read_json_result(Input, Result) :-
  562    setup_call_cleanup(
  563        open_input(Input, In, Close),
  564        read_json_result(In, Result),
  565        close_input(Close)).
  566
  567open_input(stream(In), In, Close) :-
  568    !,
  569    encoding(In, utf8, Close).
  570open_input(In, In, Close) :-
  571    is_stream(In),
  572    !,
  573    encoding(In, utf8, Close).
  574open_input(File, In, close(In)) :-
  575    open(File, read, In, [encoding(utf8)]).
  576
  577encoding(In, Encoding, Close) :-
  578    stream_property(In, encoding(Old)),
  579    (   Encoding == Old
  580    ->  Close = true
  581    ;   set_stream(In, encoding(Encoding)),
  582        Close = set_stream(In, Encoding, Old)
  583    ).
  584
  585close_input(close(In)) :-
  586    !,
  587    retractall(bnode_map(_,_)),
  588    close(In).
  589close_input(_) :-
  590    retractall(bnode_map(_,_)).
  591
  592read_json_result(In, Result) :-
  593    json_read(In, JSON),
  594    json_to_result(JSON, Result).
  595
  596json_to_result(json([ head    = json(Head),
  597                      results = json(Body)
  598                    ]),
  599               select(Vars, Rows)) :-
  600    memberchk(vars=VarList, Head),
  601    Vars =.. [v|VarList],
  602    memberchk(bindings=Bindings, Body),
  603    !,
  604    maplist(json_row(VarList), Bindings, Rows).
  605json_to_result(json(JSon), ask(Boolean)) :-
  606    memberchk(boolean = @(Boolean), JSon).
  607
  608
  609json_row(Vars, json(Columns), Row) :-
  610    maplist(json_cell, Vars, Columns, Values),
  611    !,
  612    Row =.. [row|Values].
  613json_row(Vars, json(Columns), Row) :-
  614    maplist(json_cell_or_null(Columns), Vars, Values),
  615    Row =.. [row|Values].
  616
  617json_cell(Var, Var=json(JValue), Value) :-
  618    memberchk(type=Type, JValue),
  619    jvalue(Type, JValue, Value).
  620
  621json_cell_or_null(Columns, Var, Value) :-
  622    memberchk(Var=json(JValue), Columns),
  623    !,
  624    memberchk(type=Type, JValue),
  625    jvalue(Type, JValue, Value).
  626json_cell_or_null(_, _, '$null$').
  627
  628jvalue(uri, JValue, URI) :-
  629    memberchk(value=URI, JValue).
  630jvalue(literal, JValue, literal(Literal)) :-
  631    memberchk(value=Value, JValue),
  632    (   memberchk('xml:lang'=Lang, JValue)
  633    ->  Literal = lang(Lang, Value)
  634    ;   memberchk('datatype'=Type, JValue)
  635    ->  Literal = type(Type, Value)
  636    ;   Literal = Value
  637    ).
  638jvalue('typed-literal', JValue, literal(type(Type, Value))) :-
  639    memberchk(value=Value, JValue),
  640    memberchk('datatype'=Type, JValue).
  641jvalue(bnode, JValue, URI) :-
  642    memberchk(value=NodeID, JValue),
  643    bnode(NodeID, URI)