View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2006-2015, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_http_plugin, []).   37:- use_module(library(http/http_open)).   38:- use_module(library(http/http_header)).   39:- use_module(library(semweb/rdf_db), []). % we define hooks for this
   40:- use_module(library(date)).   41:- use_module(library(error)).   42:- use_module(library(lists)).   43:- use_module(library(option)).

RDF HTTP Plugin

This module allows loading data into the semantic web library directly from an HTTP server. The following example loads the RDF core data into the RDF database.

:- use_module(library(semweb/rdf_db)).
:- use_module(library(semweb/rdf_http_plugin)).

    ...,
    rdf_load('http://www.w3.org/1999/02/22-rdf-syntax-ns')

*/

   60:- multifile
   61    rdf_db:rdf_open_hook/8,
   62    rdf_db:url_protocol/1,
   63    rdf_db:rdf_storage_encoding/2,
   64    rdf_db:rdf_file_type/2,
   65    rdf_content_type/3.   66
   67rdf_db:url_protocol(http).
   68rdf_db:url_protocol(https).
   69
   70
   71% define `rdf_format` as a type.
   72:- multifile error:has_type/2.   73error:has_type(rdf_format, Term):-
   74    error:has_type(oneof([nquads,ntriples,rdfa,trig,turtle,xml]), Term).
 rdf_extra_headers(-RequestHeaders:list(compound), +Options:list) is det
Send extra headers with the request. Note that, although we also process RDF embedded in HTML, we do not explicitely ask for it. Doing so causes some (e.g., http://w3.org/2004/02/skos/core to reply with the HTML description rather than the RDF).

When given, option format(+atom) is used in order to prioritize the corresponding RDF content types.

   86rdf_extra_headers([ cert_verify_hook(ssl_verify),
   87                    request_header('Accept'=AcceptValue)
   88                  ], Options) :-
   89    option(format(Format), Options, _VAR),
   90    rdf_accept_header_value(Format, AcceptValue).
 rdf_db:rdf_open_hook(+Scheme, +URL, +HaveModified, -Stream, -Cleanup, -Modified, -Format, +Options) is semidet
Load hook implementation for HTTP(S) URLs.
Arguments:
HaveModified- is bound to a timestamp (number) if we already have a copy and that copy was modified at HaveModified.
Modified- is bound to unknown, not_modified or a timestamp.
  104rdf_db:rdf_open_hook(https, SourceURL, HaveModified, Stream, Cleanup,
  105                     Modified, Format, Options) :-
  106    rdf_db:rdf_open_hook(http, SourceURL, HaveModified, Stream, Cleanup,
  107                         Modified, Format, Options).
  108rdf_db:rdf_open_hook(http, SourceURL, HaveModified, Stream, Cleanup,
  109                     Modified, Format, Options) :-
  110    modified_since_header(HaveModified, Header),
  111    TypeHdr = [ header(content_type, ContentType),
  112                header(last_modified, ModifiedText)
  113              ],
  114    rdf_extra_headers(Extra, Options),
  115    append([Extra, TypeHdr, Header, Options], OpenOptions),
  116    catch(http_open(SourceURL, Stream0,
  117                    [ status_code(Code)
  118                    | OpenOptions
  119                    ]), E, true),
  120    (   Code == 200
  121    ->  (   open_envelope(ContentType, SourceURL,
  122                          Stream0, Stream, Format)
  123        ->  Cleanup = close(Stream),
  124            (   nonvar(ModifiedText),
  125                parse_time(ModifiedText, ModifiedStamp)
  126            ->  Modified = last_modified(ModifiedStamp)
  127            ;   Modified = unknown
  128            )
  129        ;   close(Stream0),
  130            domain_error(content_type, ContentType)
  131        )
  132    ;   Code == 304
  133    ->  Modified = not_modified,
  134        Cleanup = true
  135    ;   var(E)
  136    ->  throw(error(existence_error(url, SourceURL),
  137                    context(_, status(Code,_))))
  138    ;   throw(E)
  139    ).
  140
  141:- public ssl_verify/5.
 ssl_verify(+SSL, +ProblemCert, +AllCerts, +FirstCert, +Error)
Currently we accept all certificates.
  147ssl_verify(_SSL,
  148           _ProblemCertificate, _AllCertificates, _FirstCertificate,
  149           _Error).
 modified_since_header(+LastModified, -ExtraHeaders) is det
Add an If-modified-since if we have a version with the given time-stamp.
  156modified_since_header(HaveModified, []) :-
  157    var(HaveModified),
  158    !.
  159modified_since_header(HaveModified,
  160                      [ request_header('If-modified-since' =
  161                                       Modified)
  162                      ]) :-
  163    http_timestamp(HaveModified, Modified).
 open_envelope(+ContentType, +SourceURL, +Stream0, -Stream, ?Format) is semidet
Open possible envelope formats.
  170open_envelope(ContentType, SourceURL, Stream0, Stream, Format) :-
  171    (   ContentType == 'application/x-gzip'
  172    ;   ContentType == 'application/octet-stream',
  173        file_name_extension(_, gz, SourceURL)
  174    ),
  175    !,
  176    rdf_db:rdf_storage_encoding(_, gzip),
  177    !,
  178    (   var(Format)
  179    ->  file_name_extension(BaseURL, _GzExt, SourceURL),
  180        file_name_extension(_, Ext, BaseURL),
  181        rdf_db:rdf_file_type(Ext, Format)
  182    ;   true
  183    ),
  184    stream_pair(Stream0, Read, _),
  185    rdf_zlib_plugin:zopen(Read, Stream, []).
  186open_envelope(_, _, Stream, Stream, Format) :-
  187    nonvar(Format),
  188    !.
  189open_envelope(ContentType, SourceURL, Stream, Stream, Format) :-
  190    major_content_type(ContentType, Major),
  191    (   rdf_content_type(Major, _, Format)
  192    ->  true
  193    ;   Major == 'text/plain'       % server is not properly configured
  194    ->  file_name_extension(_, Ext, SourceURL),
  195        rdf_db:rdf_file_type(Ext, Format)
  196    ).
  197
  198major_content_type(ContentType, Major) :-
  199    sub_atom(ContentType, Pre, _, _, (;)),
  200    !,
  201    sub_atom(ContentType, 0, Pre, _, Major).
  202major_content_type(Major, Major).
 rdf_accept_header_value(?Format:rdf_format, -AcceptValue:atom) is det
  207rdf_accept_header_value(Format, AcceptValue) :-
  208    findall(AcceptValue, accept_value(Format, AcceptValue), AcceptValues),
  209    atomic_list_concat(['*/*;q=0.001'|AcceptValues], ',', AcceptValue).
  210
  211accept_value(Format, AcceptValue) :-
  212    rdf_content_type(MediaType, QValue0, Format0),
  213    (   Format == Format0
  214    ->  QValue = 1.0
  215    ;   QValue = QValue0
  216    ),
  217    format(atom(AcceptValue), '~a;q=~3f', [MediaType,QValue]).
 rdf_content_type(?MediaType:atom, ?QualityValue:between(0.0,1.0), ?Format:rdf_format) is nondet
Quality values are intended to be used in accordance with RFC 2616. Quality values are determined based on the following criteria:
LabelCriterionValue
ASupported RDF parser0.43
BRDF-specific content type0.33
COfficial content type0.23

For example, text/turtle has quality value 0.99 because it is an official content type that is RDF-specific and that has a parser in Semweb.

This intentionally allows the user to add another content type with a higher Q-value (i.e., >0.99).

Deduce the RDF encoding from the mime-type. This predicate is defined as multifile such that the user can associate additional content types to RDF formats.

See also
- Discussion http://richard.cyganiak.de/blog/2008/03/what-is-your-rdf-browsers-accept-header/
- N-Quadruples http://www.w3.org/ns/formats/N-Quads
- N-Triples http://www.w3.org/ns/formats/N-Triples
- N3 http://www.w3.org/ns/formats/N3
- RDFa http://www.w3.org/ns/formats/RDFa
- TriG http://www.w3.org/ns/formats/TriG
- Turtle http://www.w3.org/ns/formats/Turtle
- XML/RDF http://www.w3.org/ns/formats/RDF_XML
bug
- The turtle parser only parses a subset of n3. (The N3 format is treated as if it were Turtle.)
  254rdf_content_type('application/n-quads',    0.99, nquads  ). %ABC
  255rdf_content_type('application/n-triples',  0.99, ntriples). %ABC
  256rdf_content_type('application/rdf',        0.76, xml     ). %AB
  257rdf_content_type('application/rdf+turtle', 0.76, turtle  ). %AB
  258rdf_content_type('application/rdf+xml',    0.76, xml     ). %AB
  259rdf_content_type('application/rss+xml',    0.66, xml     ). %AC
  260rdf_content_type('application/trig',       0.99, trig    ). %ABC
  261rdf_content_type('application/turtle',     0.76, turtle  ). %AB
  262rdf_content_type('application/x-trig',     0.76, trig    ). %AB
  263rdf_content_type('application/x-turtle',   0.76, turtle  ). %AB
  264rdf_content_type('application/xhtml+xml',  0.66, rdfa    ). %AC
  265rdf_content_type('application/xml',        0.66, xml     ). %AC
  266rdf_content_type('text/html',              0.66, rdfa    ). %AC
  267rdf_content_type('text/n3',                0.56, turtle  ). %BC (N3)
  268rdf_content_type('text/rdf',               0.76, xml     ). %AB
  269rdf_content_type('text/rdf+n3',            0.33, turtle  ). %B (N3)
  270rdf_content_type('text/rdf+xml',           0.76, xml     ). %AB
  271rdf_content_type('text/turtle',            0.99, turtle  ). %ABC
  272rdf_content_type('text/xml',               0.66, xml     ). %AC
  273rdf_content_type('application/x-gzip',     0.23, gzip    ). %C