View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2020, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_library,
   37          [ rdf_attach_library/1,       % +Dir
   38            rdf_load_library/1,         % +Ontology
   39            rdf_load_library/2,         % +Ontology, +Options
   40            rdf_list_library/0,
   41            rdf_list_library/1,         % +Ontology
   42            rdf_list_library/2,         % +Ontology, +Options
   43            rdf_library_source/2,       % +Ontology, -SourceURL
   44            rdf_library_index/2,        % ?Id, ?Facet
   45            rdf_current_manifest/1      % -Manifest
   46          ]).   47:- use_module(library(semweb/rdf_prefixes),
   48              [ (rdf_meta)/1, op(_,_,rdf_meta)
   49              ]).   50:- use_module(library(semweb/rdf_db),
   51              [ rdf_register_ns/2, rdf_equal/2, rdf_register_ns/3, rdf_load/2
   52              ]).   53
   54:- autoload(library(apply),[exclude/3,maplist/2]).   55:- autoload(library(date),[parse_time/2]).   56:- use_module(library(debug),[debug/3]).   57:- autoload(library(dif),[dif/2]).   58:- autoload(library(error),[must_be/2,existence_error/2]).   59:- autoload(library(lists),[member/2,list_to_set/2]).   60:- autoload(library(option),[option/2,option/3]).   61:- autoload(library(pairs),
   62	    [pairs_values/2,map_list_to_pairs/3,group_pairs_by_key/2]).   63:- autoload(library(rdf),[load_rdf/2]).   64:- autoload(library(solution_sequences),[distinct/2]).   65:- autoload(library(thread),[concurrent/3]).   66:- autoload(library(uri),
   67	    [ uri_file_name/2,
   68	      uri_components/2,
   69	      uri_data/3,
   70	      uri_is_global/1,
   71	      uri_normalized/2
   72	    ]).   73:- autoload(library(http/http_open),[http_open/3]).   74:- autoload(library(semweb/turtle),[rdf_load_turtle/3]).   75
   76:- predicate_options(rdf_list_library/2, 2,
   77                     [ indent(atom),
   78                       show_graph(boolean),
   79                       show_source(boolean),
   80                       show_virtual(boolean)
   81                     ]).   82:- predicate_options(rdf_load_library/2, 2,
   83                     [ concurrent(positive_integer),
   84                       import(boolean),
   85                       load(boolean),
   86                       base_uri(atom),
   87                       claimed_source(atom),
   88                       not_found(oneof([error,warning,silent]))
   89                     ]).   90
   91/** <module> RDF Library Manager
   92
   93This module manages an ontology library. Such   a  library consists of a
   94directory with manifest files named =|Manifest.rdf|= or =|Manifest.ttl|=
   95(Turtle). The manifest files define ontologies  appearing in the library
   96as well as namespace mnemonics and dependencies.
   97
   98The typical usage scenario is
   99
  100==
  101?- rdf_attach_library('/some/directory').
  102?- rdf_load_library(my_ontology).
  103==
  104
  105@tbd    Add caching info
  106@tbd    Allow using Manifests on HTTP servers
  107@author Jan Wielemaker
  108*/
  109
  110:- rdf_register_ns(lib,  'http://www.swi-prolog.org/rdf/library/').  111:- rdf_register_ns(void, 'http://rdfs.org/ns/void#').  112:- rdf_register_ns(vann, 'http://purl.org/vocab/vann/').  113
  114:- dynamic
  115    manifest/2,                     % Path, Time
  116    library_db/3.                   % Name, URL, Facets
  117
  118%       Force compile-time namespace expansion
  119
  120:- rdf_meta
  121    edge(+, r,r,o).  122
  123                 /*******************************
  124                 *            LOADING           *
  125                 *******************************/
  126
  127%!  rdf_load_library(+Id) is det.
  128%!  rdf_load_library(+Id, +Options) is det.
  129%
  130%   Load ontologies from the  library.  A   library  must  first  be
  131%   attached using rdf_attach_library/1.  Defined Options are:
  132%
  133%           * import(Bool)
  134%           If =true= (default), also load ontologies that are
  135%           explicitely imported.
  136%
  137%           * base_uri(URI)
  138%           BaseURI used for loading RDF.  Local definitions in
  139%           ontologies overrule this option.
  140%
  141%           * claimed_source(URL)
  142%           URL from which we claim to have loaded the data.
  143%
  144%           * not_found(+Level)
  145%           The system does a pre-check for the existence of
  146%           all references RDF databases.  If Level is =error=
  147%           it reports missing databases as an error and fails.
  148%           If =warning= it prints them, but continues.  If
  149%           =silent=, no checks are preformed.  Default is =error=.
  150%
  151%           * concurrent(Threads)
  152%           Perform the load concurrently using N threads.  If not
  153%           specified, the number is determined by
  154%           guess_concurrency/2.
  155%
  156%           * load(+Bool)
  157%           If =false=, to all the preparation, but do not execute
  158%           the actual loading.  See also rdf_list_library/2.
  159
  160rdf_load_library(Id) :-
  161    rdf_load_library(Id, []).
  162
  163rdf_load_library(Id, Options) :-
  164    cleaned_load_commands(Id, Cmds, Options),
  165    (   option(concurrent(Threads), Options)
  166    ->  true
  167    ;   guess_concurrency(Cmds, Threads)
  168    ),
  169    length(Cmds, NSources),
  170    print_message(informational, rdf(loading(NSources, Threads))),
  171    (   option(load(true), Options, true)
  172    ->  concurrent(Threads, Cmds, [])
  173    ;   true
  174    ).
  175
  176%!  rdf_library_source(+Id, -Source) is nondet.
  177%
  178%   True of Source is the URL that is  part of the given library Id.
  179%   This predicate finds all indirect   dependencies.  It does _not_
  180%   check whether the source exists or is valid.
  181%
  182%   @see uri_file_name/2 for converting file:// URLs to a filename.
  183
  184rdf_library_source(Id, Source) :-
  185    cleaned_load_commands(Id, Cmds,
  186                          [ import(true),
  187                            not_found(silent)
  188                          ]),
  189    member(rdf_load(Source, _), Cmds).
  190
  191
  192cleaned_load_commands(Id, Cmds, Options) :-
  193    load_commands(Id, Options, Pairs),
  194    pairs_values(Pairs, Commands),
  195    list_to_set(Commands, Cmds2),
  196    delete_virtual(Cmds2, Cmds3),
  197    find_conflicts(Cmds3),
  198    check_existence(Cmds3, Cmds, Options).
  199
  200delete_virtual([], []).
  201delete_virtual([virtual(_)|T0], T) :-
  202    !,
  203    delete_virtual(T0, T).
  204delete_virtual([H|T0], [H|T]) :-
  205    delete_virtual(T0, T).
  206
  207
  208%!  find_conflicts(+LoadCommands) is semidet.
  209%
  210%   Find possibly conflicting options for loading the same source
  211
  212find_conflicts(Commands) :-
  213    no_source_with_different_options(Commands),
  214    no_sources_in_same_graph(Commands).
  215
  216%!  no_source_with_different_options(+Commands) is semidet.
  217%
  218%   True if there are not multiple calls to load the same graph, but
  219%   with  different  load-options.  Prints  a    warning  and  fails
  220%   otherwise.
  221
  222no_source_with_different_options(Commands) :-
  223    sort(Commands, Cmds),
  224    conflicts(Cmds, Conflicts),
  225    report_conflicts(Conflicts),
  226    Conflicts == [].
  227
  228conflicts([], []).
  229conflicts([C1, C2|T0], [C1-C2|T]) :-
  230    conflict(C1, C2),
  231    !,
  232    conflicts([C2|T0], T).
  233conflicts([_|T0], T) :-
  234    conflicts(T0, T).
  235
  236conflict(rdf_load(Src, Options1), rdf_load(Src, Options2)) :-
  237    sort(Options1, S1),
  238    sort(Options2, S2),
  239    S1 \== S2.
  240
  241report_conflicts([]).
  242report_conflicts([C1-C2|T]) :-
  243    print_message(warning, rdf(load_conflict(C1,C2))),
  244    report_conflicts(T).
  245
  246%!  no_sources_in_same_graph(+Commands) is semidet.
  247%
  248%   True if there are not two load   commands  referring to the same
  249%   graph.
  250
  251no_sources_in_same_graph(Commands) :-
  252    map_list_to_pairs(command_graph, Commands, Keyed),
  253    keysort(Keyed, KeySorted),
  254    group_pairs_by_key(KeySorted, SourcesByGraph),
  255    (   member(Graph-Sources, SourcesByGraph),
  256        Sources = [_,_|_]
  257    ->  forall(( member(Graph-Sources, SourcesByGraph),
  258                 Sources = [_,_|_]
  259               ),
  260               print_message(error,
  261                             rdf(multiple_source_for_graph(Graph, Sources)))),
  262        fail
  263    ;   true
  264    ).
  265
  266command_graph(rdf_load(_, Options), Graph) :-
  267    option(graph(Graph), Options),
  268    !.
  269command_graph(rdf_load(URL, _), URL) :- !.
  270command_graph(_, _).                    % Other command.  Each variable it its own key
  271
  272
  273%!  check_existence(+CommandsIn, -Commands, +Options) is det.
  274%
  275%   Report existence errors. Fail if at   least  one source does not
  276%   exist. and the not_found level is not =silent=.
  277%
  278%   @error existence_error(urls, ListOfUrls)
  279
  280check_existence(CommandsIn, Commands, Options) :-
  281    option(not_found(Level), Options, error),
  282    must_be(oneof([error,warning,silent]), Level),
  283    (   Level == silent
  284    ->  Commands = CommandsIn
  285    ;   missing_urls(CommandsIn, Commands, Missing),
  286        (   Missing == []
  287        ->  true
  288        ;   Level == warning
  289        ->  report_missing(Missing, Level)
  290        ;   existence_error(urls, Missing)
  291        )
  292    ).
  293
  294
  295missing_urls([], [], []).
  296missing_urls([H|T0], Cmds, Missing) :-
  297    H = rdf_load(URL, _),
  298    (   catch(exists_url(URL, _Ext), error(existence_error(_,_), _), fail)
  299    ->  Cmds = [H|T],
  300        missing_urls(T0, T, Missing)
  301    ;   Missing = [URL|T],
  302        missing_urls(T0, Cmds, T)
  303    ).
  304
  305report_missing([], _).
  306report_missing([H|T], Level) :-
  307    print_message(Level, error(existence_error(url, H), _)),
  308    report_missing(T, Level).
  309
  310%!  guess_concurrency(+Commands, -Threads) is det.
  311%
  312%   How much concurrency to use? Set to   the  number of CPUs if all
  313%   input comes from  files  or  5   if  network  based  loading  is
  314%   demanded.
  315
  316guess_concurrency(Commands, Threads) :-
  317    count_uris(Commands, FileURLs, OtherURLs),
  318    (   FileURLs > 0
  319    ->  (   current_prolog_flag(cpu_count, CPUs)
  320        ->  true
  321        ;   CPUs = 1
  322        ),
  323        FileThreads is min(FileURLs, CPUs)
  324    ;   FileThreads = 0
  325    ),
  326    (   OtherURLs > 0
  327    ->  OtherThreads is min(5, OtherURLs)
  328    ;   OtherThreads = 0
  329    ),
  330    Threads is FileThreads + OtherThreads.
  331
  332count_uris([], 0, 0).
  333count_uris([rdf_load(URL, _)|T], F, NF) :-
  334    count_uris(T, F0, NF0),
  335    (   web_url(URL)
  336    ->  NF is NF0 + 1,
  337        F = F0
  338    ;   F is F0 + 1,
  339        NF = NF0
  340    ).
  341
  342
  343%!  load_commands(+Id, +Options, -Pairs:list(Level-Command)) is det.
  344%
  345%   Commands are the RDF commands to execute for rdf_load_library/2.
  346%   Splitting  in  command  collection  and   execution  allows  for
  347%   concurrent execution as well  as   forward  checking of possible
  348%   problems.
  349%
  350%   @tbd    Fix poor style; avoid assert/retract.
  351
  352:- thread_local
  353    command/2.  354
  355load_commands(Id, Options, Commands) :-
  356    retractall(command(_,_)),
  357    rdf_update_library_index,
  358    dry_load(Id, 1, Options),
  359    findall(Level-Cmd, retract(command(Level, Cmd)), Commands).
  360
  361dry_load(Id, Level, Options) :-
  362    (   library(Id, File, Facets)
  363    ->  merge_base_uri(Facets, Options, Options1),
  364        merge_source(Facets, Options1, Options2),
  365        merge_blanks(Facets, Options2, Options3),
  366        merge_format(Facets, Options3, Options4),
  367        (   \+ memberchk(virtual, Facets)
  368        ->  load_options(Options4, File, RdfOptions),
  369            assert(command(Level, rdf_load(File, RdfOptions)))
  370        ;   assert(command(Level, virtual(File)))
  371        ),
  372        (   option(import(true), Options, true)
  373        ->  Level1 is Level + 1,
  374            forall(member(imports(Type, Import), Facets),
  375                   import(Import, Level1, [type(Type)|Options4]))
  376        ;   true
  377        )
  378    ;   existence_error(ontology, Id)
  379    ).
  380
  381merge_base_uri(Facets, Options0, Options) :-
  382    (   option(base_uri(Base), Facets)
  383    ->  exclude(name_option(base_uri), Options0, Options1),
  384        Options = [base_uri(Base)|Options1]
  385    ;   Options = Options0
  386    ).
  387
  388merge_source(Facets, Options0, Options) :-
  389    (   option(claimed_source(Base), Facets)
  390    ->  exclude(name_option(claimed_source), Options0, Options1),
  391        Options = [claimed_source(Base)|Options1]
  392    ;   Options = Options0
  393    ).
  394
  395merge_blanks(Facets, Options0, Options) :-
  396    (   option(blank_nodes(Share), Facets)
  397    ->  exclude(name_option(blank_nodes), Options0, Options1),
  398        Options = [blank_nodes(Share)|Options1]
  399    ;   Options = Options0
  400    ).
  401
  402merge_format(Facets, Options0, Options) :-
  403    (   option(format(Format), Facets)
  404    ->  exclude(name_option(format), Options0, Options1),
  405        Options = [format(Format)|Options1]
  406    ;   Options = Options0
  407    ).
  408
  409name_option(Name, Term) :-
  410    functor(Term, Name, 1).
  411
  412load_options(Options, File, RDFOptions) :-
  413    findall(O, load_option(Options, File, O), RDFOptions).
  414
  415load_option(Options, File, graph(Source)) :-
  416    option(claimed_source(Source0), Options),
  417    (   sub_atom(Source0, _, _, 0, /)
  418    ->  file_base_name(File, Base),
  419        atom_concat(Source0, Base, Source)
  420    ;   atom_concat(Source, #, Source0)
  421    ->  true
  422    ).
  423load_option(Options, File, base_uri(BaseURI)) :-
  424    option(base_uri(Base0), Options),
  425    sub_atom(/, _, _, 0, Base0),
  426    atom_concat(Base0, File, BaseURI).
  427load_option(Options, _File, blank_nodes(Share)) :-
  428    option(blank_nodes(Share), Options).
  429load_option(Options, _File, format(Format)) :-
  430    option(format(Format), Options).
  431
  432%!  import(+URL, +Level, +Options) is det.
  433
  434import(Path, Level, Options) :-
  435    option(type(data_dump), Options),
  436    !,
  437    load_options(Options, Path, RdfOptions),
  438    assert(command(Level, rdf_load(Path, RdfOptions))).
  439import(Path, Level, Options) :-
  440    (   (   library(Id, Path, _)
  441        ->  true
  442        ;   manifest_for_path(Path, Manifest),
  443            catch(exists_url(Manifest, _Ext), _, fail)
  444        ->  process_manifest(Manifest),
  445            library(Id, Path, _)
  446        )
  447    ->  dry_load(Id, Level, Options)
  448    ;   load_options(Options, Path, RdfOptions),
  449        assert(command(Level, rdf_load(Path, RdfOptions)))
  450    ).
  451
  452manifest_for_path(URL, Manifest) :-
  453    file_directory_name(URL, Parent),
  454    manifest_file(Base),
  455    rdf_extension(Ext),
  456    atomic_list_concat([Parent, /, Base, '.', Ext], Manifest).
  457
  458%!  rdf_list_library(+Id) is det.
  459%!  rdf_list_library(+Id, +Options) is det.
  460%
  461%   Print library dependency tree to the terminal.  Options include
  462%   options for rdf_load_library/2 and
  463%
  464%           * show_source(+Boolean)
  465%           If =true= (default), show location we are loading
  466%
  467%           * show_graph(+Boolean)
  468%           If =true= (default =false=), show name of graph
  469%
  470%           * show_virtual(+Boolean)
  471%           If =false= (default =true=), do not show virtual
  472%           repositories.
  473%
  474%           * indent(Atom)
  475%           Atom repeated for indentation levels
  476
  477rdf_list_library(Id) :-
  478    rdf_list_library(Id, []).
  479rdf_list_library(Id, Options) :-
  480    load_commands(Id, Options, Commands),
  481    maplist(print_load(Options), Commands).
  482
  483print_load(Options, _Level-virtual(_)) :-
  484    option(show_virtual(false), Options),
  485    !.
  486print_load(Options, Level-Command) :-
  487    option(indent(Indent), Options, '. '),
  488    forall(between(2, Level, _), format(Indent)),
  489    print_command(Command, Options),
  490    format('~N').
  491
  492print_command(virtual(URL), _Options) :-
  493    format('<~w>', [URL]).
  494print_command(rdf_load(URL), Options) :-
  495    print_command(rdf_load(URL, []), Options).
  496print_command(rdf_load(URL, RDFOptions), Options) :-
  497    (   option(show_source(true), Options, true)
  498    ->  format('~w', [URL]),
  499        (   option(blank_nodes(noshare), RDFOptions)
  500        ->  format(' <not shared>')
  501        ;   true
  502        ),
  503        (   exists_url(URL, Ext)
  504        ->  (   Ext == ''
  505            ->  true
  506            ;   format('[.~w]', [Ext])
  507            )
  508        ;   format(' [NOT FOUND]')
  509        )
  510    ;   true
  511    ),
  512    (   option(show_graph(true), Options, false),
  513        option(graph(Base), RDFOptions)
  514    ->  format('~N\tSource: ~w', [Base])
  515    ;   true
  516    ).
  517
  518exists_url(URL, Ext) :-
  519    uri_file_name(URL, Path),
  520    !,
  521    add_storage_extension(Path, Ext, PathEx),
  522    access_file(PathEx, read),
  523    !.
  524exists_url(URL, Ext) :-
  525    uri_components(URL, Components),
  526    uri_data(scheme, Components, Scheme),
  527    atom(Scheme),
  528    url_scheme(Scheme),
  529    add_storage_extension(URL, Ext, URLEx),
  530    catch(http_open(URLEx, Stream, [ method(head) ]), _, fail),
  531    !,
  532    close(Stream).
  533
  534:- multifile
  535    rdf_db:rdf_storage_encoding/2.  536
  537add_storage_extension(File, '', File).
  538add_storage_extension(File, Ext, FileEx) :-
  539    rdf_db:rdf_storage_encoding(Ext, _Format),
  540    \+ file_name_extension(_, Ext, File),
  541    file_name_extension(File, Ext, FileEx).
  542
  543url_scheme(http).
  544url_scheme(https).
  545
  546
  547%!  rdf_list_library
  548%
  549%   Prints known RDF library identifiers to current output.
  550
  551rdf_list_library :-
  552    rdf_update_library_index,
  553    (   rdf_library_index(Id, title(TitleLiteral)),
  554        plain_string(TitleLiteral, Title),
  555        format('~w ~t~20|~w', [Id, Title]),
  556        (   rdf_library_index(Id, version(Version))
  557        ->  format(' (version ~w)', [Version])
  558        ;   true
  559        ),
  560        nl,
  561        fail
  562    ;   true
  563    ).
  564
  565plain_string(String, String) :-
  566    atomic(String),
  567    !.
  568plain_string(lang(en, String), String) :- !.
  569plain_string(lang(_, String), String) :- !.
  570plain_string(type(_, String), String) :- !.
  571
  572%!  rdf_library_index(?Id, ?Facet) is nondet.
  573%
  574%   Query the content of the library.  Defined facets are:
  575%
  576%           * source(URL)
  577%           Location from which to load the ontology
  578%
  579%           * title(Atom)
  580%           Title used for the ontology
  581%
  582%           * comment(Atom)
  583%           Additional comments for the ontology
  584%
  585%           * version(Atom)
  586%           Version information on the ontology
  587%
  588%           * imports(Type, URL)
  589%           URLs needed by this ontology. May succeed multiple
  590%           times.  Type is one of =ontology=, =schema= or =instances=.
  591%
  592%           * base_uri(BaseURI)
  593%           Base URI to use when loading documents. If BaseURI
  594%           ends in =|/|=, the actual filename is attached.
  595%
  596%           * claimed_source(Source)
  597%           URL from which we claim to have loaded the RDF. If
  598%           Source ends in =|/|=, the actual filename is
  599%           attached.
  600%
  601%           * blank_nodes(Share)
  602%           Defines how equivalent blank nodes are handled, where
  603%           Share is one of =share= or =noshare=.  Default is to
  604%           share.
  605%
  606%           * format(Format)
  607%           Format of the resource.  Can be used to overrule
  608%           if the format as derived from the HTTP content type
  609%           is wrong.
  610%
  611%           * provides_ns(URL)
  612%           Ontology provides definitions in the namespace URL.
  613%           The formal definition of this is troublesome, but in
  614%           practice it means the ontology has triples whose
  615%           subjects are in the given namespace.
  616%
  617%           * uses_ns(URL)
  618%           The ontology depends on the given namespace.  Normally
  619%           means it contains triples that have predicates or
  620%           objects in the given namespace.
  621%
  622%           * manifest(URL)
  623%           URL of the manifest in which this ontology is defined.
  624%
  625%           * virtual
  626%           Entry is virtual (cannot be loaded)
  627
  628rdf_library_index(Id, Facet) :-
  629    library(Id, Path, Facets),
  630    (   Facet = source(Path)
  631    ;   member(Facet, Facets)
  632    ).
  633
  634
  635                 /*******************************
  636                 *      MANIFEST PROCESSING     *
  637                 *******************************/
  638
  639%!  rdf_attach_library(+Source)
  640%
  641%   Attach manifest from Source.  Source is one of
  642%
  643%           * URL
  644%           Load single manifest from this URL
  645%           * File
  646%           Load single manifest from this file
  647%           * Directory
  648%           Scan all subdirectories and load all =|Manifest.ttl|= or
  649%           =|Manifest.rdf|= found.  If Directory is a path-alias
  650%           (e.g., ontology(.)), _all_ referenced directories are
  651%           scanned for manifest files.
  652%
  653%   Encountered namespaces are registered   using rdf_register_ns/2.
  654%   Encountered ontologies are added to the index. If a manifest was
  655%   already loaded it will be reloaded  if the modification time has
  656%   changed.
  657
  658rdf_attach_library(URL) :-
  659    atom(URL),
  660    uri_is_global(URL),
  661    \+ is_absolute_file_name(URL),   % avoid interpreting C: as a schema
  662    !,
  663    process_manifest(URL).
  664rdf_attach_library(File) :-
  665    absolute_file_name(File, Path,
  666                       [ extensions([rdf,ttl]),
  667                         access(read),
  668                         file_errors(fail)
  669                       ]),
  670    !,
  671    process_manifest(Path).
  672rdf_attach_library(Dir) :-
  673    forall(absolute_file_name(Dir, Path,
  674                              [ file_type(directory),
  675                                access(read),
  676                                solutions(all)
  677                              ]),
  678           attach_dir(Path, [])).
  679
  680
  681%!  rdf_update_library_index
  682%
  683%   Reload all Manifest files.
  684
  685rdf_update_library_index :-
  686    forall(manifest(Location, _Time),
  687           process_manifest(Location)).
  688
  689attach_dir(Path, Visited) :-
  690    memberchk(Path, Visited),
  691    !.
  692attach_dir(Path, Visited) :-
  693    atom_concat(Path, '/*', Pattern),
  694    expand_file_name(Pattern, Members),
  695    (   manifest_file(MBase),
  696        rdf_extension(Ext),
  697        atomic_list_concat([Path, /, MBase, '.', Ext], Manifest),
  698        exists_file(Manifest)
  699    ->  process_manifest(Manifest)
  700    ;   print_message(silent, rdf(no_manifest(Path)))
  701    ),
  702    (   member(Dir, Members),
  703        exists_directory(Dir),
  704        file_base_name(Dir, Base),
  705        \+ hidden_base(Base),
  706        attach_dir(Dir, [Path|Visited]),
  707        fail ; true
  708    ).
  709
  710hidden_base('CVS').
  711hidden_base('cvs').                     % Windows
  712
  713%!  process_manifest(+Location) is det.
  714%
  715%   Process a manifest file, registering  encountered namespaces and
  716%   creating clauses for library/3. No op if manifest was loaded and
  717%   not changed. Removes old data if the manifest was changed.
  718%
  719%   @param  Location is either a path name or a URL.
  720
  721process_manifest(Source) :-
  722    (   web_url(Source)
  723    ->  uri_normalized(Source, Manifest)
  724    ;   uri_file_name(Source, Manifest0)
  725    ->  absolute_file_name(Manifest0, ManifestFile),
  726        uri_file_name(Manifest, ManifestFile)
  727    ;   absolute_file_name(Source, ManifestFile),
  728        uri_file_name(Manifest, ManifestFile)
  729    ),                              % Manifest is a canonical URI
  730    source_time(Manifest, MT),
  731    (   manifest(Manifest, Time),
  732        (   MT =< Time
  733        ->  !
  734        ;   retractall(manifest(Manifest, Time)),
  735            library_db(Id, URL, Facets),
  736            memberchk(manifest(Manifest), Facets),
  737            retractall(library_db(Id, URL, Facets)),
  738            fail
  739        )
  740    ;   read_triples(Manifest, Triples),
  741        process_triples(Manifest, Triples),
  742        print_message(informational, rdf(manifest(loaded, Manifest))),
  743        assert(manifest(Manifest, MT))
  744    ).
  745
  746process_triples(Manifest, Triples) :-
  747    findall(ns(Mnemonic, NameSpace),
  748            extract_namespace(Triples, Mnemonic, NameSpace),
  749            NameSpaces),
  750    findall(Ontology,
  751            extract_ontology(Triples, Ontology),
  752            Ontologies),
  753    maplist(define_namespace, NameSpaces),
  754    maplist(assert_ontology(Manifest), Ontologies).
  755
  756%!  extract_namespace(+Triples, -Mnemonic, -NameSpace)
  757%
  758%   True if Mnemonic is an abbreviation of NameSpace.
  759
  760extract_namespace(Triples, Mnemonic, Namespace) :-
  761    edge(Triples, Decl, lib:mnemonic, literal(Mnemonic)),
  762    edge(Triples, Decl, lib:namespace, Namespace).
  763extract_namespace(Triples, Mnemonic, Namespace) :-
  764    edge(Triples, Decl, vann:preferredNamespacePrefix, literal(Mnemonic)),
  765    edge(Triples, Decl, vann:preferredNamespaceUri, literal(Namespace)).
  766
  767%!  extract_ontology(+Triples, -Ontology) is nondet.
  768%
  769%   Extract definition of an ontology
  770
  771extract_ontology(Triples, library(Name, URL, Options)) :-
  772    distinct(URL, ontology(Triples, URL)),
  773    file_base_name(URL, BaseName),
  774    file_name_extension(Name, _, BaseName),
  775    findall(Facet, facet(Triples, URL, Facet), Options0),
  776    sort(Options0, Options1),
  777    keep_specialized_facets(Options1, Options).
  778
  779ontology(Triples, URL) :-
  780    edge(Triples, URL, rdf:type, Type),
  781    ontology_type(Type).
  782
  783keep_specialized_facets(All, Special) :-
  784    exclude(more_general(All), All, Special).
  785
  786more_general(All, Facet) :-
  787    generalized(Facet, Special),
  788    memberchk(Special, All).
  789
  790generalized(imports(ontology, Path), imports(Other, Path)) :-
  791    dif(Other, ontology).
  792
  793ontology_type(X) :-
  794    (   rdf_equal(X, lib:'Ontology')
  795    ;   rdf_equal(X, lib:'Schema')
  796    ;   rdf_equal(X, lib:'Instances')
  797    ;   rdf_equal(X, void:'Dataset')
  798    ;   rdf_equal(X, void:'Linkset')
  799    ).
  800
  801%!  facet(+Triples, +File, -Facet) is nondet.
  802%
  803%   Enumerate facets about File from   Triples. Facets are described
  804%   with rdf_library_index/2.
  805
  806facet(Triples, File, title(Title)) :-
  807    edge(Triples, File, dcterms:title, literal(Title)).
  808facet(Triples, File, version(Version)) :-
  809    edge(Triples, File, owl:versionInfo, literal(Version)).
  810facet(Triples, File, comment(Comment)) :-
  811    edge(Triples, File, rdfs:comment, literal(Comment)).
  812facet(Triples, File, base_uri(BaseURI)) :-
  813    edge(Triples, File, lib:baseURI, BaseURI).
  814facet(Triples, File, claimed_source(Source)) :-
  815    edge(Triples, File, lib:source, Source).
  816facet(Triples, File, format(Format)) :-
  817    edge(Triples, File, lib:format, literal(Format)).
  818facet(Triples, File, blank_nodes(Mode)) :-
  819    edge(Triples, File, lib:blankNodes, literal(Mode)),
  820    must_be(oneof([share,noshare]), Mode).
  821facet(Triples, File, imports(ontology, Path)) :-
  822    edge(Triples, File, owl:imports, Path).
  823facet(Triples, File, imports(schema, Path)) :-
  824    edge(Triples, File, lib:schema, Path).
  825facet(Triples, File, imports(instances, Path)) :-
  826    edge(Triples, File, lib:instances, Path).
  827facet(Triples, File, imports(subset, Path)) :-
  828    edge(Triples, File, void:subset, Path).
  829facet(Triples, File, imports(data_dump, Path)) :-
  830    edge(Triples, File, void:dataDump, Path).
  831facet(Triples, File, provides_ns(NS)) :-
  832    edge(Triples, File, lib:providesNamespace, NSDecl),
  833    edge(Triples, NSDecl, lib:namespace, NS).
  834facet(Triples, File, uses_ns(NS)) :-
  835    edge(Triples, File, lib:usesNamespace, NSDecl),
  836    edge(Triples, NSDecl, lib:namespace, NS).
  837facet(Triples, File, virtual) :-
  838    (   edge(Triples, File, rdf:type, lib:'Virtual')
  839    ;   edge(Triples, File, rdf:type, void:'Dataset')
  840    ;   edge(Triples, File, rdf:type, void:'Linkset')
  841    ) -> true.
  842
  843%!  edge(+Triples, ?S, ?P, ?O) is nondet.
  844%
  845%   Like rdf_has/3 over a list of Triples.
  846
  847edge(Triples, S, P, O) :-
  848    nonvar(P),
  849    !,
  850    sub_p(SubP, P),
  851    member(rdf(S,SubP,O), Triples).
  852edge(Triples, S, P, O) :-
  853    member(rdf(S,SubP,O), Triples),
  854    sub_p(SubP, P).
  855
  856sub_p(P, P).
  857sub_p(Sub, P) :-
  858    (   nonvar(Sub)
  859    ->  sub_property_of(Sub, Sub1),
  860        sub_p(Sub1, P)
  861    ;   sub_property_of(Sub1, P),
  862        sub_p(Sub, Sub1)
  863    ).
  864
  865:- rdf_meta
  866    sub_property_of(r,r).  867
  868sub_property_of(void:subset,         owl:imports).
  869sub_property_of(dcterms:description, rdfs:comment).
  870sub_property_of(void:dataDump,       owl:imports).
  871sub_property_of(dc:title,            dcterms:title).
  872
  873%!  source_time(+Source, -Modified) is semidet.
  874%
  875%   Modified is the last modification time of Source.
  876%
  877%   @error  existence_error(Type, Source).
  878
  879source_time(URL, Modified) :-
  880    web_url(URL),
  881    !,
  882    http_open(URL, Stream,
  883              [ header(last_modified, Date),
  884                method(head)
  885              ]),
  886    close(Stream),
  887    Date \== '',
  888    parse_time(Date, Modified).
  889source_time(URL, Modified) :-
  890    uri_file_name(URL, File),
  891    !,
  892    time_file(File, Modified).
  893source_time(File, Modified) :-
  894    time_file(File, Modified).
  895
  896web_url(URL) :-
  897    sub_atom(URL, 0, _, _, 'http://').
  898
  899
  900%!  read_triples(+URL, -Triples) is det.
  901%
  902%   Read RDF/XML or Turtle file into a list of triples.
  903
  904read_triples(FileURL, Triples) :-
  905    uri_file_name(FileURL, File),
  906    !,
  907    (   file_name_extension(_, rdf, File)
  908    ->  load_rdf(File, Triples)
  909    ;   rdf_load_turtle(File, Triples, [])
  910    ).
  911read_triples(HTTPURL, Triples) :-
  912    file_name_extension(_, Ext, HTTPURL),
  913    setup_call_cleanup(
  914        http_open(HTTPURL, In, []),
  915        stream_triples(In, Ext, Triples),
  916        close(In)).
  917
  918stream_triples(Stream, rdf, Triples) :-
  919    load_rdf(stream(Stream), Triples).
  920stream_triples(Stream, ttl, Triples) :-
  921    rdf_load_turtle(stream(Stream), Triples, []).
  922
  923
  924manifest_file('void').                  % make order optional?
  925manifest_file('Manifest').
  926manifest_file('manifest').
  927
  928rdf_extension(ttl).
  929rdf_extension(rdf).
  930
  931
  932%!  assert_ontology(+Manifest, +Term:library(Name, File, Facets)) is det.
  933%
  934%   Add ontology to our library.
  935%
  936%   @tbd    Proper behaviour of re-definition?
  937
  938assert_ontology(Manifest, Term) :-
  939    Term = library(Name, URL, Facets),
  940    (   library(Name, _URL2, Facets2)
  941    ->  memberchk(manifest(Manifest2), Facets2),
  942        print_message(warning, rdf(redefined(Manifest, Name, Manifest2)))
  943    ;   true
  944    ),
  945    assert(library_db(Name, URL,
  946                   [ manifest(Manifest)
  947                   | Facets
  948                   ])).
  949
  950
  951%!  library(?Id, ?URL, ?Facets)
  952%
  953%   Access DB for library information.
  954
  955library(Id, URL, Facets) :-
  956    nonvar(URL),
  957    normalize_url(URL, CanonicalURL),
  958    library_db(Id, CanonicalURL, Facets).
  959library(Id, URL, Facets) :-
  960    library_db(Id, URL, Facets).
  961
  962%!  normalize_url(+URL, -Normalized)
  963%
  964%   Like uri_normalized/2, but we  also   need  (platform dependent)
  965%   filename canonization.
  966
  967normalize_url(URL, CanonicalURL) :-
  968    uri_file_name(URL, File),
  969    !,
  970    absolute_file_name(File, CanFile),
  971    uri_file_name(CanonicalURL, CanFile).
  972normalize_url(URL, CanonicalURL) :-
  973    uri_normalized(URL, CanonicalURL).
  974
  975%!  define_namespace(NS:ns(Mnemonic, Namespace)) is det.
  976%
  977%   Add namespace declaration for Mnemonic.
  978
  979define_namespace(ns(Mnemonic, Namespace)) :-
  980    debug(rdf_library, 'Adding NS ~w = ~q', [Mnemonic, Namespace]),
  981    rdf_register_ns(Mnemonic, Namespace,
  982                    [
  983                        ]).
  984
  985%!  rdf_current_manifest(-URL) is nondet.
  986%
  987%   True if URL is the URL of a currently loaded manifest file.
  988
  989rdf_current_manifest(URL) :-
  990    manifest(URL, _Time).
  991
  992
  993
  994                 /*******************************
  995                 *            MESSAGES          *
  996                 *******************************/
  997
  998:- multifile
  999    prolog:message/3. 1000
 1001prolog:message(rdf(no_manifest(Path))) -->
 1002    [ 'Directory ~w has no Manifest.{ttl,rdf} file'-[Path] ].
 1003prolog:message(rdf(redefined(Manifest, Name, Manifest2))) -->
 1004    [ '~w: Ontology ~w already defined in ~w'-
 1005      [Manifest, Name, Manifest2]
 1006    ].
 1007prolog:message(rdf(manifest(loaded, Manifest))) -->
 1008    [ 'Loaded RDF manifest ~w'-[Manifest]
 1009    ].
 1010prolog:message(rdf(load_conflict(C1, C2))) -->
 1011    [ 'Conflicting loads: ~p <-> ~p'-[C1, C2] ].
 1012prolog:message(rdf(multiple_source_for_graph(Graph, Sources))) -->
 1013    [ 'Multiple sources for graph ~p:'-[Graph] ],
 1014    sources(Sources).
 1015prolog:message(rdf(loading(Files, Threads))) -->
 1016    [ 'Loading ~D files using ~D threads ...'-[Files, Threads] ].
 1017
 1018sources([]) --> [].
 1019sources([rdf_load(From, _Options)|T]) -->
 1020    [ nl, '\t~p'-[From] ],
 1021    sources(T)