1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2017, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(hdt,
   36	  [ hdt_open/2,			% -HDT, +Path
   37	    hdt_open/3,			% -HDT, +Path, +Options
   38	    hdt_close/1,		% +HDT
   39	    hdt_search/4,		% +HDT, ?S,?P,?O
   40	    hdt_header/4,		% +HDT, ?S,?P,?O
   41
   42	    hdt_subject/2,		% +HDT, ?Subject
   43	    hdt_predicate/2,		% +HDT, ?Predicate
   44	    hdt_shared/2,		% +HDT, ?Shared
   45	    hdt_object/2,		% +HDT, ?Object
   46	    hdt_node/2,     % +HDT, ?Node
   47
   48	    hdt_suggestions/5,		% +HDT, +Base, +Role, +MaxCount, -List
   49	    hdt_property/2,		% +HTD, -Property
   50
   51	    hdt_subject_id/3,		% +HDT, ?Subject, ?Id
   52	    hdt_predicate_id/3,		% +HDT, ?Predicate, ?Id
   53	    hdt_object_id/3,		% +HDT, ?Object, ?Id
   54	    hdt_pre_triple/3,		% +HDT, ?StringTriple, -IdTriple
   55	    hdt_post_triple/3,		% +HDT, ?StringTriple, +IdTriple
   56	    hdt_search_id/4,		% +HDT, ?S,?P,?O
   57	    hdt_search_cost/5,		% +HDT, ?S,?P,?O, -Cost
   58
   59	    hdt_create_from_file/3,	% +HDTFile, +RDFFile, +Options
   60
   61	    op(110, xfx, @),		% must be above .
   62	    op(650, xfx, ^^)		% must be above :
   63	  ]).   64:- use_module(library(semweb/rdf11)).   65:- use_module(library(sgml)).   66:- use_module(library(lists)).   67
   68:- use_foreign_library(foreign(hdt4pl)).   69
   70/** <module> Access HDT (Header Dictionary Triples) files
   71*/
   72
   73:- rdf_meta
   74	hdt_search(+,r,r,o),	hdt_subject(+,r),	hdt_predicate(+,r),	hdt_shared(+,r),	hdt_object(+,o),	hdt_subject_id(+, r, ?),	hdt_node(+, o),	hdt_predicate_id(+, r, ?),	hdt_object_id(+, o, ?),	hdt_search_cost(+, r, r, o, -).
 hdt_open(-HDT, +File) is det
 hdt_open(-HDT, +File, +Options) is det
Open an existing HDT file and unify HDT with a handle to it. The handle is an opaque symbol that is subject to (atom) garbage collection.

File is expanded by absolute_file_name/3, with the default extension `.hdt`.

Options:

access(+Access)
How the file is accessed. One of map (map the file into memory, default) or load (load the content of the file).
indexed(+Boolean)
Whether an index is created. Default is true. Such an index is needed for partially instantiated calls to hdt_search/4. The index is maintained in a file with extension `.index` in the same directory as the HDT file. An index is not needed if you only want to extract all triples.
  108hdt_open(HDT, File) :-
  109	hdt_open(HDT, File, []).
  110
  111hdt_open(HDT, File, Options) :-
  112	absolute_file_name(File, FileAbs, [extensions([hdt]), expand(true), access(read)]),
  113	hdt_open_(HDT, FileAbs, Options).
 hdt_search(+HDT, ?S, ?P, ?O)
True if <S,P,O> is a triple in HDT.
  119hdt_search(HDT, S, P, O) :-
  120	pre_object(HDT, O, OHDT),
  121	hdt_search(HDT, content, S, P, OHDT),
  122	post_object(O, OHDT).
 hdt_header(+HDT, ?S, ?P, ?O)
True if <S,P,O> is a triple in the header of HDT.
  128hdt_header(HDT, S, P, O) :-
  129	hdt_search(HDT, header, S, P, O0),
  130	header_object(O0, O).
  131
  132header_object(O0, O) :-
  133	string(O0), !,
  134	header_untyped_object(O0, O).
  135header_object(O, O).
  136
  137header_untyped_object(O0, O) :-
  138	catch(xsd_number_string(N, O0),
  139	      error(syntax_error(xsd_number), _),
  140	      fail), !,
  141	(   integer(N)
  142	->  rdf_equal(O, N^^xsd:integer)
  143	;   rdf_equal(O, N^^xsd:float)
  144	).
  145header_untyped_object(O0, O) :-
  146	catch(xsd_time_string(Term, Type, O0),
  147	      error(_,_), fail), !,
  148	O = Term^^Type.
  149header_untyped_object(S, O) :-
  150	rdf_equal(O, S^^xsd:string).
 hdt_subject(+HDT, ?S) is nondet
 hdt_predicate(+HDT, ?P) is nondet
 hdt_object(+HDT, ?O) is nondet
 hdt_shared(+HDT, ?SO) is nondet
 hdt_node(+HDT, ?Node) is nondet
Enumerate possible values for the individual components of the triples represented in the HDT. Note that these enumarators do not enumerate blank nodes. The predicate hdt_shared/2 enumerates resources that exist in the dataset both as subject and object. If the second argument is instantiated hdt_search/4 is used to perform an indexed search and the predicates are semidet.
  166hdt_subject(HDT, Subject) :-
  167	(   var(Subject)
  168	->  (   hdt_column_(HDT, shared, Var)
  169	    ;	hdt_column_(HDT, subject, Var)
  170	    ),
  171	    Var = Subject
  172	;   hdt_search(HDT, Subject, _, _)
  173	->  true
  174	).
  175
  176hdt_predicate(HDT, Predicate) :-
  177	(   var(Predicate)
  178	->  hdt_column_(HDT, predicate, Var),
  179	    Var = Predicate
  180	;   hdt_search(HDT, _, Predicate, _)
  181	->  true
  182	).
  183
  184hdt_shared(HDT, Shared) :-
  185	(   var(Shared)
  186	->  hdt_column_(HDT, shared, Var),
  187	    Var = Shared
  188	;   rdf_is_subject(Shared),
  189	    hdt_subject(HDT, Shared),
  190	    hdt_object(HDT, Shared)
  191	->  true
  192	).
  193
  194hdt_object(HDT, Object) :-
  195	(   var(Object)
  196	->  (   hdt_column_(HDT, shared, Var),
  197	        Var = Object
  198	    ;	hdt_object_(HDT, OHDT),
  199		post_object(Object, OHDT)
  200	    )
  201	;   hdt_search(HDT, _, _, Object)
  202	->  true
  203	).
  204
  205hdt_node(HDT, Node) :-
  206	(   var(Node)
  207	->  (   hdt_column_(HDT, shared, Var),
  208	        Var = Node
  209	    ;   hdt_column_(HDT, subject, Var),
  210	        Var = Node
  211	    ;   hdt_object_(HDT, OHDT),
  212	        post_object(Node, OHDT)
  213	    )
  214	;   hdt_search(HDT, Node, _, _)
  215	->  true
  216	;   hdt_search(HDT, _, _, Node)
  217	->  true
  218	).
 pre_object(+HDT, ?O, -OHDT) is det
 post_object(?O, +OHDT) is det
Pre/post object processing. The HDT library itself is purely string based.
  227pre_object(_HDT, O, OHDT) :-
  228	atom(O), \+ boolean(O), !,
  229	OHDT = O.
  230pre_object(_HDT, O, OHDT) :-
  231	ground(O), !,
  232	rdf_lexical_form(O, Lexical),
  233	canonical_string(Lexical, OHDT).
  234pre_object(HDT, O, OHDT) :-
  235	nonvar(O),
  236	O = String@Lang,
  237	ground(String),
  238	atomics_to_string(["\"", String, "\"@"], Prefix),
  239	hdt_suggestions(HDT, Prefix, object, 1000, List),
  240	length(List, Found),
  241	Found < 1000, !,		% we got them all
  242	member(_@Lang, List),
  243	canonical_string(String@Lang, OHDT).
  244pre_object(_, _, _).
  245
  246canonical_string(Lexical^^Type, HDT) :-
  247	atomics_to_string(["\"", Lexical, "\"^^<", Type, ">"], HDT).
  248canonical_string(Lexical@Lang, HDT) :-
  249	atomics_to_string(["\"", Lexical, "\"@", Lang], HDT).
  250
  251boolean(false).
  252boolean(true).
 post_object(?PrologObj, ?HDTObjectString) is semidet
  256post_object(O, _HDT) :-
  257	ground(O), !.
  258post_object(O, IRI) :-
  259	atom(IRI), !,
  260	O = IRI.
  261post_object(O, HDT) :-
  262	rdf_canonical_literal(HDT, O).
 hdt_suggestions(+HDT, +Base, +Role, +MaxResults, -Results:list) is det
True when Results is a list of suggestions for Base in the triple role Role. Some experimentation suggests it performs a prefix match on the internal string representation. This implies that literals are only found if the first character of Base is `"`.
Arguments:
Base- is a string or atom
Role- is one of subject, predicate or object
 hdt_property(+HDT, ?Property) is nondet
True if Property is a property of HTD. Defined properties are
  292hdt_property(HDT, Property) :-
  293	hdt_property(Property),
  294	hdt_property_(HDT, Property).
  295
  296hdt_property(mapping(_)).
  297hdt_property(max_id(_)).
  298hdt_property(max_object_id(_)).
  299hdt_property(max_predicate_id(_)).
  300hdt_property(max_subject_id(_)).
  301hdt_property(objects(_)).
  302hdt_property(predicates(_)).
  303hdt_property(shared(_)).
  304hdt_property(subjects(_)).
  305hdt_property(elements(_)).
  306
  307
  308		 /*******************************
  309		 *	    IDENTIFIERS		*
  310		 *******************************/
 hdt_subject_id(+HDT, ?Subject:atom, ?Id:integer) is semidet
 hdt_predicate_id(+HDT, ?Predicate:atom, ?Id:integer) is semidet
 hdt_object_id(+HDT, ?Object:any, ?Id:integer) is semidet
True if String is mapped to Id in the given role. Fails if the requested String or Id is not known for the given role in HDT.
Arguments:
Role- is one of subject, predicate or object
  321hdt_subject_id(HDT, String, Id) :-
  322	hdt_string_id(HDT, subject, String, Id).
  323hdt_predicate_id(HDT, String, Id) :-
  324	hdt_string_id(HDT, predicate, String, Id).
  325hdt_object_id(HDT, Object, Id) :-
  326	pre_object(HDT, Object, String),
  327	hdt_string_id(HDT, object, String, Id),
  328	post_object(Object, String).
 hdt_pre_triple(+HDT, ?TripleIn, -TripleID) is det
 hdt_post_triple(+HDT, ?TripleIn, +TripleID) is det
Perform term->id and id->term translation for triples. The predicate hdt_search/4 could be defined as:
hdt_search(HDT, S, P, O) :-
    Triple   = t(S,P,O),
    TripleID = t(SID,PID,OID),
    hdt_pre_triple(HDT, Triple, TripleID),
    hdt_search_id(HDT,SID,PID,OID),
    hdt_post_triple(HDT, Triple, TripleID).
See also
- hdt_search_id/4.
  347hdt_pre_triple(HDT, t(S0,P0,O0), t(S,P,O)) :-
  348	pre_iri_id(HDT, subject, S0, S),
  349	pre_iri_id(HDT, predicate, P0, P),
  350	(   ground(O0)
  351	->  pre_object(HDT, O0, String),
  352	    hdt_string_id(HDT, object, String, O)
  353	;   true
  354	).
  355
  356hdt_post_triple(HDT, t(S0,P0,O0), t(S,P,O)) :-
  357	post_iri_id(HDT, subject, S0, S),
  358	post_iri_id(HDT, predicate, P0, P),
  359	(   ground(O0)
  360	->  true
  361	;   hdt_string_id(HDT, object, String, O),
  362	    post_object(O0, String)
  363	).
  364
  365pre_iri_id(_, _, In, _) :-
  366	var(In), !.
  367pre_iri_id(HDT, Role, In, Id) :-
  368	hdt_string_id(HDT, Role, In, Id).
  369
  370post_iri_id(_, _, S0, _) :-
  371	atom(S0), !.
  372post_iri_id(HDT, Role, In, Id) :-
  373	hdt_string_id(HDT, Role, In, Id).
 hdt_search_id(+HDT, ?S:integer, ?P:integer, ?O:integer) is nondet
True if a triple with the indicated identifiers exists.
 hdt_search_cost(HDT, ?S, ?P, ?O, -Cost:nonneg) is det
  382hdt_search_cost(HDT, S, P, O, Cost) :-
  383	Triple   = t(S,P,O),
  384	TripleID = t(SID,PID,OID),
  385	hdt_pre_triple(HDT, Triple, TripleID),
  386	hdt_search_cost_id(HDT, SID, PID, OID, Cost), !.
  387hdt_search_cost(_, _, _, _, 0).
  388
  389
  390		 /*******************************
  391		 *	       CREATE		*
  392		 *******************************/
 hdt_create_from_file(+HDTFile, +RDFFile, +Options)
Create a HDT file from an RDF file. RDFFile must be in ntriples format. Options:
base_uri(+URI)
URI is used for generating the header properties (see http_header/4.
  404		 /*******************************
  405		 *	      MESSAGES		*
  406		 *******************************/
  407
  408:- multifile prolog:error_message//1.  409
  410prolog:error_message(hdt_error(Message)) -->
  411	[ 'HDT: ~w'-[Message] ]