View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2014-2015, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdfa,
   36          [ read_rdfa/3,                % +Input, -RDF, +Options
   37            xml_rdfa/3                  % +XMLDom, -RDF, +Options
   38          ]).   39:- use_module(library(semweb/rdf_db)).   40:- use_module(library(http/http_open)).   41:- use_module(library(dcg/basics)).   42:- use_module(library(xpath)).   43:- use_module(library(apply)).   44:- use_module(library(sgml)).   45:- use_module(library(lists)).   46:- use_module(library(option)).   47:- use_module(library(debug)).   48:- use_module(library(uri)).   49:- use_module(library(error)).

/** <module> Extract RDF from an HTML or XML DOM

This module implements extraction of RDFa triples from parsed XML or
HTML documents. It has two interfaces: read_rdfa/3 to read triples from
some input (stream, file, URL) and xml_rdfa/3 to extract triples from an
HTML or XML document that is already parsed with load_html/3 or
load_xml/3.

@see http://www.w3.org/TR/2013/REC-rdfa-core-20130822/
@see http://www.w3.org/TR/html-rdfa/
*/
   63:- rdf_register_prefix(rdfa, 'http://www.w3.org/ns/rdfa#').   64
   65:- rdf_meta
   66    add_triple(+, r, r, o),
   67    add_incomplete_triple(+, t).   68
   69:- discontiguous
   70    term_expansion/2.   71
   72:- predicate_options(xml_rdfa/3, 3,
   73                     [ base(atom),
   74                       anon_prefix(any),
   75                       lang(atom),
   76                       vocab(atom),
   77                       markup(atom)
   78                     ]).   79:- predicate_options(read_dom/3, 3,
   80                     [ pass_to(load_html/3, 3),
   81                       pass_to(load_xml/3, 3)
   82                     ]).   83:- predicate_options(read_rdfa/3, 3,
   84                     [ pass_to(read_dom/3, 3),
   85                       pass_to(xml_rdfa/3, 3),
   86                       pass_to(system:open/4, 4),
   87                       pass_to(http_open:http_open/3, 3)
   88                     ]).   89
   90
   91                 /*******************************
   92                 *          STREAM READING      *
   93                 *******************************/
%!  read_rdfa(+Input, -Triples, +Options) is det.
%
%   True when Triples is a list of rdf(S,P,O) triples extracted from
%   Input. Input is either a stream, a file name, a URL referencing a
%   file name or a URL that is valid for http_open/3. Options are
%   passed to open/4, http_open/3 and xml_rdfa/3. If no base is
%   provided in Options, a base is deduced from Input.

read_rdfa(Input, Triples, Options) :-
    % The input is opened, parsed and closed again before the triples
    % are extracted from the resulting DOM.
    setup_call_cleanup(
        open_input(Input, Stream, DeducedOptions, Closer, Options),
        read_dom(Stream, DOM, Options),
        close_input(Closer)),
    % Options from the caller are merged with the options deduced from
    % Input (e.g. base(URI)).
    merge_options(Options, DeducedOptions, AllOptions),
    xml_rdfa(DOM, Triples, AllOptions).
  110
  111open_input(Input, In, NewOptions, Close, Options) :-
  112    open_input2(Input, In, NewOptions, Close0, Options),
  113    detect_bom(In, Close0, Close).
  114
  115open_input2(stream(In), In, Options, true, _) :-
  116    !,
  117    (   stream_property(In, file_name(Name)),
  118        to_uri(Name, URI)
  119    ->  Options = [base(URI)]
  120    ;   Options = []
  121    ).
  122open_input2(In, In, Options, true, _) :-
  123    is_stream(In),
  124    !,
  125    (   stream_property(In, file_name(Name)),
  126        to_uri(Name, URI)
  127    ->  Options = [base(URI)]
  128    ;   Options = []
  129    ).
  130open_input2(URL, In, [base(URL)], close(In), Options) :-
  131    atom(URL),
  132    uri_file_name(URL, File),
  133    !,
  134    open(File, read, In, Options).
  135open_input2(URL, In, [base(Base)], close(In), Options) :-
  136    atom(URL),
  137    to_uri2(URL, Base),
  138    !,
  139    http_open(URL, In, Options).
  140open_input2(File, In, [base(URI)], close(In), Options) :-
  141    absolute_file_name(File, Path, [access(read)]),
  142    uri_file_name(URI, Path),
  143    open(Path, read, In, Options).
%!  detect_bom(+In, +Close0, -Close) is det.
%
%   We may be loading a binary stream. In that case we want to do BOM
%   detection. If the encoding of In could be set to `bom`, Close is
%   Close0 merged with an action that restores the original encoding.
%   Otherwise Close is Close0.

detect_bom(In, Close0, Close) :-
    stream_property(In, type(binary)),
    stream_property(In, encoding(OldEnc)),
    % Errors from set_stream/2 simply mean: no BOM detection.
    catch(set_stream(In, encoding(bom)), _, fail),
    !,
    Restore = set_stream(In, encoding(OldEnc)),
    merge_close(Close0, Restore, Close).
detect_bom(_, Close, Close).
  157
  158merge_close(true, Close, Close) :- !.
  159merge_close(Close, _, Close).
  160
  161to_uri(URI0, URI) :-
  162    to_uri2(URI0, URI),
  163    !.
  164to_uri(URI0, URI) :-
  165    absolute_file_name(URI0, Path),
  166    uri_file_name(URI, Path).
  167
  168to_uri2(URI0, Base) :-
  169    uri_components(URI0, Components),
  170    uri_data(scheme, Components, Scheme),
  171    ground(Scheme),
  172    http_scheme(Scheme),
  173    !,
  174    uri_data(fragment, Components, _, Components2),
  175    uri_components(Base, Components2).
  176
  177http_scheme(http).
  178http_scheme(https).
  179
  180close_input(true).
  181close_input(close(X)) :- close(X).
  182close_input(set_stream(In, encoding(Enc))) :- set_stream(In, encoding(Enc)).
  183
  184read_dom(In, DOM, Options) :-
  185    option(dialect(Dialect), Options),
  186    !,
  187    (   xml_dialect(Dialect)
  188    ->  load_xml(stream(In), DOM, Options)
  189    ;   load_html(stream(In), DOM, Options)
  190    ).
  191read_dom(In, DOM, Options) :-
  192    peek_string(In, 1000, Start),
  193    guess_dialect(Start, Dialect),
  194    read_dom(In, DOM, [dialect(Dialect)|Options]).
  195
  196xml_dialect(xml).
  197xml_dialect(xmlns).
  198xml_dialect(svg).
  199xml_dialect(xhtml).
  200xml_dialect(xhtml5).
  201
  202guess_dialect(Start, Dialect) :-
  203    sub_string(Start, _, _, _, "<?xml"),
  204    !,
  205    Dialect = xml.
  206guess_dialect(Start, Dialect) :-
  207    sub_string(Start, _, _, _, "<html"),
  208    !,
  209    (   sub_string(Start, _, _, _, "xmlns:")
  210    ->  Dialect = xhtml
  211    ;   string_codes(Start, Codes),
  212        phrase(html_doctype(DialectFound), Codes, _)
  213    ->  Dialect = DialectFound
  214    ;   Dialect = html
  215    ).
  216guess_dialect(Start, Dialect) :-
  217    sub_string(Start, _, _, _, "<svg"),
  218    !,
  219    Dialect = svg.
  220guess_dialect(_, xml).
  221
  222html_doctype(html5) -->
  223    blanks,
  224    "<!DOCTYPE", blank, blanks, "html", blanks, ">",
  225    !.
  226html_doctype(html4) -->
  227    blanks,
  228    "<!", icase_string(`doctype`), blank, blanks, icase_string(`html`),
  229    blank, blanks,
  230    icase_string(`public`),
  231    blank,
  232    !.
  233
  234icase_string([]) --> [].
  235icase_string([H|T]) --> alpha_to_lower(H), icase_string(T).
  236
  237
  238                 /*******************************
  239                 *        DOM PROCESSING        *
  240                 *******************************/
%!  xml_rdfa(+DOM, -RDF, +Options)
%
%   True when RDF is a list of rdf(S,P,O) terms extracted from DOM
%   according to the RDFa specification. Options processed:
%
%     * base(+BaseURI)
%     URI to use for ''. Normally set to the document URI.
%     * anon_prefix(+AnnonPrefix)
%     Prefix for blank nodes.
%     * lang(+Lang)
%     Default for `lang`
%     * vocab(+Vocab)
%     Default for `vocab`
%     * markup(+Markup)
%     Markup language processed (xhtml, xml, ...)
%
%   DOM may be a single element, a processing instruction (which
%   yields no triples) or a list of these.
%
%   @error instantiation_error if DOM is unbound.
%   @error type_error(xml_dom, DOM) if DOM is anything else.

xml_rdfa(DOM, _, _) :-
    var(DOM),
    !,
    instantiation_error(DOM).
xml_rdfa(Nodes, RDF, Options) :-
    is_list(Nodes),
    !,
    % Process each node separately and concatenate the results.
    maplist(xml_rdfa_aux(Options), Nodes, PerNode),
    append(PerNode, RDF).
xml_rdfa(DOM, RDF, Options) :-
    DOM = element(_,_,_),
    !,
    rdfa_evaluation_context(DOM, EvalContext, Options),
    process_node(DOM, EvalContext),
    % Triples are collected in the first argument of the `triples`
    % term of the evaluation context; reverse them before applying
    % the patterns.
    arg(1, EvalContext.triples, Collected),
    reverse(Collected, RDF0),
    apply_patterns(RDF0, RDF).
% XML Processing Instruction (PI).
xml_rdfa(pi(_), [], _) :-
    !.
xml_rdfa(DOM, _, _) :-
    type_error(xml_dom, DOM).

% xml_rdfa_aux(+Options, +DOM, -RDF)
%
% xml_rdfa/3 with the argument order required by maplist/3.
xml_rdfa_aux(Options, DOM, RDF) :-
    xml_rdfa(DOM, RDF, Options).
  284
  285process_node(DOM, EvalContext) :-
  286    rdfa_local_context(EvalContext, LocalContext),  % 7.5.1
  287    update_vocab(DOM, LocalContext),                % 7.5.2
  288    update_prefixes(DOM, LocalContext),             % 7.5.3
  289    update_lang(DOM, LocalContext),                 % 7.5.4
  290    update_subject(DOM, LocalContext),              % 7.5.5, 7.5.6
  291    emit_typeof(DOM, LocalContext),                 % 7.5.7
  292    update_list_mapping(DOM, LocalContext),         % 7.5.8
  293    step_7_5_9(DOM, LocalContext),                  % 7.5.9
  294    step_7_5_10(DOM, LocalContext),                 % 7.5.10
  295    update_property_value(DOM, LocalContext),       % 7.5.11
  296    complete_triples(LocalContext),                 % 7.5.12
  297    descent(DOM, LocalContext),                     % 7.5.13
  298    complete_lists(LocalContext),
  299    !.                % 7.5.14
  300process_node(DOM, EvalContext) :-
  301    print_message(warning, rdfa(failed(DOM, EvalContext))),
  302    (   debugging(rdfa(test))
  303    ->  gtrace,
  304        process_node(DOM, EvalContext)
  305    ;   true
  306    ).
%!  rdfa_evaluation_context(+DOM, -Context, +Options)
%
%   7.5.0: Create the initial evaluation context. The values are
%   taken from Options (markup, base, lang, vocab, anon_prefix,
%   prefixes, terms) with defaults derived from the markup language
%   and the base URI.
%
%   @tbd    derive markup from DOM

rdfa_evaluation_context(DOM, Context, Options) :-
    empty_list_mapping(ListMapping),
    option(markup(Markup), Options, xhtml),
    base(DOM, Options, Base),
    default_vocab(Markup, DefaultVocab),
    option(lang(Lang), Options, ''),
    option(vocab(Vocab), Options, DefaultVocab),
    % Without an explicit prefix, blank nodes are prefixed with '__'
    % followed by the base URI.
    (   option(anon_prefix(AnonPrefix), Options)
    ->  true
    ;   atom_concat('__', Base, AnonPrefix)
    ),
    default_prefixes(Markup, DefPrefixes),
    mapping(prefixes(IRIMappings0), Options),
    put_dict(DefPrefixes, IRIMappings0, IRIMappings),
    mapping(terms(TermMappings), Options),
    Context = rdfa_eval{base:Base,                  % atom
                        parent_subject:Base,        % atom
                        parent_object:null,         % null or atom
                        incomplete_triples:[],      % list
                        list_mapping:ListMapping,   % IRI --> list(List)
                        lang:Lang,                  % null or atom
                        iri_mapping:IRIMappings,    % dict
                        term_mapping:TermMappings,  % dict
                        vocab:Vocab,                % null or atom
                        bnode_id:bnode(1),          % integer
                        markup:Markup,              % Processing profile
                        anon_prefix:AnonPrefix,
                        named_bnodes:r{v:_{}},
                        root:DOM,                   % XML DOM
                        triples:triples([])}.       % list
  345
  346base(DOM, _Options, Base) :-
  347    xpath(DOM, //base(@href=Base), _),
  348    !.
  349base(_DOM, Options, Base) :-
  350    option(base(Base0), Options),
  351    rdf_global_id(Base0, Base),
  352    !.
  353base(_, _, 'http://www.example.org/').
  354
  355mapping(Term, Options) :-
  356    Term =.. [Name, Value],
  357    (   TermG =.. [Name, Var],
  358        option(TermG, Options)
  359    ->  dict_create(Value, Name, Var)
  360    ;   dict_create(Value, Name, [])
  361    ).
%!  default_prefixes(+Markup, -Dict)
%
%   Create a default prefix map. The map holds a single entry that
%   maps the empty prefix to the default prefix for Markup. Which
%   prefixes are supposed to be in this map?

default_prefixes(Markup, Dict) :-
    Dict = _{'':DefPrefix},
    default_prefix_mapping(Markup, DefPrefix).
%!  rdfa_core_prefix(?Prefix, ?URI) is nondet.
%
%   RDFa initial context prefix declarations.
%
%   @see http://www.w3.org/2011/rdfa-context/rdfa-1.1

rdfa_core_prefix(dcat,    'http://www.w3.org/ns/dcat#').
rdfa_core_prefix(qb,      'http://purl.org/linked-data/cube#').
rdfa_core_prefix(grddl,   'http://www.w3.org/2003/g/data-view#').
rdfa_core_prefix(ma,      'http://www.w3.org/ns/ma-ont#').
rdfa_core_prefix(org,     'http://www.w3.org/ns/org#').
rdfa_core_prefix(owl,     'http://www.w3.org/2002/07/owl#').
rdfa_core_prefix(prov,    'http://www.w3.org/ns/prov#').
rdfa_core_prefix(rdf,     'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
rdfa_core_prefix(rdfa,    'http://www.w3.org/ns/rdfa#').
rdfa_core_prefix(rdfs,    'http://www.w3.org/2000/01/rdf-schema#').
rdfa_core_prefix(rif,     'http://www.w3.org/2007/rif#').
rdfa_core_prefix(rr,      'http://www.w3.org/ns/r2rml#').
rdfa_core_prefix(sd,      'http://www.w3.org/ns/sparql-service-description#').
rdfa_core_prefix(skos,    'http://www.w3.org/2004/02/skos/core#').
rdfa_core_prefix(skosxl,  'http://www.w3.org/2008/05/skos-xl#').
rdfa_core_prefix(wdr,     'http://www.w3.org/2007/05/powder#').
rdfa_core_prefix(void,    'http://rdfs.org/ns/void#').
rdfa_core_prefix(wdrs,    'http://www.w3.org/2007/05/powder-s#').
rdfa_core_prefix(xhv,     'http://www.w3.org/1999/xhtml/vocab#').
rdfa_core_prefix(xml,     'http://www.w3.org/XML/1998/namespace').
rdfa_core_prefix(xsd,     'http://www.w3.org/2001/XMLSchema#').
rdfa_core_prefix(cc,      'http://creativecommons.org/ns#').
rdfa_core_prefix(ctag,    'http://commontag.org/ns#').
rdfa_core_prefix(dc,      'http://purl.org/dc/terms/').
rdfa_core_prefix(dcterms, 'http://purl.org/dc/terms/').
rdfa_core_prefix(dc11,    'http://purl.org/dc/elements/1.1/').
rdfa_core_prefix(foaf,    'http://xmlns.com/foaf/0.1/').
rdfa_core_prefix(gr,      'http://purl.org/goodrelations/v1#').
rdfa_core_prefix(ical,    'http://www.w3.org/2002/12/cal/icaltzd#').
rdfa_core_prefix(og,      'http://ogp.me/ns#').
rdfa_core_prefix(rev,     'http://purl.org/stuff/rev#').
rdfa_core_prefix(sioc,    'http://rdfs.org/sioc/ns#').
rdfa_core_prefix(v,       'http://rdf.data-vocabulary.org/#').
rdfa_core_prefix(vcard,   'http://www.w3.org/2006/vcard/ns#').
rdfa_core_prefix(schema,  'http://schema.org/').
  412
  413default_prefix_mapping(xhtml, 'http://www.w3.org/1999/xhtml/vocab#') :- !.
  414default_prefix_mapping(_,     'http://www.example.org/').
  415
  416default_vocab(_, '').
%!  rdfa_local_context(+EvalContext, -LocalContext)
%
%   7.5.1: Create the local context. The IRI mapping, list mapping,
%   language, term mapping and vocabulary are copied from EvalContext;
%   the other fields get their initial values. The evaluation context
%   itself is kept in the field `eval_context`.

rdfa_local_context(EvalContext, LocalContext) :-
    % Select the inherited fields from the evaluation context.
    _{ iri_mapping:IRIMappings,
       list_mapping:ListMapping,
       lang:Lang,
       term_mapping:TermMapping,
       vocab:Vocab
     } :< EvalContext,
    LocalContext = rdfa_local{skip_element:false,
                              new_subject:null,
                              current_object_resource:null,
                              typed_resource:null,
                              iri_mapping:IRIMappings,
                              incomplete_triples:[],
                              list_mapping:ListMapping,
                              lang:Lang,
                              term_mapping:TermMapping,
                              vocab:Vocab,
                              eval_context:EvalContext
                             }.
%!  update_vocab(+DOM, +Context) is det.
%
%   7.5.2. Handle @vocab. If the element has a `vocab` attribute, the
%   vocabulary of Context is updated and a rdfa:usesVocabulary triple
%   is added with the base URI as subject.

update_vocab(DOM, Context) :-
    xpath(DOM, /(*(@vocab=Vocab0)), _),
    !,
    (   Vocab0 == ''
    ->  Vocab = ''                  % Host Language defined default?
    ;   iri(Vocab0, Vocab, Context)
    ),
    nb_set_dict(vocab, Context, Vocab),
    Base = Context.eval_context.base,
    add_triple(Context, Base, rdfa:usesVocabulary, Vocab).
update_vocab(_, _).
%!  update_prefixes(+DOM, +Context) is det.
%
%   7.5.3: Update prefix map using @prefix and @xmlns. First processes
%   xmlns:Prefix=IRI. The IRI mapping of Context is only replaced if at
%   least one prefix is declared on the element.

update_prefixes(DOM, Context) :-
    DOM = element(_, Attrs, _),
    xmlns_dict(Attrs, _{}, FromXmlns),
    (   xpath(DOM, /(*(@prefix=PrefixDecl)), _)
    ->  prefix_dict(PrefixDecl, FromXmlns, Declared)
    ;   Declared = FromXmlns
    ),
    Declared \= _{},
    !,
    put_dict(Declared, Context.iri_mapping, NewMapping),
    b_set_dict(iri_mapping, Context, NewMapping).
update_prefixes(_, _).
  478
  479xmlns_dict([], Dict, Dict).
  480xmlns_dict([Attr=IRI|T0], Dict0, Dict) :-
  481    (   Attr = xmlns:Name
  482    ;   atom_concat('xmlns:', Name, Attr)
  483    ),
  484    !,
  485    downcase_atom(Name, Prefix),
  486    put_dict(Prefix, Dict0, IRI, Dict1),
  487    xmlns_dict(T0, Dict1, Dict).
  488xmlns_dict([_|T0], Dict0, Dict) :-
  489    xmlns_dict(T0, Dict0, Dict).
  490
  491prefix_dict(Text, Dict0, Dict) :-
  492    atom_codes(Text, Codes),
  493    phrase(prefixes(Dict0, Dict), Codes).
%!  update_lang(+DOM, +Context) is det.
%
%   7.5.4: Update lang. Also updates the base URI of the evaluation
%   context if the element has an xml:base attribute.

update_lang(DOM, Context) :-
    DOM = element(_, Attrs, _),
    % Attribute names, in order of preference: XML with namespaces,
    % XML without namespaces and HTML 5.
    (   member(LangAttr, [xml:lang, 'xml:lang', lang]),
        memberchk(LangAttr=Lang, Attrs)
    ->  nb_set_dict(lang, Context, Lang)
    ;   true
    ),
    % XML with namespaces, then XML without namespaces.
    (   member(BaseAttr, [xml:base, 'xml:base']),
        memberchk(BaseAttr=Base, Attrs)
    ->  nb_set_dict(base, Context.eval_context, Base)
    ;   true
    ).
%!  update_subject(+DOM, +Context) is det.
%
%   7.5.5 and 7.5.6: establish a value for new subject. The first
%   clause handles elements without @rel and @rev (7.5.5); the second
%   clause handles elements that have @rel or @rev (7.5.6). The
%   results are stored in the fields `new_subject`, `typed_resource`,
%   `current_object_resource` and `skip_element` of Context.

update_subject(DOM, Context) :-
    DOM = element(Tag, Attrs, _),
    \+ has_attribute(rel, Attrs, Context),
    \+ has_attribute(rev, Attrs, Context),    % Commit to rule-set 7.5.5
    !,
    (   memberchk(property=_, Attrs),
        \+ memberchk(content=_, Attrs),
        \+ memberchk(datatype=_, Attrs)
    ->  % 7.5.5.1: @property without @content and @datatype
        (   (   about(DOM, About, Context)
            ;   About = Context.eval_context.parent_object
            ),
            About \== null
        ->  nb_set_dict(new_subject, Context, About)
        ;   true
        ),
        (   memberchk(typeof=_, Attrs)
        ->  (   (   iri_attr(about, Attrs, TypedIRI, Context),
                    TypedIRI \== null
                ;   DOM == Context.eval_context.root
                ->  iri('', TypedIRI, Context)
                ;   (   iri_attr(resource, Attrs, TypedIRI, Context)
                    ;   iri_attr(href,     Attrs, TypedIRI, Context)
                    ;   iri_attr(src,      Attrs, TypedIRI, Context)
                    ;   new_bnode(TypedIRI, Context)
                    ),
                    TypedIRI \== null
                ->  nb_set_dict(typed_resource, Context, TypedIRI),
                    nb_set_dict(current_object_resource, Context, TypedIRI)
                )
            ->  nb_set_dict(typed_resource, Context, TypedIRI)
            ;   true
            )
        ;   true
        )
    ;   % 7.5.5.2: all other elements
        (   new_subject_attr_2(SubjectAttr),
            memberchk(SubjectAttr=RawAbout, Attrs),
            attr_convert(SubjectAttr, RawAbout, About, Context),
            About \== null
        ->  true
        ;   html_root(Tag, Context),
            About = Context.eval_context.parent_object,
            About \== null
        ->  true
        ;   DOM == Context.eval_context.root
        ->  iri('', About, Context)
        ;   memberchk(typeof=_, Attrs)
        ->  new_bnode(About, Context)
        ;   About = Context.eval_context.parent_object,
            About \== null
        ->  (   \+ memberchk(typeof=_, Attrs)
            ->  nb_set_dict(skip_element, Context, true)
            ;   true
            )
        ),
        debug(rdfa(new_subject), '~w: set new_subject to ~p', [Tag, About]),
        nb_set_dict(new_subject, Context, About),
        (   memberchk(typeof=_, Attrs)
        ->  nb_set_dict(typed_resource, Context, About)
        ;   true
        )
    ).
update_subject(DOM, Context) :-
    DOM = element(_, Attrs, _),               % 7.5.6
    (   iri_attr(about, Attrs, NewSubject, Context)
    ->  nb_set_dict(new_subject, Context, NewSubject),
        (   memberchk(typeof=_, Attrs)
        ->  nb_set_dict(typed_resource, Context, NewSubject)
        ;   true
        )
    ;   true        % was \+ memberchk(resource=_, Attrs):
                    % If no resource is provided ...
    ->  (   DOM == Context.eval_context.root
        ->  iri('', NewSubject, Context),
            nb_set_dict(new_subject, Context, NewSubject),
            (   memberchk(typeof=_, Attrs)
            ->  nb_set_dict(typed_resource, Context, NewSubject)
            ;   true
            )
        ;   NewSubject = Context.eval_context.parent_object,
            NewSubject \== null
        ->  nb_set_dict(new_subject, Context, NewSubject)
        ;   true
        )
    ),
    % Establish the current object resource from @resource, @href or
    % @src, or a new blank node for @typeof without @about.
    (   (   iri_attr(resource, Attrs, ObjectResource, Context)
        ;   iri_attr(href,     Attrs, ObjectResource, Context)
        ;   iri_attr(src,      Attrs, ObjectResource, Context)
        ;   memberchk(typeof=_, Attrs),
            \+ memberchk(about=_, Attrs),
            new_bnode(ObjectResource, Context)
        ),
        ObjectResource \== null
    ->  nb_set_dict(current_object_resource, Context, ObjectResource)
    ;   true
    ),
    (   memberchk(typeof=_, Attrs),
        \+ memberchk(about=_, Attrs)
    ->  nb_set_dict(typed_resource, Context,
                    Context.current_object_resource)
    ;   true
    ).
  621
  622new_subject_attr_2(about).
  623new_subject_attr_2(resource).
  624new_subject_attr_2(href).
  625new_subject_attr_2(src).
  626
  627html_root(head, Context) :- html_markup(Context.eval_context.markup).
  628html_root(body, Context) :- html_markup(Context.eval_context.markup).
  629
  630html_markup(html).
  631html_markup(xhtml).
%!  emit_typeof(+DOM, +LocalContext) is det.
%
%   7.5.7: emit triples for @typeof value. Nothing is emitted if the
%   typed resource of the context is `null` or the element has no
%   `typeof` attribute.

emit_typeof(DOM, Context) :-
    DOM = element(_, Attrs, _),
    TypedResource = Context.typed_resource,
    TypedResource \== null,
    memberchk(typeof=TypeOf, Attrs),
    !,
    iri_list(TypeOf, Types, Context),
    maplist(type_triple(Context), Types).
emit_typeof(_, _).

% type_triple(+Context, +IRI)
%
% Add an rdf:type triple stating that the typed resource of Context
% is of type IRI.
type_triple(Context, IRI) :-
    add_triple(Context, Context.typed_resource, rdf:type, IRI).
%!  update_list_mapping(+DOM, +Context) is det.
%
%   7.5.8: Create a list mapping if appropriate. A new empty list
%   mapping is installed if the new subject is not `null` and differs
%   from the parent object of the evaluation context.

update_list_mapping(_DOM, Context) :-
    NewSubject = Context.new_subject,
    NewSubject \== null,
    NewSubject \== Context.eval_context.parent_object,
    !,
    empty_list_mapping(Fresh),
    b_set_dict(list_mapping, Context, Fresh).
update_list_mapping(_, _).
%!  empty_list_mapping(-Mapping) is det.
%!  empty_list_mapping(+Mapping) is semidet.
%!  get_list_mapping(+IRI, +Mapping, -List) is semidet.
%!  add_list_mapping(+IRI, !Mapping, +List) is det.
%
%   Manage a list mapping. Note this needs to be wrapped in a term to
%   be able to extend the mapping while keeping its identity.

empty_list_mapping(list_mapping(_{})).

get_list_mapping(IRI, list_mapping(Dict), List) :-
    List = Dict.get(IRI).

add_list_mapping(IRI, Mapping, List) :-
    Mapping = list_mapping(Dict),
    Extended = Dict.put(IRI, List),
    % Destructively replace the dict inside the wrapper term.
    setarg(1, Mapping, Extended).

% list_mapping_pairs(+Mapping, -Pairs)
%
% Pairs is the list of IRI-List pairs held by Mapping.
list_mapping_pairs(list_mapping(Dict), Pairs) :-
    dict_pairs(Dict, _, Pairs).
%!  step_7_5_9(+DOM, +Context)
%
%   7.5.9: Does nothing if the current object resource is `null`.
%   Otherwise, with both @inlist and @rel the current object resource
%   is added to the list of each @rel predicate; else triples are
%   added for the predicates in @rel (without @inlist) and @rev.

step_7_5_9(_DOM, Context) :-
    Context.current_object_resource == null,
    !.
step_7_5_9(DOM, Context) :-
    DOM = element(_, Attrs, _),
    memberchk(inlist=_, Attrs),
    has_attribute(rel, Attrs, Rel, Context),
    !,
    iri_list(Rel, Preds, Context),
    Object = Context.current_object_resource,
    maplist(add_property_list(Context, Object), Preds).
step_7_5_9(DOM, Context) :-
    DOM = element(_, Attrs, _),
    (   has_attribute(rel, Attrs, Rel, Context),
        \+ memberchk(inlist=_, Attrs)
    ->  iri_list(Rel, RelIRIs, Context),
        maplist(rel_triple(Context), RelIRIs)
    ;   true
    ),
    (   has_attribute(rev, Attrs, Rev, Context)
    ->  iri_list(Rev, RevIRIs, Context),
        maplist(rev_triple(Context), RevIRIs)
    ;   true
    ).

% rel_triple(+Context, +IRI)
%
% Add the triple (new subject, IRI, current object resource).
rel_triple(Context, IRI) :-
    add_triple(Context,
               Context.new_subject, IRI, Context.current_object_resource).

% rev_triple(+Context, +IRI)
%
% Add the triple (current object resource, IRI, new subject).
rev_triple(Context, IRI) :-
    add_triple(Context,
               Context.current_object_resource, IRI, Context.new_subject).
%!  step_7_5_10(+DOM, +Context)
%
%   Similar to step_7_5_9, but adding to incomplete triples. Does
%   nothing if the current object resource is not `null`. Otherwise
%   the current object resource is set to a new blank node and an
%   incomplete triple is added for each predicate in @rel and @rev.

step_7_5_10(_DOM, Context) :-
    Context.current_object_resource \== null,
    !.
step_7_5_10(DOM, Context) :-
    DOM = element(_, Attrs, _),
    memberchk(inlist=_, Attrs),
    has_attribute(rel, Attrs, Rel, Context),
    !,
    set_current_object_resource_to_bnode(Context),
    iri_list(Rel, Preds, Context),
    maplist(incomplete_ll_triple(Context), Preds).
step_7_5_10(DOM, Context) :-
    DOM = element(_, Attrs, _),
    (   has_attribute(rel, Attrs, Rel, Context),
        \+ memberchk(inlist=_, Attrs)
    ->  iri_list(Rel, RelIRIs, Context),
        set_current_object_resource_to_bnode(Context),
        maplist(incomplete_rel_triple(Context), RelIRIs)
    ;   true
    ),
    (   has_attribute(rev, Attrs, Rev, Context)
    ->  iri_list(Rev, RevIRIs, Context),
        set_current_object_resource_to_bnode(Context),
        maplist(incomplete_rev_triple(Context), RevIRIs)
    ;   true
    ).
  748
  749set_current_object_resource_to_bnode(Context) :-
  750    new_bnode(BNode, Context),
  751    b_set_dict(current_object_resource, Context, BNode).
  752
  753incomplete_ll_triple(Context, IRI) :-
  754    LM = Context.list_mapping,
  755    (   get_list_mapping(IRI, LM, LL)
  756    ->  true
  757    ;   LL = list([]),
  758        add_list_mapping(IRI, LM, LL)
  759    ),
  760    add_incomplete_triple(Context, _{list:LL, direction:none}).
  761
  762incomplete_rel_triple(Context, IRI) :-
  763    add_incomplete_triple(Context, _{predicate:IRI, direction:forward}).
  764
  765incomplete_rev_triple(Context, IRI) :-
  766    add_incomplete_triple(Context, _{predicate:IRI, direction:reverse}).
%!  update_property_value(+DOM, +Context) is det.
%
%   7.5.11: establish current property value. Does nothing if the
%   element has no `property` attribute. Otherwise the object is
%   determined by the first applicable rule (@datatype, @content,
%   @resource/@href/@src, @datetime or `time` content for HTML markup,
%   @typeof without @about, or the text content). Plain literals get
%   the language of Context if that is not ''. The object is added to
%   the lists of the predicates if @inlist is present and emitted as
%   triples with the new subject otherwise.

update_property_value(DOM, Context) :-
    DOM = element(Tag, Attrs, Content),
    memberchk(property=PropSpec, Attrs),
    !,
    iri_list(PropSpec, Preds, Context),
    (   memberchk(datatype=DTSpec, Attrs)
    ->  (   DTSpec \== '',
            term_or_curie_or_absiri(DTSpec, DataType, Context),
            DataType \== null
        ->  % XML and HTML literals keep their markup.
            (   (   rdf_equal(rdf:'XMLLiteral', DataType)
                ;   rdf_equal(rdf:'HTML', DataType)
                )
            ->  content_xml(Content, Text)
            ;   content_text(DOM, Text, Context)
            ),
            Obj0 = literal(type(DataType, Text))
        ;   content_text(DOM, Text, Context),
            Obj0 = literal(Text)
        )
    ;   memberchk(content=Text, Attrs)
    ->  Obj0 = literal(Text)
    ;   \+ has_attribute(rel, Attrs, Context),
        \+ has_attribute(rev, Attrs, Context),
        %\+ memberchk(content=_, Attrs),    % already guaranteed
        (   iri_attr(resource, Attrs, Obj0, Context)
        ;   iri_attr(href,     Attrs, Obj0, Context)
        ;   iri_attr(src,      Attrs, Obj0, Context)
        ),
        Obj0 \== null
    ->  true
    ;   (   memberchk(datetime=DateTime, Attrs)
        ;   Tag == time,
            Content = [DateTime]
        ),
        html_markup(Context.eval_context.markup)
    ->  (   date_time_type(DateTime, DataType)
        ->  Obj0 = literal(type(DataType, DateTime))
        ;   Obj0 = literal(DateTime)
        )
    ;   memberchk(typeof=_, Attrs),
        \+ memberchk(about=_, Attrs)
    ->  Obj0 = Context.typed_resource
    ;   content_text(Content, Text, Context), % "as a plain literal"???
        Obj0 = literal(Text)
    ),
    % Add the language to literals whose value is atomic.
    (   Obj0 = literal(Text),
        atomic(Text),
        Context.lang \== ''
    ->  Obj = literal(lang(Context.lang, Text))
    ;   Obj = Obj0
    ),
    (   memberchk(inlist=_, Attrs)
    ->  maplist(add_property_list(Context, Obj), Preds)
    ;   NewSubject = Context.new_subject,
        maplist(add_property(Context, NewSubject, Obj), Preds)
    ).
update_property_value(_, _).
  830
  831add_property_list(Context, Obj, Pred) :-
  832    LM = Context.list_mapping,
  833    (   get_list_mapping(Pred, LM, LL)
  834    ->  LL = list(Old),
  835        setarg(1, LL, [Obj|Old])
  836    ;   add_list_mapping(Pred, LM, list([Obj]))
  837    ).
  838
  839add_property(Context, Subject, Object, Pred) :-
  840    add_triple(Context, Subject, Pred, Object).
  841
  %!  content_text(+DOM, -Text, +Context)
  %
  %   Text is the textual value of DOM. For an element this is, in
  %   order of preference: the value of its `content` attribute, the
  %   value of its `datetime` attribute (only if html_markup/1 accepts
  %   the markup of the evaluation context) or the concatenation of
  %   all text nodes below it. Any other DOM (e.g., a content list) is
  %   flattened to its concatenated text nodes.

  content_text(element(_,Attrs,_), Text, _Context) :-
      memberchk(content=Text, Attrs),
      !.
  content_text(element(_,Attrs,_), Text, Context) :-
      memberchk(datetime=Text, Attrs),
      html_markup(Context.eval_context.markup),
      !.
  content_text(DOM, Text, _Context) :-
      % text_nodes//1 descends into an element's content itself, so
      % elements and content lists can share this clause.
      phrase(text_nodes(DOM), Fragments),
      atomic_list_concat(Fragments, Text).
  857
  %!  text_nodes(+DOM)//
  %
  %   Emit the leaves of DOM in document order. Lists are walked
  %   member by member, elements are replaced by their content and
  %   anything else is emitted as is.

  text_nodes([]) -->
      !,
      [].
  text_nodes([Node|Siblings]) -->
      !,
      text_nodes(Node),
      text_nodes(Siblings).
  text_nodes(element(_,_,Children)) -->
      !,
      text_nodes(Children).
  text_nodes(Leaf) -->
      [Leaf].
  862
  %!  content_xml(+DOM, -Text)
  %
  %   Text is an atom holding DOM serialized by xml_write/2, without
  %   the XML header.

  content_xml(DOM, Text) :-
      WriteOptions = [header(false)],
      with_output_to(atom(Text), xml_write(DOM, WriteOptions)).
 complete_triples(+Context)
7.5.12: Complete incomplete triples
  %!  complete_triples(+Context)
  %
  %   If the element is not skipped, a new subject is known and the
  %   evaluation context carries incomplete triples, complete each of
  %   them (in reversed storage order) using complete_triple/2.
  %   Otherwise do nothing.

  complete_triples(Context) :-
      (   Context.skip_element == false,
          Context.new_subject \== null,
          Pending = Context.eval_context.incomplete_triples,
          Pending \== []
      ->  reverse(Pending, Ordered),
          maplist(complete_triple(Context), Ordered)
      ;   true
      ).
  878
  %!  complete_triple(+Context, +Dict)
  %
  %   Complete one incomplete triple, dispatching on Dict.direction.

  complete_triple(Context, Dict) :-
      Direction = Dict.direction,
      complete_triple(Direction, Dict, Context).

  %!  complete_triple(+Direction, +Dict, +Context)
  %
  %   - `none`: push the new subject onto the list(Members) cell held
  %     in Dict.list, modifying the cell in place with setarg/3.
  %   - `forward`: add parent_subject - predicate - new_subject.
  %   - `reverse`: add new_subject - predicate - parent_subject.

  complete_triple(none, Dict, Context) :-
      Cell = Dict.list,
      Cell = list(Members),
      Subject = Context.new_subject,
      setarg(1, Cell, [Subject|Members]).
  complete_triple(forward, Dict, Context) :-
      Parent = Context.eval_context.parent_subject,
      Predicate = Dict.predicate,
      Subject = Context.new_subject,
      add_triple(Context, Parent, Predicate, Subject).
  complete_triple(reverse, Dict, Context) :-
      Subject = Context.new_subject,
      Predicate = Dict.predicate,
      Parent = Context.eval_context.parent_subject,
      add_triple(Context, Subject, Predicate, Parent).
 descent(DOM, Context)
7.5.13: Descent into the children
  %!  descent(+DOM, +Context)
  %
  %   Process all children of the element DOM, using descent_skip/2
  %   if the element itself was skipped and descent_no_skip/2
  %   otherwise.

  descent(element(_,_,Children), Context) :-
      Skip = Context.skip_element,
      (   Skip == true
      ->  maplist(descent_skip(Context), Children)
      ;   maplist(descent_no_skip(Context), Children)
      ).
  907
  %!  descent_skip(+Context, +DOM)
  %
  %   Process the child DOM of a skipped element. Element children are
  %   processed in the parent's evaluation context in which only
  %   `lang`, `vocab` and `iri_mapping` are replaced by the values
  %   from Context. Non-element children are ignored.

  descent_skip(Context, DOM) :-
      (   DOM = element(Tag,_,_)
      ->  debug(rdfa(descent), 'skip: ~w: new_subject=~p',
                [Tag, Context.new_subject]),
          process_node(DOM,
                       Context.eval_context.put(
                           _{ lang:        Context.lang,
                              vocab:       Context.vocab,
                              iri_mapping: Context.iri_mapping
                            }))
      ;   true
      ).
  919
  %!  descent_no_skip(+Context, +DOM)
  %
  %   Process the child DOM of an element that was not skipped.
  %   Element children get a new evaluation context in which:
  %
  %     - parent_subject is the new subject of Context, or the
  %       inherited parent subject if the new subject is `null`;
  %     - parent_object is the current object resource of Context,
  %       or the parent subject computed above if that is `null`;
  %     - iri_mapping, incomplete_triples, list_mapping, lang and
  %       vocab are taken from Context.
  %
  %   Non-element children are ignored.

  descent_no_skip(Context, DOM) :-
      (   DOM = element(Tag,_,_)
      ->  NewSubject = Context.new_subject,
          (   NewSubject \== null
          ->  ParentSubject = NewSubject
          ;   ParentSubject = Context.eval_context.parent_subject
          ),
          ObjectResource = Context.current_object_resource,
          (   ObjectResource \== null
          ->  ParentObject = ObjectResource
          ;   ParentObject = ParentSubject
          ),
          debug(rdfa(descent),
                'no skip: ~w: parent subject = ~p, object = ~p',
                [Tag, ParentSubject, ParentObject]),
          process_node(DOM,
                       Context.eval_context.put(
                           _{ parent_subject:     ParentSubject,
                              parent_object:      ParentObject,
                              iri_mapping:        Context.iri_mapping,
                              incomplete_triples: Context.incomplete_triples,
                              list_mapping:       Context.list_mapping,
                              lang:               Context.lang,
                              vocab:              Context.vocab
                            }))
      ;   true
      ).
 complete_lists(+Context) is det
7.5.14: Complete possibly pending lists
  %!  complete_lists(+Context)
  %
  %   Complete the lists pending in the list mapping of Context.
  %   Nothing is done if the mapping is empty. Otherwise every
  %   Pred-List pair is handed to complete_list/3, using the new
  %   subject as subject or, if that is `null`, the base of the
  %   evaluation context.

  complete_lists(Context) :-
      Mapping = Context.list_mapping,
      (   empty_list_mapping(Mapping)
      ->  true
      ;   NewSubject = Context.new_subject,
          (   NewSubject \== null
          ->  CurrentSubject = NewSubject
          ;   CurrentSubject = Context.eval_context.base
          ),
          list_mapping_pairs(Mapping, Pairs),
          maplist(complete_list(Context, CurrentSubject), Pairs)
      ).
  961
  %!  complete_list(+Context, +CurrentSubject, +Pair)
  %
  %   Pair is IRI-list(Members). If IRI is also present in the list
  %   mapping of the evaluation context nothing is emitted here.
  %   Otherwise the members, which are stored in reverse order, are
  %   emitted as an RDF collection that is linked to CurrentSubject
  %   through IRI.

  complete_list(Context, CurrentSubject, IRI-Cell) :-
      (   get_list_mapping(IRI, Context.eval_context.list_mapping, _)
      ->  true
      ;   Cell = list(Reversed),
          reverse(Reversed, Members),
          emit_list(Members, ListURI, Context),
          add_triple(Context, CurrentSubject, IRI, ListURI)
      ).
  969
  %!  emit_list(+Members, -URI, +Context)
  %
  %   Emit the rdf:first/rdf:rest triples for Members. URI is rdf:nil
  %   for the empty list and a new blank node otherwise. The tail is
  %   emitted before the blank node for the head is allocated.

  emit_list([], Nil, _) :-
      rdf_equal(Nil, rdf:nil).
  emit_list([Member|Members], Node, Context) :-
      emit_list(Members, RestNode, Context),
      new_bnode(Node, Context),
      add_triple(Context, Node, rdf:first, Member),
      add_triple(Context, Node, rdf:rest, RestNode).
 has_attribute(+Name, +Attrs, +Context) is semidet
 has_attribute(+Name, +Attrs, -Value, +Context) is semidet
True if Attrs contains Name. We sometimes need to ignore Attributes if their value is invalid.
See also
- HTML+RDFa, 3.1 Additional RDFa Processing Rules, point 7.
  %!  has_attribute(+Name, +Attrs, +Context) is semidet.
  %!  has_attribute(+Name, +Attrs, -Value, +Context) is semidet.
  %
  %   True if Attrs holds Name=Value. A `rel` or `rev` attribute on
  %   an element that also has a `property` attribute in HTML markup
  %   only counts if html_non_empty_rel/2 accepts its value.

  has_attribute(Name, Attrs, Context) :-
      has_attribute(Name, Attrs, _Value, Context).

  has_attribute(Name, Attrs, Value, Context) :-
      (   Name = rel
      ;   Name = rev
      ),
      memberchk(Name=Value, Attrs),
      html_markup(Context.eval_context.markup),
      memberchk(property=_, Attrs),
      !,
      html_non_empty_rel(Value, Context).
  has_attribute(Name, Attrs, Value, _Context) :-
      memberchk(Name=Value, Attrs).
 1004
 %!  html_non_empty_rel(+Spec, +Context) is semidet.
 %
 %   True if at least one of the white space separated tokens of
 %   Spec is accepted by safe_curie_or_curie_or_absiri/3.

 html_non_empty_rel(Spec, Context) :-
     White = "\s\t\n\r",
     split_string(Spec, White, White, Tokens),
     (   member(Token, Tokens),
         safe_curie_or_curie_or_absiri(Token, _, Context)
     ->  true
     ).
 iri_attr(+AttName, +Attrs, -IRI, +Context) is semidet
 %!  iri_attr(+AttName, +Attrs, -IRI, +Context) is semidet.
 %
 %   True if Attrs has an attribute AttName whose value converts to
 %   IRI using the conversion registered in attr_convert/4.

 iri_attr(AttName, Attrs, IRI, Context) :-
     memberchk(AttName=Raw, Attrs),
     attr_convert(AttName, Raw, IRI, Context).
 1018
 %!  attr_convert(+AttName, +Spec, -IRI, +Context)
 %
 %   Convert the attribute value Spec into IRI using the conversion
 %   that belongs to AttName:
 %
 %     - `about`, `resource`: safe_curie_or_curie_or_iri/3
 %     - `href`, `src`, `vocab`: iri/3
 %     - `datatype`: term_or_curie_or_absiri/3

 attr_convert(about, Value, IRI, Context) :-
     safe_curie_or_curie_or_iri(Value, IRI, Context).
 attr_convert(href, Value, IRI, Context) :-
     iri(Value, IRI, Context).
 attr_convert(src, Value, IRI, Context) :-
     iri(Value, IRI, Context).
 attr_convert(resource, Value, IRI, Context) :-
     safe_curie_or_curie_or_iri(Value, IRI, Context).
 attr_convert(vocab, Value, IRI, Context) :-
     iri(Value, IRI, Context).
 attr_convert(datatype, Value, IRI, Context) :-
     term_or_curie_or_absiri(Value, IRI, Context).
 1031
 1032
 %!  about(+DOM, -About, +Context) is semidet.
 %
 %   About is the subject set by the element DOM: the converted
 %   value of its `about` attribute or, if there is none and DOM is
 %   the root of the evaluation context, the IRI for the empty
 %   string. Fails in all other cases.

 about(DOM, About, Context) :-
     DOM = element(_,Attrs,_),
     (   memberchk(about=Spec, Attrs)
     ->  safe_curie_or_curie_or_iri(Spec, About, Context)
     ;   DOM == Context.eval_context.root
     ->  iri('', About, Context)
     ).
 new_bnode(-BNode, +Context) is det
Create a new blank node. Note that the current id is kept in a term to avoid copying the counter on the descent step.
 %!  new_bnode(-BNode, +Context)
 %
 %   Create a new blank node. The counter lives in the first
 %   argument of the term stored as `bnode_id` in the evaluation
 %   context and is incremented with nb_setarg/3. BNode is the
 %   concatenation of `anon_prefix` and the old counter value if the
 %   prefix is an atom and the term bnode(Id) otherwise.

 new_bnode(BNode, Context) :-
     Eval = Context.eval_context,
     Counter = Eval.bnode_id,
     arg(1, Counter, Current),
     succ(Current, Next),
     nb_setarg(1, Counter, Next),
     Prefix = Eval.anon_prefix,
     (   atom(Prefix)
     ->  atom_concat(Prefix, Current, BNode)
     ;   BNode = bnode(Current)
     ).
 iri_list(+Spec, -IRIs, +Context) is det
 True when IRIs is a list of fully qualified IRIs from Spec
 %!  iri_list(+Spec, -IRIs, +Context)
 %
 %   IRIs is the list of IRIs for the white space separated tokens
 %   in Spec. Tokens that resolve to `null` are dropped. If
 %   splitting Spec yields only a single empty token, IRIs is [].

 iri_list(Spec, IRIs, Context) :-
     White = "\s\t\n\r",
     split_string(Spec, White, White, Tokens),
     (   Tokens == [""]
     ->  IRIs = []
     ;   maplist(ctx_to_iri(Context), Tokens, Resolved),
         exclude(==(null), Resolved, IRIs)
     ).
 1070
 %!  ctx_to_iri(+Context, +Spec, -IRI)
 %
 %   term_or_curie_or_absiri/3 with the context as first argument,
 %   for use as a maplist/3 closure.

 ctx_to_iri(Context, Token, IRI) :-
     term_or_curie_or_absiri(Token, IRI, Context).
 iri(+Spec, -IRI, +Context)
Used for @href and @src attributes
 %!  iri(+Spec, -IRI, +Context)
 %
 %   IRI is Spec normalized by iri_normalized/3 against the base of
 %   the evaluation context.

 iri(Spec, IRI, Context) :-
     Base = Context.eval_context.base,
     iri_normalized(Spec, Base, IRI).
 1080
 %!  abs_iri(+Spec, -IRI) is semidet.
 %
 %   True if Spec has both a scheme and an authority component.
 %   IRI is then Spec normalized by iri_normalized/2.

 abs_iri(Spec, IRI) :-
     uri_components(Spec, Components),
     uri_data(authority, Components, Authority),
     nonvar(Authority),
     uri_data(scheme, Components, Scheme),
     nonvar(Scheme),
     !,
     iri_normalized(Spec, IRI).
 safe_curie_or_curie_or_iri(+Spec, -IRI, +Context) is det
Implement section 7.4, CURIE and IRI Processing. Used for @about and @resource
 %!  safe_curie_or_curie_or_iri(+Spec, -IRI, +Context)
 %
 %   Try safe_curie_or_curie_or_absiri/3 first. If that fails, IRI
 %   is Spec normalized by uri_normalized/3 against the base of the
 %   evaluation context.

 safe_curie_or_curie_or_iri(Spec, IRI, Context) :-
     (   safe_curie_or_curie_or_absiri(Spec, IRI, Context)
     ->  true
     ;   uri_normalized(Spec, Context.eval_context.base, IRI)
     ).
 1099
 %!  safe_curie_or_curie_or_absiri(+Spec, -IRI, +Context) is semidet.
 %
 %   Resolve Spec as, in this order:
 %
 %     1. an absolute IRI (see abs_iri/2);
 %     2. a safe CURIE (`[...]`), where IRI is `null` if the text
 %        between the brackets is not a valid CURIE;
 %     3. a plain CURIE.
 %
 %   Fails if Spec is none of these.

 safe_curie_or_curie_or_absiri(Spec, IRI, Context) :-
     (   abs_iri(Spec, Absolute)
     ->  IRI = Absolute
     ;   atom_codes(Spec, Codes),
         (   safe_curie(Codes, Inner)
         ->  (   phrase(curie(IRI, Context), Inner)
             ->  true
             ;   IRI = null
             )
         ;   phrase(curie(IRI, Context), Codes)
         )
     ).
 1113
 %!  safe_curie(+Codes, -Curie) is semidet.
 %
 %   True if Codes is `[` followed by Curie followed by `]`.

 safe_curie(Codes, Curie) :-
     Codes = [0'[|Rest],
     append(Curie, `]`, Rest).
 1116
 %!  curie(-IRI, +Context)//
 %
 %   Parse a CURIE.
 %
 %     - `_:ref` denotes a named blank node. The blank node for
 %       `ref` is looked up in the `v` dict of `named_bnodes` of
 %       the evaluation context; if it is missing a new blank node
 %       is created and stored there using b_set_dict/3.
 %     - `:ref` is resolved against the mapping for the empty
 %       prefix.
 %     - `prefix:ref` is resolved against the IRI mapping of
 %       Context and, if the prefix is not mapped there, against
 %       rdfa_core_prefix/2. Fails if the prefix is unknown.

 curie(IRI, Context) -->
     "_:", !, reference_or_empty(Label),
     {   Store = Context.eval_context.named_bnodes,
         (   IRI = Store.v.get(Label)
         ->  true
         ;   new_bnode(IRI, Context),
             b_set_dict(v, Store, Store.v.put(Label, IRI))
         )
     }.
 curie(IRI, Context) -->
     ":", !, reference_or_empty(Local),
     { atom_concat(Context.iri_mapping.get(''), Local, IRI) }.
 curie(IRI, Context) -->
     nc_name(Prefix), ":", !, reference_or_empty(Local),
     {   atom_concat(Context.iri_mapping.get(Prefix), Local, Mapped)
     ->  IRI = Mapped
     ;   rdfa_core_prefix(Prefix, Namespace)
     ->  atom_concat(Namespace, Local, IRI)
     }.
 term_or_curie_or_absiri(+Spec, -IRI, +Context) is det
Used for @datatype and @property, @typeof, @rel and @rev
 %!  term_or_curie_or_absiri(+Spec, -IRI, +Context)
 %
 %   Resolve Spec as an absolute IRI, a term, a CURIE or a relative
 %   IRI, in that order. If Spec is syntactically a term, IRI is
 %   the first of:
 %
 %     1. the non-empty vocabulary of Context followed by the term;
 %     2. the term_iri/3 entry for the lowercase term and the markup
 %        of the evaluation context;
 %     3. the exact entry of the term in the term mapping;
 %     4. an entry of the term mapping whose key matches the term
 %        case insensitively;
 %     5. `null`.

 term_or_curie_or_absiri(Spec, IRI, Context) :-
     (   abs_iri(Spec, Absolute)
     ->  IRI = Absolute
     ;   atom_codes(Spec, Codes),
         (   phrase(term(Term), Codes),
             downcase_atom(Term, Lower)
         ->  (   Vocab = Context.vocab,
                 Vocab \== ''
             ->  atom_concat(Vocab, Term, IRI)
             ;   term_iri(Lower, Context.eval_context.markup, Predefined)
             ->  IRI = Predefined
             ;   IRI = Context.term_mapping.get(Term)
             ->  true
             ;   dict_pairs(Context.term_mapping, _Tag, Pairs),
                 member(Key-IRI, Pairs),
                 downcase_atom(Key, Lower)
             ->  true
             ;   IRI = null
             )
         ;   phrase(curie(IRI, Context), Codes)
         ->  true
         ;   uri_normalized(Spec, Context.eval_context.base, IRI)
         )
     ).
 term_iri(?Term, ?Markup, ?IRI)
See also
- http://www.w3.org/2011/rdfa-context/xhtml-rdfa-1.1
 %!  term_iri(?Term, ?Markup, ?IRI)
 %
 %   Table of predefined terms. The term_expansion/2 rule below
 %   rewrites each two-argument term_iri(Term, Markup) entry at load
 %   time into a term_iri/3 fact whose IRI is the prefix that
 %   default_prefix_mapping/2 gives for Markup followed by Term. The
 %   three-argument entries at the end are stored as written.

 term_expansion(term_iri(Term, Markup), term_iri(Term, Markup, IRI)) :-
     default_prefix_mapping(Markup, Namespace),
     atom_concat(Namespace, Term, IRI).

 term_iri(alternate, xhtml).
 term_iri(appendix, xhtml).
 term_iri(cite, xhtml).
 term_iri(bookmark, xhtml).
 term_iri(contents, xhtml).
 term_iri(chapter, xhtml).
 term_iri(copyright, xhtml).
 term_iri(first, xhtml).
 term_iri(glossary, xhtml).
 term_iri(help, xhtml).
 term_iri(icon, xhtml).
 term_iri(index, xhtml).
 term_iri(last, xhtml).
 term_iri(meta, xhtml).
 term_iri(next, xhtml).
 term_iri(prev, xhtml).
 term_iri(previous, xhtml).
 term_iri(section, xhtml).
 term_iri(start, xhtml).
 term_iri(stylesheet, xhtml).
 term_iri(subsection, xhtml).
 term_iri(top, xhtml).
 term_iri(up, xhtml).
 term_iri(p3pv1, xhtml).

 term_iri(describedby, _, 'http://www.w3.org/2007/05/powder-s#describedby').
 term_iri(license, _, 'http://www.w3.org/1999/xhtml/vocab#license').
 term_iri(role, _, 'http://www.w3.org/1999/xhtml/vocab#role').
 1202
 1203                 /*******************************
 1204                 *           GRAMMARS           *
 1205                 *******************************/
 1206
 %!  prefixes(+Dict0, -Dict)//
 %
 %   Parse a sequence of `name: iri` declarations separated by
 %   white space, adding each pair to the dict. Parsing stops at
 %   the first position where no further declaration can be read.

 prefixes(Dict0, Dict) -->
     ws, nc_name(Prefix), ws, ":", ws, reference(Namespace), !, ws,
     { Dict1 = Dict0.put(Prefix, Namespace) },
     prefixes(Dict1, Dict).
 prefixes(Dict, Dict) -->
     [].
 1211
 %!  ws//
 %
 %   Skip zero or more white space characters.

 ws -->
     (   ws1
     ->  ws
     ;   []
     ).

 %!  ws1//
 %
 %   A single space, tab, carriage return or newline.

 ws1 -->
     [Code],
     { member(Code, [0'\s, 0'\t, 0'\r, 0'\n]) }.
 1219
 %!  nc_name(-Name)//
 %
 %   Parse an NCName. Name is the lowercase version of the text
 %   that was read.

 nc_name(Name) -->
     [Start],
     { nc_name_start_code(Start) },
     nc_name_codes(More),
     { atom_codes(Raw, [Start|More]),
       downcase_atom(Raw, Name)
     }.
 term(-Term)//
7.4.3
 %!  term(-Term)//
 %
 %   Parse a term: an NCName start character followed by zero or
 %   more characters accepted by term_code/1. Unlike nc_name//1,
 %   the case of the text is preserved.

 term(Term) -->
     [Start],
     { nc_name_start_code(Start) },
     term_codes(More),
     { atom_codes(Term, [Start|More]) }.
 1236
 1237
 %!  nc_name_codes(-Codes)//
 %
 %   Greedily read characters accepted by nc_name_code/1.

 nc_name_codes([Code|Codes]) -->
     nc_name_code(Code),
     !,
     nc_name_codes(Codes).
 nc_name_codes([]) -->
     [].

 nc_name_code(Code) -->
     [Code],
     { nc_name_code(Code) }.

 %!  term_codes(-Codes)//
 %
 %   Greedily read characters accepted by term_code/1.

 term_codes([Code|Codes]) -->
     term_code(Code),
     !,
     term_codes(Codes).
 term_codes([]) -->
     [].

 term_code(Code) -->
     [Code],
     { term_code(Code) }.
 1247
 %!  nc_name_start_code(+Code) is semidet.
 %
 %   True if Code may start an NCName: an XML base character or
 %   ideographic, but never a colon.

 nc_name_start_code(Code) :-
     Code \= 0':,
     (   xml_basechar(Code)
     ->  true
     ;   xml_ideographic(Code)
     ).

 %!  nc_name_code(+Code) is semidet.
 %
 %   True if Code may appear inside an NCName: an XML base
 %   character, digit, ideographic, combining character or
 %   extender, but never a colon.

 nc_name_code(Code) :-
     Code \= 0':,
     (   xml_basechar(Code)
     ->  true
     ;   xml_digit(Code)
     ->  true
     ;   xml_ideographic(Code)
     ->  true
     ;   xml_combining_char(Code)
     ->  true
     ;   xml_extender(Code)
     ->  true
     ).

 %!  term_code(+Code) is semidet.
 %
 %   True if Code may appear inside a term: `/` or anything
 %   accepted by nc_name_code/1.

 term_code(Code) :-
     (   Code = 0'/
     ->  true
     ;   nc_name_code(Code)
     ).
 1261
 %!  reference(-IRI)//
 %
 %   Read one arbitrary character followed by all characters up to
 %   the next white space character or the end of the input.

 reference(IRI) -->
     [First],
     reference_codes(More),
     { atom_codes(IRI, [First|More]) }.

 %!  reference_codes(-Codes)//
 %
 %   Read characters up to the next white space character, which
 %   is consumed but not part of Codes, or up to the end of the
 %   input.

 reference_codes([]) -->
     ws1,
     !.
 reference_codes([Code|Codes]) -->
     [Code],
     !,
     reference_codes(Codes).
 reference_codes([]) -->
     [].

 %!  reference_or_empty(-IRI)//
 %
 %   As reference//1, but also accepts the empty string.

 reference_or_empty(IRI) -->
     reference_codes(All),
     { atom_codes(IRI, All) }.
 date_time_type(+DateTime, -DataType) is semidet
True when DataType is the xsd type that matches the lexical representation of DateTime
 %!  date_time_type(+DateTime, -DataType) is semidet.
 %
 %   True when the text of DateTime is completely parsed by the
 %   grammar date_time_type//1, which yields DataType.

 date_time_type(DateTime, DataType) :-
     atom_codes(DateTime, Text),
     phrase(date_time_type(DataType), Text).
 1284
 %!  date_time_type(-DataType)//
 %
 %   Recognise a duration, date-time, date, time, year-month or
 %   year, tried in this order, and unify DataType with the
 %   corresponding xsd type. The first syntax that matches a prefix
 %   of the input is committed to.

 date_time_type(Type) --> duration,   !, { rdf_equal(Type, xsd:duration) }.
 date_time_type(Type) --> date_time,  !, { rdf_equal(Type, xsd:dateTime) }.
 date_time_type(Type) --> date,       !, { rdf_equal(Type, xsd:date) }.
 date_time_type(Type) --> time,       !, { rdf_equal(Type, xsd:time) }.
 date_time_type(Type) --> gyearmonth, !, { rdf_equal(Type, xsd:gYearMonth) }.
 date_time_type(Type) --> gyear,      !, { rdf_equal(Type, xsd:gYear) }.

 % [-]P[nY][nM][nD][T[nH][nM][n[.n]S]]
 duration -->
     opt_minus, "P",
     opt_dy, opt_dm, opt_dd,
     (   "T"
     ->  opt_dh, opt_dm, opt_ds
     ;   []
     ).

 % The cut after the year commits to the way the year was read.
 date_time -->
     opt_minus, yyyy, "-", !,
     mM, "-", dd,
     "T",
     hh, ":", mm, ":", ss,
     opt_fraction, opt_zzzzzz.
 date -->
     opt_minus, yyyy, "-", !,
     mM, "-", dd.
 time -->
     hh, ":", mm, ":", ss,
     opt_fraction.
 gyearmonth -->
     opt_minus, yyyy, "-", !,
     mM.
 gyear -->
     opt_minus, yyyy.

 opt_minus -->
     (   "-"
     ->  []
     ;   []
     ).

 % A year is four digits, optionally preceded by non-zero digits.
 yyyy -->
     dnzs, d, d, d, d.

 dnzs -->
     (   []
     |   dnz, dnzs
     ).

 opt_fraction -->
     (   "."
     ->  ds
     ;   []
     ).

 % Two digit fields with their value ranges.
 mM --> d(Tens), d(Ones), { Month  is Tens*10+Ones, between(1, 12, Month) }.
 dd --> d(Tens), d(Ones), { Day    is Tens*10+Ones, between(1, 31, Day) }.
 hh --> d(Tens), d(Ones), { Hour   is Tens*10+Ones, Hour =< 23 }.
 mm --> d(Tens), d(Ones), { Minute is Tens*10+Ones, Minute =< 59 }.
 ss --> d(Tens), d(Ones), { Second is Tens*10+Ones, Second =< 59 }.

 d(Value) --> [Code], { between(0'0, 0'9, Code), Value is Code-0'0 }.
 d        --> [Code], { between(0'0, 0'9, Code) }.
 dnz      --> [Code], { between(0'1, 0'9, Code) }.

 % Zero or more digits, as many as possible.
 ds -->
     (   d
     ->  ds
     ;   []
     ).

 % Optional time zone: signed hh:mm offset, "Z" or nothing.
 opt_zzzzzz -->
     (   sign, hh, ":", mm
     |   "Z"
     |   []
     ).

 sign -->
     (   "+"
     |   "-"
     ).

 opt_dy --> ( int, "Y" | [] ).
 opt_dm --> ( int, "M" | [] ).
 opt_dd --> ( int, "D" | [] ).
 opt_dh --> ( int, "H" | [] ).
 opt_ds --> ( int, ("." -> int ; []), "S" | [] ).

 % One or more digits.
 int -->
     d, ds.
 1344
 1345                 /*******************************
 1346                 *           TRIPLES            *
 1347                 *******************************/
 add_triple(+Context, +S, +P, +O) is det
Add a triple to the global evaluation context. Triples are embedded in a term, so we can use setarg/3 on the list, while the evaluation context is copied for descending the node hierarchy.
 %!  add_triple(+Context, +S, +P, +O) is det.
 %
 %   Add rdf(S,P,O) in front of the triple list that is kept in the
 %   first argument of the `triples` term of the evaluation context,
 %   using setarg/3. Triples whose subject, predicate or object is
 %   not valid are silently ignored.
 %
 %   If the debug topic rdfa(triple) is enabled, the triple and a
 %   backtrace are printed. Note that this happens before
 %   validation, so the message also appears for ignored triples.

 add_triple(Context, S, P, O) :-
     (   debugging(rdfa(triple))
     ->  debug(rdfa(triple), 'Added { ~p ~p ~p }', [S,P,O]),
         backtrace(4)
     ;   true
     ),
     (   valid_subject(S),
         valid_predicate(P),
         valid_object(O)
     ->  Store = Context.eval_context.triples,
         arg(1, Store, Sofar),
         setarg(1, Store, [rdf(S,P,O)|Sofar])
     ;   true                            % ignored invalid triple.
     ).
 1370
 %!  valid_subject(+S) is semidet.
 %!  valid_predicate(+P) is semidet.
 %!  valid_object(+O) is semidet.
 %
 %   Validity checks used by add_triple/4. Nothing may be `null`, a
 %   predicate may not be a blank node and an object must be an
 %   atom or a valid literal.

 valid_subject(S) :-
     S \== null.

 valid_predicate(P) :-
     P \== null,
     \+ rdf_is_bnode(P).

 valid_object(O) :-
     O \== null,
     (   atom(O)
     ->  true
     ;   valid_literal(O)
     ).

 %!  valid_literal(+Literal) is semidet.
 %
 %   True for literal(Atom), for literal(type(Type, _)) unless Type
 %   is `null`, and for literal(lang(_, _)).

 valid_literal(literal(Value)) :-
     (   atom(Value)
     ->  true
     ;   Value = type(Type, _)
     ->  Type \== null
     ;   Value = lang(_, _)
     ).
 1382
 %!  add_incomplete_triple(+Context, +Dict)
 %
 %   Push Dict in front of the `incomplete_triples` list of Context
 %   using b_set_dict/3.

 add_incomplete_triple(Context, Dict) :-
     debug(rdfa(incomplete), 'Incomplete: ~p', [Dict]),
     Pending = Context.incomplete_triples,
     b_set_dict(incomplete_triples, Context, [Dict|Pending]).
 1389
 1390
 1391                 /*******************************
 1392                 *            PATTERNS          *
 1393                 *******************************/
 apply_patterns(+TriplesIn, -TriplesOut) is det
 Apply RDFa patterns. We need several passes to deal with ordering issues and the possibility that patterns are invalid:
  1. find patterns from rdf(_,rdfa:copy,Pattern)
  2. collect the properties for these patterns and delete patterns that do not have rdf:type rdfa:Pattern.
  3. Actually copy the patterns and delete the patterns themselves.
 %!  apply_patterns(+TriplesIn, -TriplesOut) is det.
 %
 %   Apply RDFa pattern copying. If TriplesIn holds no rdfa:copy
 %   triple, TriplesOut is TriplesIn. Otherwise:
 %
 %     1. build a dict with one key per referenced pattern;
 %     2. collect the properties of these patterns in the dict;
 %     3. drop patterns that are not of type rdfa:Pattern;
 %     4. rewrite the triples using apply_patterns//2.

 apply_patterns(TriplesIn, TriplesOut) :-
     referenced_patterns(TriplesIn, Referenced),
     (   Referenced == []
     ->  TriplesOut = TriplesIn
     ;   sort(Referenced, Unique),
         dict_pairs(Candidates, _, Unique),
         pattern_properties(TriplesIn, Candidates),
         delete_invalid_patterns(Candidates, Patterns),
         phrase(apply_patterns(TriplesIn, Patterns), TriplesOut)
     ).
 1415
 % From here on, pass every term read from this file through
 % rdf_global_term/2, so prefix notation such as rdfa:copy is
 % replaced by the full IRI at load time.

 term_expansion(Term, Expanded) :-
     rdf_global_term(Term, Expanded).
 1418
 %!  referenced_patterns(+Triples, -Pairs)
 %
 %   Pairs holds Target-[] for each rdf(_, rdfa:copy, Target) in
 %   Triples, in the order of appearance. Duplicates are kept.

 referenced_patterns([], []).
 referenced_patterns([rdf(_,rdfa:copy,Target)|Triples], [Target-[]|Pairs]) :-
     !,
     referenced_patterns(Triples, Pairs).
 referenced_patterns([_Other|Triples], Pairs) :-
     referenced_patterns(Triples, Pairs).
 1425
 %!  pattern_properties(+Triples, !Dict)
 %
 %   For each rdf(S,P,O) whose subject S is a key of Dict, push P-O
 %   in front of the value for S using b_set_dict/3. Triples about
 %   other subjects are ignored.

 pattern_properties([], _).
 pattern_properties([rdf(S,P,O)|Triples], Dict) :-
     (   Known = Dict.get(S),
         b_set_dict(S, Dict, [P-O|Known])
     ->  true
     ;   true
     ),
     pattern_properties(Triples, Dict).
 1430
 %!  delete_invalid_patterns(+Patterns0, -Patterns)
 %
 %   Patterns is a dict with the same tag as Patterns0 that holds
 %   only the entries accepted by rdfa_pattern/1.

 delete_invalid_patterns(Patterns0, Patterns) :-
     dict_pairs(Patterns0, Tag, Candidates),
     include(rdfa_pattern, Candidates, Valid),
     dict_pairs(Patterns, Tag, Valid).
 1435
 %!  rdfa_pattern(+Pair) is semidet.
 %
 %   True if Pair is Subject-Properties and Properties contains
 %   rdf:type - rdfa:Pattern.

 rdfa_pattern(_Subject-Properties) :-
     once(member((rdf:type)-(rdfa:'Pattern'), Properties)).
 1438
 %!  apply_patterns(+Triples, +Patterns)//
 %
 %   Rewrite Triples using the dict Patterns, which maps the subject
 %   of each valid pattern to its list of Pred-Object properties:
 %
 %     - rdf(S, rdfa:copy, O) where O is a valid pattern is replaced
 %       by the properties of O copied to S;
 %     - any other triple whose subject is a valid pattern is
 %       deleted;
 %     - all remaining triples are passed unchanged.
 %
 %   An rdfa:copy triple whose target is not a key of Patterns,
 %   because it lacks the rdfa:Pattern type and was removed by
 %   delete_invalid_patterns/2, is kept as an ordinary triple.
 %   Looking the target up with `Dict.O` raised an existence error
 %   for such triples, so the lookup uses get/1, which fails instead.

 apply_patterns([], _) --> [].
 apply_patterns([rdf(S,rdfa:copy,O)|T0], Dict) -->
     { Properties = Dict.get(O) },
     !,
     copy_pattern(Properties, S),
     apply_patterns(T0, Dict).
 apply_patterns([rdf(S,_,_)|T0], Dict) -->
     { _ = Dict.get(S) },
     !,
     apply_patterns(T0, Dict).
 apply_patterns([H|T], Dict) -->
     [H],
     apply_patterns(T, Dict).
 1451
 %!  copy_pattern(+Properties, +Subject)//
 %
 %   Emit rdf(Subject, Pred, Object) for each Pred-Object in
 %   Properties, except for the rdf:type - rdfa:Pattern property
 %   that marks the pattern itself.

 copy_pattern([], _) -->
     [].
 copy_pattern([Property|Properties], Subject) -->
     (   { Property = (rdf:type)-(rdfa:'Pattern') }
     ->  []
     ;   { Property = Pred-Object },
         [rdf(Subject, Pred, Object)]
     ),
     copy_pattern(Properties, Subject).
 1459
 1460
 1461                 /*******************************
 1462                 *       HOOK INTO RDF-DB       *
 1463                 *******************************/
 1464
 % The hooks below add clauses to predicates owned by library(rdf_db).
 :- multifile rdf_db:rdf_load_stream/3.
 :- multifile rdf_db:rdf_file_type/2.
 rdf_db:rdf_load_stream(+Format, +Stream, :Options)
Register library(semweb/rdfa) as loader for HTML RDFa files.
To be done
- Which options need to be forwarded to read_rdfa/3?
 %!  rdf_db:rdf_load_stream(+Format, +Stream, :Options)
 %
 %   Hook that loads RDFa from Stream into the graph found in
 %   Options. Blank nodes get the prefix `__` followed by the graph
 %   name. The triples are asserted in a single transaction, after
 %   which the graph is marked as not modified.

 rdf_db:rdf_load_stream(rdfa, Stream, _Module:Options0) :-
     rdf_db:graph(Options0, Graph),
     atom_concat('__', Graph, BNodePrefix),
     merge_options([anon_prefix(BNodePrefix)], Options0, Options),
     read_rdfa(Stream, Triples, Options),
     rdf_transaction(( forall(member(rdf(S,P,O), Triples),
                              rdf_assert(S, P, O, Graph)),
                       rdf_set_graph(Graph, modified(false))
                     ),
                     parse(Graph)).
 1485
 1486rdf_db:rdf_file_type(html, rdfa)