View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(http_open,
   38          [ http_open/3,                % +URL, -Stream, +Options
   39            http_set_authorization/2,   % +URL, +Authorization
   40            http_close_keep_alive/1     % +Address
   41          ]).   42:- autoload(library(aggregate),[aggregate_all/3]).   43:- autoload(library(apply),[foldl/4,include/3]).   44:- autoload(library(base64),[base64/3]).   45:- autoload(library(debug),[debug/3,debugging/1]).   46:- autoload(library(error),
   47	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   48	    ]).   49:- autoload(library(lists),[last/2,member/2]).   50:- autoload(library(option),
   51	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   52	      option/3, select_option/3
   53	    ]).   54:- autoload(library(readutil),[read_line_to_codes/2]).   55:- autoload(library(socket),
   56	    [tcp_connect/3,negotiate_socks_connection/2]).   57:- autoload(library(uri),
   58	    [ uri_resolve/3, uri_components/2, uri_data/3,
   59              uri_authority_components/2, uri_authority_data/3,
   60	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   61	    ]).   62:- autoload(library(http/http_header),
   63            [ http_parse_header/2, http_post_data/3 ]).   64:- autoload(library(http/http_stream),[stream_range_open/3]).   65:- if(( exists_source(library(ssl)),
   66        \+ current_prolog_flag(pldoc_to_tex,true))).   67:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   68:- endif.

HTTP client library

This library defines http_open/3, which opens a URL as a Prolog stream. The functionality of the library can be extended by loading two additional modules that act as plugins:

library(http/http_ssl_plugin)
Loading this library causes http_open/3 to handle HTTPS connections. Relevant options for SSL certificate handling are handed to ssl_context/3. This plugin is loaded automatically if the scheme https is requested using a default SSL context. See the plugin for additional information regarding security.
library(http/http_cookie)
Loading this library adds tracking cookies to http_open/3. Returned cookies are collected in the Prolog database and supplied for subsequent requests.

Here is a simple example to fetch a web-page:

?- http_open('http://www.google.com/search?q=prolog', In, []),
   copy_stream_data(In, user_output),
   close(In).
<!doctype html><head><title>prolog - Google Search</title><script>
...

The example below fetches the modification time of a web-page. Note that Modified is '' (the empty atom) if the web-server does not provide a time-stamp for the resource. See also parse_time/2.

modified(URL, Stamp) :-
        http_open(URL, In,
                  [ method(head),
                    header(last_modified, Modified)
                  ]),
        close(In),
        Modified \== '',
        parse_time(Modified, Stamp).

Then next example uses Google search. It exploits library(uri) to manage URIs, library(sgml) to load an HTML document and library(xpath) to navigate the parsed HTML. Note that you may need to adjust the XPath queries if the data returned by Google changes.

:- use_module(library(http/http_open)).
:- use_module(library(xpath)).
:- use_module(library(sgml)).
:- use_module(library(uri)).

google(For, Title, HREF) :-
        uri_encoded(query_value, For, Encoded),
        atom_concat('http://www.google.com/search?q=', Encoded, URL),
        http_open(URL, In, []),
        call_cleanup(
            load_html(In, DOM, []),
            close(In)),
        xpath(DOM, //h3(@class=r), Result),
        xpath(Result, //a(@href=HREF0, text), Title),
        uri_components(HREF0, Components),
        uri_data(search, Components, Query),
        uri_query_components(Query, Parts),
        memberchk(q=HREF, Parts).

An example query is below:

?- google(prolog, Title, HREF).
Title = 'SWI-Prolog',
HREF = 'http://www.swi-prolog.org/' ;
Title = 'Prolog - Wikipedia',
HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
Title = 'Prolog - Wikipedia, the free encyclopedia',
HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
HREF = 'http://www.pro-log.nl/' ;
Title = 'Learn Prolog Now!',
HREF = 'http://www.learnprolognow.org/' ;
Title = 'Free Online Version - Learn Prolog
...
See also
- load_html/3 and xpath/3 can be used to parse and navigate HTML documents.
-
http_get/3 and http_post/4 provide an alternative interface that convert the reply depending on the Content-Type header. */
  163:- multifile
  164    http:encoding_filter/3,           % +Encoding, +In0, -In
  165    http:current_transfer_encoding/1, % ?Encoding
  166    http:disable_encoding_filter/1,   % +ContentType
  167    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  168                                      % -NewStreamPair, +Options
  169    http:open_options/2,              % +Parts, -Options
  170    http:write_cookies/3,             % +Out, +Parts, +Options
  171    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  172    http:authenticate_client/2,       % +URL, +Action
  173    http:http_connection_over_proxy/6.  174
  175:- meta_predicate
  176    http_open(+,-,:).  177
  178:- predicate_options(http_open/3, 3,
  179                     [ authorization(compound),
  180                       final_url(-atom),
  181                       header(+atom, -atom),
  182                       headers(-list),
  183                       connection(+atom),
  184                       method(oneof([delete,get,put,head,post,patch,options])),
  185                       size(-integer),
  186                       status_code(-integer),
  187                       output(-stream),
  188                       timeout(number),
  189                       proxy(atom, integer),
  190                       proxy_authorization(compound),
  191                       bypass_proxy(boolean),
  192                       request_header(any),
  193                       user_agent(atom),
  194                       version(-compound),
  195        % The option below applies if library(http/http_header) is loaded
  196                       post(any),
  197        % The options below apply if library(http/http_ssl_plugin)) is loaded
  198                       pem_password_hook(callable),
  199                       cacert_file(atom),
  200                       cert_verify_hook(callable)
  201                     ]).
 user_agent(-Agent) is det
Default value for User-Agent, can be overruled using the option user_agent(Agent) of http_open/3.
  208user_agent('SWI-Prolog').
 http_open(+URL, -Stream, +Options) is det
Open the data at the HTTP server as a Prolog stream. URL is either an atom specifying a URL or a list representing a broken-down URL as specified below. After this predicate succeeds the data can be read from Stream. After completion this stream must be closed using the built-in Prolog predicate close/1. Options provides additional options:
authenticate(+Boolean)
If false (default true), do not try to automatically authenticate the client if a 401 (Unauthorized) status code is received.
authorization(+Term)
Send authorization. See also http_set_authorization/2. Supported schemes:
basic(+User, +Password)
HTTP Basic authentication.
bearer(+Token)
HTTP Bearer authentication.
digest(+User, +Password)
HTTP Digest authentication. This option is only provided if the plugin library(http/http_digest) is also loaded.
connection(+Connection)
Specify the Connection header. Default is close. The alternative is Keep-alive. This maintains a pool of available connections as determined by keep_connection/1. The library(http/websockets) uses Keep-alive, Upgrade. Keep-alive connections can be closed explicitly using http_close_keep_alive/1. Keep-alive connections may significantly improve repetitive requests on the same server, especially if the IP route is long, HTTPS is used or the connection uses a proxy.
final_url(-FinalURL)
Unify FinalURL with the final destination. This differs from the original URL if the returned head of the original indicates an HTTP redirect (codes 301, 302 or 303). Without a redirect, FinalURL is the same as URL if URL is an atom, or a URL constructed from the parts.
header(Name, -AtomValue)
If provided, AtomValue is unified with the value of the indicated field in the reply header. Name is matched case-insensitive and the underscore (_) matches the hyphen (-). Multiple of these options may be provided to extract multiple header fields. If the header is not available AtomValue is unified to the empty atom ('').
headers(-List)
If provided, List is unified with a list of Name(Value) pairs corresponding to fields in the reply header. Name and Value follow the same conventions used by the header(Name,Value) option.
method(+Method)
One of get (default), head, delete, post, put or patch. The head message can be used in combination with the header(Name, Value) option to access information on the resource without actually fetching the resource itself. The returned stream must be closed immediately.

If post(Data) is provided, the default is post.

size(-Size)
Size is unified with the integer value of Content-Length in the reply header.
version(-Version)
Version is a pair Major-Minor, where Major and Minor are integers representing the HTTP version in the reply header.
range(+Range)
Ask for partial content. Range is a term Unit(From,To), where From is an integer and To is either an integer or the atom end. HTTP 1.1 only supports Unit = bytes. E.g., to ask for bytes 1000-1999, use the option range(bytes(1000,1999))
redirect(+Boolean)
If false (default true), do not automatically redirect if a 3XX code is received. Must be combined with status_code(Code) and one of the header options to read the redirect reply. In particular, without status_code(Code) a redirect is mapped to an exception.
status_code(-Code)
If this option is present and Code unifies with the HTTP status code, do not translate errors (4xx, 5xx) into an exception. Instead, http_open/3 behaves as if 2xx (success) is returned, providing the application to read the error document from the returned stream.
output(-Out)
Unify the output stream with Out and do not close it. This can be used to upgrade a connection.
timeout(+Timeout)
If provided, set a timeout on the stream using set_stream/2. With this option if no new data arrives within Timeout seconds the stream raises an exception. Default is to wait forever (infinite).
post(+Data)
Issue a POST request on the HTTP server. Data is handed to http_post_data/3.
proxy(+Host:Port)
Use an HTTP proxy to connect to the outside world. See also socket:proxy_for_url/3. This option overrules the proxy specification defined by socket:proxy_for_url/3.
proxy(+Host, +Port)
Synonym for proxy(+Host:Port). Deprecated.
proxy_authorization(+Authorization)
Send authorization to the proxy. Otherwise the same as the authorization option.
bypass_proxy(+Boolean)
If true, bypass proxy hooks. Default is false.
request_header(Name=Value)
Additional name-value parts are added in the order of appearance to the HTTP request header. No interpretation is done.
max_redirect(+Max)
Sets the maximum length of a redirection chain. This is needed for some IRIs that redirect indefinitely to other IRIs without looping (e.g., redirecting to IRIs with a random element in them). Max must be either a non-negative integer or the atom infinite. The default value is 10.
user_agent(+Agent)
Defines the value of the User-Agent field of the HTTP header. Default is SWI-Prolog.

The hook http:open_options/2 can be used to provide default options based on the broken-down URL. The option status_code(-Code) is particularly useful to query REST interfaces that commonly return status codes other than 200 that need to be be processed by the client code.

Arguments:
URL- is either an atom or string (url) or a list of parts.

When provided, this list may contain the fields scheme, user, password, host, port, path and either query_string (whose argument is an atom) or search (whose argument is a list of Name(Value) or Name=Value compound terms). Only host is mandatory. The example below opens the URL http://www.example.com/my/path?q=Hello%20World&lang=en. Note that values must not be quoted because the library inserts the required quotes.

http_open([ host('www.example.com'),
            path('/my/path'),
            search([ q='Hello world',
                     lang=en
                   ])
          ])
throws
- error(existence_error(url, Id),Context) is raised if the HTTP result code is not in the range 200..299. Context has the shape context(Message, status(Code, TextCode)), where Code is the numeric HTTP code and TextCode is the textual description thereof provided by the server. Message may provide additional details or may be unbound.
See also
- ssl_context/3 for SSL related options if library(http/http_ssl_plugin) is loaded.
  390:- multifile
  391    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  392
  393http_open(URL, Stream, QOptions) :-
  394    meta_options(is_meta, QOptions, Options0),
  395    (   atomic(URL)
  396    ->  parse_url_ex(URL, Parts)
  397    ;   Parts = URL
  398    ),
  399    autoload_https(Parts),
  400    upgrade_ssl_options(Parts, Options0, Options),
  401    add_authorization(Parts, Options, Options1),
  402    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  403    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  404    (   option(bypass_proxy(true), Options)
  405    ->  try_http_proxy(direct, Parts, Stream, Options2)
  406    ;   term_variables(Options2, Vars2),
  407        findall(Result-Vars2,
  408                try_a_proxy(Parts, Result, Options2),
  409                ResultList),
  410        last(ResultList, Status-Vars2)
  411    ->  (   Status = true(_Proxy, Stream)
  412        ->  true
  413        ;   throw(error(proxy_error(tried(ResultList)), _))
  414        )
  415    ;   try_http_proxy(direct, Parts, Stream, Options2)
  416    ).
  417
  418try_a_proxy(Parts, Result, Options) :-
  419    parts_uri(Parts, AtomicURL),
  420    option(host(Host), Parts),
  421    (   (   option(proxy(ProxyHost:ProxyPort), Options)
  422        ;   is_list(Options),
  423            memberchk(proxy(ProxyHost,ProxyPort), Options)
  424        )
  425    ->  Proxy = proxy(ProxyHost, ProxyPort)
  426    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  427    ),
  428    debug(http(proxy),
  429          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  430    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  431    ->  (   var(E)
  432        ->  !, Result = true(Proxy, Stream)
  433        ;   Result = error(Proxy, E)
  434        )
  435    ;   Result = false(Proxy)
  436    ),
  437    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  438
  439try_http_proxy(Method, Parts, Stream, Options0) :-
  440    option(host(Host), Parts),
  441    (   Method == direct
  442    ->  parts_request_uri(Parts, RequestURI)
  443    ;   parts_uri(Parts, RequestURI)
  444    ),
  445    select_option(visited(Visited0), Options0, OptionsV, []),
  446    Options = [visited([Parts|Visited0])|OptionsV],
  447    parts_scheme(Parts, Scheme),
  448    default_port(Scheme, DefPort),
  449    url_part(port(Port), Parts, DefPort),
  450    host_and_port(Host, DefPort, Port, HostPort),
  451    (   option(connection(Connection), Options0),
  452        keep_alive(Connection),
  453        get_from_pool(Host:Port, StreamPair),
  454        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  455              [ Host:Port, StreamPair ]),
  456        catch(send_rec_header(StreamPair, Stream, HostPort,
  457                              RequestURI, Parts, Options),
  458              error(E,_),
  459              keep_alive_error(E))
  460    ->  true
  461    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  462                                        SocketStreamPair, Options, Options1),
  463        (   catch(http:http_protocol_hook(Scheme, Parts,
  464                                          SocketStreamPair,
  465                                          StreamPair, Options),
  466                  Error,
  467                  ( close(SocketStreamPair, [force(true)]),
  468                    throw(Error)))
  469        ->  true
  470        ;   StreamPair = SocketStreamPair
  471        ),
  472        send_rec_header(StreamPair, Stream, HostPort,
  473                        RequestURI, Parts, Options1)
  474    ),
  475    return_final_url(Options).
  476
  477http:http_connection_over_proxy(direct, _, Host:Port,
  478                                StreamPair, Options, Options) :-
  479    !,
  480    open_socket(Host:Port, StreamPair, Options).
  481http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  482                                StreamPair, Options, Options) :-
  483    \+ ( memberchk(scheme(Scheme), Parts),
  484         secure_scheme(Scheme)
  485       ),
  486    !,
  487    % We do not want any /more/ proxy after this
  488    open_socket(ProxyHost:ProxyPort, StreamPair,
  489                [bypass_proxy(true)|Options]).
  490http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  491                                StreamPair, Options, Options) :-
  492    !,
  493    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  494    catch(negotiate_socks_connection(Host:Port, StreamPair),
  495          Error,
  496          ( close(StreamPair, [force(true)]),
  497            throw(Error)
  498          )).
 hooked_options(+Parts, -Options) is nondet
Calls http:open_options/2 and if necessary upgrades old SSL cacerts_file(File) option to a cacerts(List) option to ensure proper merging of options.
  506hooked_options(Parts, Options) :-
  507    http:open_options(Parts, Options0),
  508    upgrade_ssl_options(Parts, Options0, Options).
  509
  510:- if(current_predicate(ssl_upgrade_legacy_options/2)).  511upgrade_ssl_options(Parts, Options0, Options) :-
  512    requires_ssl(Parts),
  513    !,
  514    ssl_upgrade_legacy_options(Options0, Options).
  515:- endif.  516upgrade_ssl_options(_, Options, Options).
  517
  518merge_options_rev(Old, New, Merged) :-
  519    merge_options(New, Old, Merged).
  520
  521is_meta(pem_password_hook).             % SSL plugin callbacks
  522is_meta(cert_verify_hook).
  523
  524
  525http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  526
  527default_port(https, 443) :- !.
  528default_port(wss,   443) :- !.
  529default_port(_,     80).
  530
  531host_and_port(Host, DefPort, DefPort, Host) :- !.
  532host_and_port(Host, _,       Port,    Host:Port).
 autoload_https(+Parts) is det
If the requested scheme is https or wss, load the HTTPS plugin.
  538autoload_https(Parts) :-
  539    requires_ssl(Parts),
  540    memberchk(scheme(S), Parts),
  541    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  542    exists_source(library(http/http_ssl_plugin)),
  543    !,
  544    use_module(library(http/http_ssl_plugin)).
  545autoload_https(_).
  546
  547requires_ssl(Parts) :-
  548    memberchk(scheme(S), Parts),
  549    secure_scheme(S).
  550
  551secure_scheme(https).
  552secure_scheme(wss).
 send_rec_header(+StreamPair, -Stream, +Host, +RequestURI, +Parts, +Options) is det
Send header to Out and process reply. If there is an error or failure, close In and Out and return the error or failure.
  560send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  561    (   catch(guarded_send_rec_header(StreamPair, Stream,
  562                                      Host, RequestURI, Parts, Options),
  563              E, true)
  564    ->  (   var(E)
  565        ->  (   option(output(StreamPair), Options)
  566            ->  true
  567            ;   true
  568            )
  569        ;   close(StreamPair, [force(true)]),
  570            throw(E)
  571        )
  572    ;   close(StreamPair, [force(true)]),
  573        fail
  574    ).
  575
  576guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  577    user_agent(Agent, Options),
  578    method(Options, MNAME),
  579    http_version(Version),
  580    option(connection(Connection), Options, close),
  581    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  582    debug(http(send_request), "> Host: ~w", [Host]),
  583    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  584    debug(http(send_request), "> Connection: ~w", [Connection]),
  585    format(StreamPair,
  586           '~w ~w HTTP/~w\r\n\c
  587               Host: ~w\r\n\c
  588               User-Agent: ~w\r\n\c
  589               Connection: ~w\r\n',
  590           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  591    parts_uri(Parts, URI),
  592    x_headers(Options, URI, StreamPair),
  593    write_cookies(StreamPair, Parts, Options),
  594    (   option(post(PostData), Options)
  595    ->  http_post_data(PostData, StreamPair, [])
  596    ;   format(StreamPair, '\r\n', [])
  597    ),
  598    flush_output(StreamPair),
  599                                    % read the reply header
  600    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  601    update_cookies(Lines, Parts, Options),
  602    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  603            StreamPair, Stream).
 http_version(-Version:atom) is det
HTTP version we publish. We can only use 1.1 if we support chunked encoding.
  611http_version('1.1') :-
  612    http:current_transfer_encoding(chunked),
  613    !.
  614http_version('1.0').
  615
  616method(Options, MNAME) :-
  617    option(post(_), Options),
  618    !,
  619    option(method(M), Options, post),
  620    (   map_method(M, MNAME0)
  621    ->  MNAME = MNAME0
  622    ;   domain_error(method, M)
  623    ).
  624method(Options, MNAME) :-
  625    option(method(M), Options, get),
  626    (   map_method(M, MNAME0)
  627    ->  MNAME = MNAME0
  628    ;   map_method(_, M)
  629    ->  MNAME = M
  630    ;   domain_error(method, M)
  631    ).
  632
  633map_method(delete,  'DELETE').
  634map_method(get,     'GET').
  635map_method(head,    'HEAD').
  636map_method(post,    'POST').
  637map_method(put,     'PUT').
  638map_method(patch,   'PATCH').
  639map_method(options, 'OPTIONS').
 x_headers(+Options, +URI, +Out) is det
Emit extra headers from request_header(Name=Value) options in Options.
To be done
- Use user/password fields
  648x_headers(Options, URI, Out) :-
  649    x_headers_(Options, [url(URI)|Options], Out).
  650
  651x_headers_([], _, _).
  652x_headers_([H|T], Options, Out) :-
  653    x_header(H, Options, Out),
  654    x_headers_(T, Options, Out).
  655
  656x_header(request_header(Name=Value), _, Out) :-
  657    !,
  658    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  659    format(Out, '~w: ~w\r\n', [Name, Value]).
  660x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  661    !,
  662    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  663x_header(authorization(Authorization), Options, Out) :-
  664    !,
  665    auth_header(Authorization, Options, 'Authorization', Out).
  666x_header(range(Spec), _, Out) :-
  667    !,
  668    Spec =.. [Unit, From, To],
  669    (   To == end
  670    ->  ToT = ''
  671    ;   must_be(integer, To),
  672        ToT = To
  673    ),
  674    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  675    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  676x_header(_, _, _).
 auth_header(+AuthOption, +Options, +HeaderName, +Out)
  680auth_header(basic(User, Password), _, Header, Out) :-
  681    !,
  682    format(codes(Codes), '~w:~w', [User, Password]),
  683    phrase(base64(Codes), Base64Codes),
  684    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  685    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  686auth_header(bearer(Token), _, Header, Out) :-
  687    !,
  688    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  689    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  690auth_header(Auth, Options, _, Out) :-
  691    option(url(URL), Options),
  692    add_method(Options, Options1),
  693    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  694    !.
  695auth_header(Auth, _, _, _) :-
  696    domain_error(authorization, Auth).
  697
  698user_agent(Agent, Options) :-
  699    (   option(user_agent(Agent), Options)
  700    ->  true
  701    ;   user_agent(Agent)
  702    ).
  703
  704add_method(Options0, Options) :-
  705    option(method(_), Options0),
  706    !,
  707    Options = Options0.
  708add_method(Options0, Options) :-
  709    option(post(_), Options0),
  710    !,
  711    Options = [method(post)|Options0].
  712add_method(Options0, [method(get)|Options0]).
 do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header, +Options, +Parts, +Host, +In, -FinalIn) is det
Handle the HTTP status once available. If 200-299, we are ok. If a redirect, redo the open, returning a new stream. Else issue an error.
Errors
- existence_error(url, URL)
  723                                        % Redirections
  724do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  725    redirect_code(Code),
  726    option(redirect(true), Options0, true),
  727    location(Lines, RequestURI),
  728    !,
  729    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  730    close(In),
  731    parts_uri(Parts, Base),
  732    uri_resolve(RequestURI, Base, Redirected),
  733    parse_url_ex(Redirected, RedirectedParts),
  734    (   redirect_limit_exceeded(Options0, Max)
  735    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  736        throw(error(permission_error(redirect, http, Redirected),
  737                    context(_, Comment)))
  738    ;   redirect_loop(RedirectedParts, Options0)
  739    ->  throw(error(permission_error(redirect, http, Redirected),
  740                    context(_, 'Redirection loop')))
  741    ;   true
  742    ),
  743    redirect_options(Options0, Options),
  744    http_open(RedirectedParts, Stream, Options).
  745                                        % Need authentication
  746do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  747    authenticate_code(Code),
  748    option(authenticate(true), Options0, true),
  749    parts_uri(Parts, URI),
  750    parse_headers(Lines, Headers),
  751    http:authenticate_client(
  752             URI,
  753             auth_reponse(Headers, Options0, Options)),
  754    !,
  755    close(In0),
  756    http_open(Parts, Stream, Options).
  757                                        % Accepted codes
  758do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  759    (   option(status_code(Code), Options),
  760        Lines \== []
  761    ->  true
  762    ;   successful_code(Code)
  763    ),
  764    !,
  765    parts_uri(Parts, URI),
  766    parse_headers(Lines, Headers),
  767    return_version(Options, Version),
  768    return_size(Options, Headers),
  769    return_fields(Options, Headers),
  770    return_headers(Options, Headers),
  771    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  772    transfer_encoding_filter(Lines, In1, In),
  773                                    % properly re-initialise the stream
  774    set_stream(In, file_name(URI)),
  775    set_stream(In, record_position(true)).
  776do_open(_, _, _, [], Options, _, _, _, _) :-
  777    option(connection(Connection), Options),
  778    keep_alive(Connection),
  779    !,
  780    throw(error(keep_alive(closed),_)).
  781                                        % report anything else as error
  782do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  783    parts_uri(Parts, URI),
  784    (   map_error_code(Code, Error)
  785    ->  Formal =.. [Error, url, URI]
  786    ;   Formal = existence_error(url, URI)
  787    ),
  788    throw(error(Formal, context(_, status(Code, Comment)))).
  789
  790
  791successful_code(Code) :-
  792    between(200, 299, Code).
 redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet
True if we have exceeded the maximum redirection length (default 10).
  798redirect_limit_exceeded(Options, Max) :-
  799    option(visited(Visited), Options, []),
  800    length(Visited, N),
  801    option(max_redirect(Max), Options, 10),
  802    (Max == infinite -> fail ; N > Max).
 redirect_loop(+Parts, +Options) is semidet
True if we are in a redirection loop. Note that some sites redirect once to the same place using cookies or similar, so we allow for two tries. In fact, we should probably test whether authorization or cookie headers have changed.
  812redirect_loop(Parts, Options) :-
  813    option(visited(Visited), Options, []),
  814    include(==(Parts), Visited, Same),
  815    length(Same, Count),
  816    Count > 2.
 redirect_options(+Options0, -Options) is det
A redirect from a POST should do a GET on the returned URI. This means we must remove the method(post) and post(Data) options from the original option-list.
  825redirect_options(Options0, Options) :-
  826    (   select_option(post(_), Options0, Options1)
  827    ->  true
  828    ;   Options1 = Options0
  829    ),
  830    (   select_option(method(Method), Options1, Options),
  831        \+ redirect_method(Method)
  832    ->  true
  833    ;   Options = Options1
  834    ).
  835
  836redirect_method(delete).
  837redirect_method(get).
  838redirect_method(head).
 map_error_code(+HTTPCode, -PrologError) is semidet
Map HTTP error codes to Prolog errors.
To be done
- Many more maps. Unfortunately many have no sensible Prolog counterpart.
  848map_error_code(401, permission_error).
  849map_error_code(403, permission_error).
  850map_error_code(404, existence_error).
  851map_error_code(405, permission_error).
  852map_error_code(407, permission_error).
  853map_error_code(410, existence_error).
  854
  855redirect_code(301).                     % Moved Permanently
  856redirect_code(302).                     % Found (previously "Moved Temporary")
  857redirect_code(303).                     % See Other
  858redirect_code(307).                     % Temporary Redirect
  859
  860authenticate_code(401).
 open_socket(+Address, -StreamPair, +Options) is det
Create and connect a client socket to Address. Options
timeout(+Timeout)
Sets timeout on the stream, after connecting the socket.
To be done
- Make timeout also work on tcp_connect/4.
- This is the same as do_connect/4 in http_client.pl
  873open_socket(Address, StreamPair, Options) :-
  874    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  875    tcp_connect(Address, StreamPair, Options),
  876    stream_pair(StreamPair, In, Out),
  877    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  878    set_stream(In, record_position(false)),
  879    (   option(timeout(Timeout), Options)
  880    ->  set_stream(In, timeout(Timeout))
  881    ;   true
  882    ).
  883
  884
  885return_version(Options, Major-Minor) :-
  886    option(version(Major-Minor), Options, _).
  887
  888return_size(Options, Headers) :-
  889    (   memberchk(content_length(Size), Headers)
  890    ->  option(size(Size), Options, _)
  891    ;   true
  892    ).
  893
  894return_fields([], _).
  895return_fields([header(Name, Value)|T], Headers) :-
  896    !,
  897    (   Term =.. [Name,Value],
  898        memberchk(Term, Headers)
  899    ->  true
  900    ;   Value = ''
  901    ),
  902    return_fields(T, Headers).
  903return_fields([_|T], Lines) :-
  904    return_fields(T, Lines).
  905
  906return_headers(Options, Headers) :-
  907    option(headers(Headers), Options, _).
 parse_headers(+Lines, -Headers:list(compound)) is det
Parse the header lines for the headers(-List) option. Invalid header lines are skipped, printing a warning using pring_message/2.
  915parse_headers([], []) :- !.
  916parse_headers([Line|Lines], Headers) :-
  917    catch(http_parse_header(Line, [Header]), Error, true),
  918    (   var(Error)
  919    ->  Headers = [Header|More]
  920    ;   print_message(warning, Error),
  921        Headers = More
  922    ),
  923    parse_headers(Lines, More).
 return_final_url(+Options) is semidet
If Options contains final_url(URL), unify URL with the final URL after redirections.
  931return_final_url(Options) :-
  932    option(final_url(URL), Options),
  933    var(URL),
  934    !,
  935    option(visited([Parts|_]), Options),
  936    parts_uri(Parts, URL).
  937return_final_url(_).
 transfer_encoding_filter(+Lines, +In0, -In) is det
Install filters depending on the transfer encoding. If In0 is a stream-pair, we close the output side. If transfer-encoding is not specified, the content-encoding is interpreted as a synonym for transfer-encoding, because many servers incorrectly depend on this. Exceptions to this are content-types for which disable_encoding_filter/1 holds.
  949transfer_encoding_filter(Lines, In0, In) :-
  950    transfer_encoding(Lines, Encoding),
  951    !,
  952    transfer_encoding_filter_(Encoding, In0, In).
  953transfer_encoding_filter(Lines, In0, In) :-
  954    content_encoding(Lines, Encoding),
  955    content_type(Lines, Type),
  956    \+ http:disable_encoding_filter(Type),
  957    !,
  958    transfer_encoding_filter_(Encoding, In0, In).
  959transfer_encoding_filter(_, In, In).
  960
  961transfer_encoding_filter_(Encoding, In0, In) :-
  962    stream_pair(In0, In1, Out),
  963    (   nonvar(Out)
  964    ->  close(Out)
  965    ;   true
  966    ),
  967    (   http:encoding_filter(Encoding, In1, In)
  968    ->  true
  969    ;   autoload_encoding(Encoding),
  970        http:encoding_filter(Encoding, In1, In)
  971    ->  true
  972    ;   domain_error(http_encoding, Encoding)
  973    ).
  974
  975:- multifile
  976    autoload_encoding/1.  977
  978:- if(exists_source(library(zlib))).  979autoload_encoding(gzip) :-
  980    use_module(library(zlib)).
  981:- endif.  982
  983content_type(Lines, Type) :-
  984    member(Line, Lines),
  985    phrase(field('content-type'), Line, Rest),
  986    !,
  987    atom_codes(Type, Rest).
 http:disable_encoding_filter(+ContentType) is semidet
Do not use the Content-encoding as Transfer-encoding encoding for specific values of ContentType. This predicate is multifile and can thus be extended by the user.
  995http:disable_encoding_filter('application/x-gzip').
  996http:disable_encoding_filter('application/x-tar').
  997http:disable_encoding_filter('x-world/x-vrml').
  998http:disable_encoding_filter('application/zip').
  999http:disable_encoding_filter('application/x-gzip').
 1000http:disable_encoding_filter('application/x-zip-compressed').
 1001http:disable_encoding_filter('application/x-compress').
 1002http:disable_encoding_filter('application/x-compressed').
 1003http:disable_encoding_filter('application/x-spoon').
 transfer_encoding(+Lines, -Encoding) is semidet
True if Encoding is the value of the Transfer-encoding header.
 1010transfer_encoding(Lines, Encoding) :-
 1011    what_encoding(transfer_encoding, Lines, Encoding).
 1012
 1013what_encoding(What, Lines, Encoding) :-
 1014    member(Line, Lines),
 1015    phrase(encoding_(What, Debug), Line, Rest),
 1016    !,
 1017    atom_codes(Encoding, Rest),
 1018    debug(http(What), '~w: ~p', [Debug, Rest]).
 1019
 1020encoding_(content_encoding, 'Content-encoding') -->
 1021    field('content-encoding').
 1022encoding_(transfer_encoding, 'Transfer-encoding') -->
 1023    field('transfer-encoding').
 content_encoding(+Lines, -Encoding) is semidet
True if Encoding is the value of the Content-encoding header.
 1030content_encoding(Lines, Encoding) :-
 1031    what_encoding(content_encoding, Lines, Encoding).
 read_header(+In:stream, +Parts, -Version, -Code:int, -Comment:atom, -Lines:list) is det
Read the HTTP reply-header. If the reply is completely empty an existence error is thrown. If the replied header is otherwise invalid a 500 HTTP error is simulated, having the comment Invalid reply header.
Arguments:
Parts- A list of compound terms that describe the parsed request URI.
Version- HTTP reply version as Major-Minor pair
Code- Numeric HTTP reply-code
Comment- Comment of reply-code as atom
Lines- Remaining header lines as code-lists.
Errors
- existence_error(http_reply, Uri)
 1050read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1051    read_line_to_codes(In, Line),
 1052    (   Line == end_of_file
 1053    ->  parts_uri(Parts, Uri),
 1054        existence_error(http_reply,Uri)
 1055    ;   true
 1056    ),
 1057    Line \== end_of_file,
 1058    phrase(first_line(Major-Minor, Code, Comment), Line),
 1059    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1060    read_line_to_codes(In, Line2),
 1061    rest_header(Line2, In, Lines),
 1062    !,
 1063    (   debugging(http(open))
 1064    ->  forall(member(HL, Lines),
 1065               debug(http(open), '~s', [HL]))
 1066    ;   true
 1067    ).
 1068read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1069
 1070rest_header([], _, []) :- !.            % blank line: end of header
 1071rest_header(L0, In, [L0|L]) :-
 1072    read_line_to_codes(In, L1),
 1073    rest_header(L1, In, L).
 content_length(+Header, -Length:int) is semidet
Find the Content-Length in an HTTP reply-header.
 1079content_length(Lines, Length) :-
 1080    member(Line, Lines),
 1081    phrase(content_length(Length0), Line),
 1082    !,
 1083    Length = Length0.
 1084
 1085location(Lines, RequestURI) :-
 1086    member(Line, Lines),
 1087    phrase(atom_field(location, RequestURI), Line),
 1088    !.
 1089
 1090connection(Lines, Connection) :-
 1091    member(Line, Lines),
 1092    phrase(atom_field(connection, Connection0), Line),
 1093    !,
 1094    Connection = Connection0.
 1095
 1096first_line(Major-Minor, Code, Comment) -->
 1097    "HTTP/", integer(Major), ".", integer(Minor),
 1098    skip_blanks,
 1099    integer(Code),
 1100    skip_blanks,
 1101    rest(Comment).
 1102
 1103atom_field(Name, Value) -->
 1104    field(Name),
 1105    rest(Value).
 1106
 1107content_length(Len) -->
 1108    field('content-length'),
 1109    integer(Len).
 1110
 1111field(Name) -->
 1112    { atom_codes(Name, Codes) },
 1113    field_codes(Codes).
 1114
 1115field_codes([]) -->
 1116    ":",
 1117    skip_blanks.
 1118field_codes([H|T]) -->
 1119    [C],
 1120    { match_header_char(H, C)
 1121    },
 1122    field_codes(T).
 1123
 1124match_header_char(C, C) :- !.
 1125match_header_char(C, U) :-
 1126    code_type(C, to_lower(U)),
 1127    !.
 1128match_header_char(0'_, 0'-).
 1129
 1130
 1131skip_blanks -->
 1132    [C],
 1133    { code_type(C, white)
 1134    },
 1135    !,
 1136    skip_blanks.
 1137skip_blanks -->
 1138    [].
 integer(-Int)//
Read 1 or more digits and return as integer.
 1144integer(Code) -->
 1145    digit(D0),
 1146    digits(D),
 1147    { number_codes(Code, [D0|D])
 1148    }.
 1149
 1150digit(C) -->
 1151    [C],
 1152    { code_type(C, digit)
 1153    }.
 1154
 1155digits([D0|D]) -->
 1156    digit(D0),
 1157    !,
 1158    digits(D).
 1159digits([]) -->
 1160    [].
 rest(-Atom:atom)//
Get rest of input as an atom.
 1166rest(Atom) --> call(rest_(Atom)).
 1167
 1168rest_(Atom, L, []) :-
 1169    atom_codes(Atom, L).
 1170
 1171
 1172                 /*******************************
 1173                 *   AUTHORIZATION MANAGEMENT   *
 1174                 *******************************/
 http_set_authorization(+URL, +Authorization) is det
Set user/password to supply with URLs that have URL as prefix. If Authorization is the atom -, possibly defined authorization is cleared. For example:
?- http_set_authorization('http://www.example.com/private/',
                          basic('John', 'Secret'))
To be done
- Move to a separate module, so http_get/3, etc. can use this too.
 1190:- dynamic
 1191    stored_authorization/2,
 1192    cached_authorization/2. 1193
 1194http_set_authorization(URL, Authorization) :-
 1195    must_be(atom, URL),
 1196    retractall(stored_authorization(URL, _)),
 1197    (   Authorization = (-)
 1198    ->  true
 1199    ;   check_authorization(Authorization),
 1200        assert(stored_authorization(URL, Authorization))
 1201    ),
 1202    retractall(cached_authorization(_,_)).
 1203
 1204check_authorization(Var) :-
 1205    var(Var),
 1206    !,
 1207    instantiation_error(Var).
 1208check_authorization(basic(User, Password)) :-
 1209    must_be(atom, User),
 1210    must_be(text, Password).
 1211check_authorization(digest(User, Password)) :-
 1212    must_be(atom, User),
 1213    must_be(text, Password).
 authorization(+URL, -Authorization) is semidet
True if Authorization must be supplied for URL.
To be done
- Cleanup cache if it gets too big.
 1221authorization(_, _) :-
 1222    \+ stored_authorization(_, _),
 1223    !,
 1224    fail.
 1225authorization(URL, Authorization) :-
 1226    cached_authorization(URL, Authorization),
 1227    !,
 1228    Authorization \== (-).
 1229authorization(URL, Authorization) :-
 1230    (   stored_authorization(Prefix, Authorization),
 1231        sub_atom(URL, 0, _, _, Prefix)
 1232    ->  assert(cached_authorization(URL, Authorization))
 1233    ;   assert(cached_authorization(URL, -)),
 1234        fail
 1235    ).
 1236
 1237add_authorization(_, Options, Options) :-
 1238    option(authorization(_), Options),
 1239    !.
 1240add_authorization(Parts, Options0, Options) :-
 1241    url_part(user(User), Parts),
 1242    url_part(password(Passwd), Parts),
 1243    !,
 1244    Options = [authorization(basic(User,Passwd))|Options0].
 1245add_authorization(Parts, Options0, Options) :-
 1246    stored_authorization(_, _) ->   % quick test to avoid work
 1247    parts_uri(Parts, URL),
 1248    authorization(URL, Auth),
 1249    !,
 1250    Options = [authorization(Auth)|Options0].
 1251add_authorization(_, Options, Options).
 parse_url_ex(+URL, -Parts)
Parts: Scheme, Host, Port, User:Password, RequestURI (no fragment).
 1259parse_url_ex(URL, [uri(URL)|Parts]) :-
 1260    uri_components(URL, Components),
 1261    phrase(components(Components), Parts),
 1262    (   option(host(_), Parts)
 1263    ->  true
 1264    ;   domain_error(url, URL)
 1265    ).
 1266
 1267components(Components) -->
 1268    uri_scheme(Components),
 1269    uri_path(Components),
 1270    uri_authority(Components),
 1271    uri_request_uri(Components).
 1272
 1273uri_scheme(Components) -->
 1274    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1275    !,
 1276    [ scheme(Scheme)
 1277    ].
 1278uri_scheme(_) --> [].
 1279
 1280uri_path(Components) -->
 1281    { uri_data(path, Components, Path0), nonvar(Path0),
 1282      (   Path0 == ''
 1283      ->  Path = (/)
 1284      ;   Path = Path0
 1285      )
 1286    },
 1287    !,
 1288    [ path(Path)
 1289    ].
 1290uri_path(_) --> [].
 1291
 1292uri_authority(Components) -->
 1293    { uri_data(authority, Components, Auth), nonvar(Auth),
 1294      !,
 1295      uri_authority_components(Auth, Data)
 1296    },
 1297    [ authority(Auth) ],
 1298    auth_field(user, Data),
 1299    auth_field(password, Data),
 1300    auth_field(host, Data),
 1301    auth_field(port, Data).
 1302uri_authority(_) --> [].
 1303
 1304auth_field(Field, Data) -->
 1305    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1306      !,
 1307      (   atom(EncValue)
 1308      ->  uri_encoded(query_value, Value, EncValue)
 1309      ;   Value = EncValue
 1310      ),
 1311      Part =.. [Field,Value]
 1312    },
 1313    [ Part ].
 1314auth_field(_, _) --> [].
 1315
 1316uri_request_uri(Components) -->
 1317    { uri_data(path, Components, Path0),
 1318      uri_data(search, Components, Search),
 1319      (   Path0 == ''
 1320      ->  Path = (/)
 1321      ;   Path = Path0
 1322      ),
 1323      uri_data(path, Components2, Path),
 1324      uri_data(search, Components2, Search),
 1325      uri_components(RequestURI, Components2)
 1326    },
 1327    [ request_uri(RequestURI)
 1328    ].
 parts_scheme(+Parts, -Scheme) is det
 parts_uri(+Parts, -URI) is det
 parts_request_uri(+Parts, -RequestURI) is det
 parts_search(+Parts, -Search) is det
 parts_authority(+Parts, -Authority) is semidet
 1336parts_scheme(Parts, Scheme) :-
 1337    url_part(scheme(Scheme), Parts),
 1338    !.
 1339parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1340    url_part(protocol(Scheme), Parts),
 1341    !.
 1342parts_scheme(_, http).
 1343
 1344parts_authority(Parts, Auth) :-
 1345    url_part(authority(Auth), Parts),
 1346    !.
 1347parts_authority(Parts, Auth) :-
 1348    url_part(host(Host), Parts, _),
 1349    url_part(port(Port), Parts, _),
 1350    url_part(user(User), Parts, _),
 1351    url_part(password(Password), Parts, _),
 1352    uri_authority_components(Auth,
 1353                             uri_authority(User, Password, Host, Port)).
 1354
 1355parts_request_uri(Parts, RequestURI) :-
 1356    option(request_uri(RequestURI), Parts),
 1357    !.
 1358parts_request_uri(Parts, RequestURI) :-
 1359    url_part(path(Path), Parts, /),
 1360    ignore(parts_search(Parts, Search)),
 1361    uri_data(path, Data, Path),
 1362    uri_data(search, Data, Search),
 1363    uri_components(RequestURI, Data).
 1364
 1365parts_search(Parts, Search) :-
 1366    option(query_string(Search), Parts),
 1367    !.
 1368parts_search(Parts, Search) :-
 1369    option(search(Fields), Parts),
 1370    !,
 1371    uri_query_components(Search, Fields).
 1372
 1373
 1374parts_uri(Parts, URI) :-
 1375    option(uri(URI), Parts),
 1376    !.
 1377parts_uri(Parts, URI) :-
 1378    parts_scheme(Parts, Scheme),
 1379    ignore(parts_authority(Parts, Auth)),
 1380    parts_request_uri(Parts, RequestURI),
 1381    uri_components(RequestURI, Data),
 1382    uri_data(scheme, Data, Scheme),
 1383    uri_data(authority, Data, Auth),
 1384    uri_components(URI, Data).
 1385
 1386parts_port(Parts, Port) :-
 1387    parts_scheme(Parts, Scheme),
 1388    default_port(Scheme, DefPort),
 1389    url_part(port(Port), Parts, DefPort).
 1390
 1391url_part(Part, Parts) :-
 1392    Part =.. [Name,Value],
 1393    Gen =.. [Name,RawValue],
 1394    option(Gen, Parts),
 1395    !,
 1396    Value = RawValue.
 1397
 1398url_part(Part, Parts, Default) :-
 1399    Part =.. [Name,Value],
 1400    Gen =.. [Name,RawValue],
 1401    (   option(Gen, Parts)
 1402    ->  Value = RawValue
 1403    ;   Value = Default
 1404    ).
 1405
 1406
 1407                 /*******************************
 1408                 *            COOKIES           *
 1409                 *******************************/
 1410
 1411write_cookies(Out, Parts, Options) :-
 1412    http:write_cookies(Out, Parts, Options),
 1413    !.
 1414write_cookies(_, _, _).
 1415
 1416update_cookies(_, _, _) :-
 1417    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1418    !.
 1419update_cookies(Lines, Parts, Options) :-
 1420    (   member(Line, Lines),
 1421        phrase(atom_field('set_cookie', CookieData), Line),
 1422        http:update_cookies(CookieData, Parts, Options),
 1423        fail
 1424    ;   true
 1425    ).
 1426
 1427
 1428                 /*******************************
 1429                 *           OPEN ANY           *
 1430                 *******************************/
 1431
 1432:- multifile iostream:open_hook/6.
 iostream:open_hook(+Spec, +Mode, -Stream, -Close, +Options0, -Options) is semidet
Hook implementation that makes open_any/5 support http and https URLs for Mode == read.
 1440iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1441    (atom(URL) -> true ; string(URL)),
 1442    uri_is_global(URL),
 1443    uri_components(URL, Components),
 1444    uri_data(scheme, Components, Scheme),
 1445    http_scheme(Scheme),
 1446    !,
 1447    Options = Options0,
 1448    Close = close(Stream),
 1449    http_open(URL, Stream, Options0).
 1450
 1451http_scheme(http).
 1452http_scheme(https).
 1453
 1454
 1455                 /*******************************
 1456                 *          KEEP-ALIVE          *
 1457                 *******************************/
 consider_keep_alive(+HeaderLines, +Parts, +Host, +Stream0, -Stream, +Options) is det
 1463consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1464    option(connection(Asked), Options),
 1465    keep_alive(Asked),
 1466    connection(Lines, Given),
 1467    keep_alive(Given),
 1468    content_length(Lines, Bytes),
 1469    !,
 1470    stream_pair(StreamPair, In0, _),
 1471    connection_address(Host, Parts, HostPort),
 1472    debug(http(connection),
 1473          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1474    stream_range_open(In0, In,
 1475                      [ size(Bytes),
 1476                        onclose(keep_alive(StreamPair, HostPort))
 1477                      ]).
 1478consider_keep_alive(_, _, _, Stream, Stream, _).
 1479
 1480connection_address(Host, _, Host) :-
 1481    Host = _:_,
 1482    !.
 1483connection_address(Host, Parts, Host:Port) :-
 1484    parts_port(Parts, Port).
 1485
 1486keep_alive(keep_alive) :- !.
 1487keep_alive(Connection) :-
 1488    downcase_atom(Connection, 'keep-alive').
 1489
 1490:- public keep_alive/4. 1491
 1492keep_alive(StreamPair, Host, _In, 0) :-
 1493    !,
 1494    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1495    add_to_pool(Host, StreamPair).
 1496keep_alive(StreamPair, Host, In, Left) :-
 1497    Left < 100,
 1498    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1499    read_incomplete(In, Left),
 1500    add_to_pool(Host, StreamPair),
 1501    !.
 1502keep_alive(StreamPair, _, _, _) :-
 1503    debug(http(connection),
 1504          'Closing connection due to excessive unprocessed input', []),
 1505    (   debugging(http(connection))
 1506    ->  catch(close(StreamPair), E,
 1507              print_message(warning, E))
 1508    ;   close(StreamPair, [force(true)])
 1509    ).
 read_incomplete(+In, +Left) is semidet
If we have not all input from a Keep-alive connection, read the remainder if it is short. Else, we fail and close the stream.
 1516read_incomplete(In, Left) :-
 1517    catch(setup_call_cleanup(
 1518              open_null_stream(Null),
 1519              copy_stream_data(In, Null, Left),
 1520              close(Null)),
 1521          _,
 1522          fail).
 1523
 1524:- dynamic
 1525    connection_pool/4,              % Hash, Address, Stream, Time
 1526    connection_gc_time/1. 1527
 1528add_to_pool(Address, StreamPair) :-
 1529    keep_connection(Address),
 1530    get_time(Now),
 1531    term_hash(Address, Hash),
 1532    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1533
 1534get_from_pool(Address, StreamPair) :-
 1535    term_hash(Address, Hash),
 1536    retract(connection_pool(Hash, Address, StreamPair, _)).
 keep_connection(+Address) is semidet
Succeeds if we want to keep the connection open. We currently keep a maximum of 10 connections waiting and a maximum of 2 waiting for the same address. Connections older than 2 seconds are closed.
 1545keep_connection(Address) :-
 1546    close_old_connections(2),
 1547    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1548    C =< 10,
 1549    term_hash(Address, Hash),
 1550    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1551    Count =< 2.
 1552
 1553close_old_connections(Timeout) :-
 1554    get_time(Now),
 1555    Before is Now - Timeout,
 1556    (   connection_gc_time(GC),
 1557        GC > Before
 1558    ->  true
 1559    ;   (   retractall(connection_gc_time(_)),
 1560            asserta(connection_gc_time(Now)),
 1561            connection_pool(Hash, Address, StreamPair, Added),
 1562            Added < Before,
 1563            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1564            debug(http(connection),
 1565                  'Closing inactive keep-alive to ~p', [Address]),
 1566            close(StreamPair, [force(true)]),
 1567            fail
 1568        ;   true
 1569        )
 1570    ).
 http_close_keep_alive(+Address) is det
Close all keep-alive connections matching Address. Address is of the form Host:Port. In particular, http_close_keep_alive(_) closes all currently known keep-alive connections.
 1579http_close_keep_alive(Address) :-
 1580    forall(get_from_pool(Address, StreamPair),
 1581           close(StreamPair, [force(true)])).
 keep_alive_error(+Error)
Deal with an error from reusing a keep-alive connection. If the error is due to an I/O error or end-of-file, fail to backtrack over get_from_pool/2. Otherwise it is a real error and we thus re-raise it.
 1590keep_alive_error(keep_alive(closed)) :-
 1591    !,
 1592    debug(http(connection), 'Keep-alive connection was closed', []),
 1593    fail.
 1594keep_alive_error(io_error(_,_)) :-
 1595    !,
 1596    debug(http(connection), 'IO error on Keep-alive connection', []),
 1597    fail.
 1598keep_alive_error(Error) :-
 1599    throw(Error).
 1600
 1601
 1602                 /*******************************
 1603                 *     HOOK DOCUMENTATION       *
 1604                 *******************************/
 http:open_options(+Parts, -Options) is nondet
This hook is used by the HTTP client library to define default options based on the the broken-down request-URL. The following example redirects all trafic, except for localhost over a proxy:
:- multifile
    http:open_options/2.

http:open_options(Parts, Options) :-
    option(host(Host), Parts),
    Host \== localhost,
    Options = [proxy('proxy.local', 3128)].

This hook may return multiple solutions. The returned options are combined using merge_options/3 where earlier solutions overrule later solutions.

 http:write_cookies(+Out, +Parts, +Options) is semidet
Emit a Cookie: header for the current connection. Out is an open stream to the HTTP server, Parts is the broken-down request (see uri_components/2) and Options is the list of options passed to http_open. The predicate is called as if using ignore/1.
See also
- complements http:update_cookies/3.
- library(http/http_cookie) implements cookie handling on top of these hooks.
 http:update_cookies(+CookieData, +Parts, +Options) is semidet
Update the cookie database. CookieData is the value of the Set-Cookie field, Parts is the broken-down request (see uri_components/2) and Options is the list of options passed to http_open.
See also
- complements http:write_cookies
- library(http/http_cookies) implements cookie handling on top of these hooks.