View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@cs.vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 2009-2017, VU University Amsterdam
    7
    8    This program is free software; you can redistribute it and/or
    9    modify it under the terms of the GNU General Public License
   10    as published by the Free Software Foundation; either version 2
   11    of the License, or (at your option) any later version.
   12
   13    This program is distributed in the hope that it will be useful,
   14    but WITHOUT ANY WARRANTY; without even the implied warranty of
   15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   16    GNU General Public License for more details.
   17
   18    You should have received a copy of the GNU General Public
   19    License along with this library; if not, write to the Free Software
   20    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   21
   22    As a special exception, if you link this library with other files,
   23    compiled with a Free Software compiler, to produce an executable, this
   24    library does not by itself cause the resulting executable to be covered
   25    by the GNU General Public License. This exception does not however
   26    invalidate any other reasons why the executable file might be covered by
   27    the GNU General Public License.
   28*/
   29
   30:- module(http_cgi,
   31	  [ http_run_cgi/3,		% +Script, +Options, +Request
   32	    http_cgi_handler/2		% +Alias, +Request
   33	  ]).   34:- use_module(library(process)).   35:- use_module(library(uri)).   36:- use_module(library(debug)).   37:- use_module(library(lists)).   38:- use_module(library(http/http_dispatch)).   39:- use_module(library(http/http_wrapper)).   40:- use_module(library(http/http_stream)).   41:- use_module(library(http/http_host)).   42
   43:- predicate_options(http_run_cgi/3, 2,
   44		     [ argv(list),
   45		       transfer_encoding(atom),
   46		       buffer(oneof([full,line,none]))
   47		     ]).   48
   49/** <module> Run CGI scripts from the SWI-Prolog web-server
   50
   51The Prolog HTTP server is primarily designed   to be able to handle HTTP
   52requests from a running Prolog process,  which avoids the Prolog startup
   53time and, at least as  interesting,  allows   you  to  keep state in the
   54Prolog database. It is _not_ designed to   run  as a generic web server.
   55There are tools that are much better   for that job. Nevertheless, it is
   56useful to host a complete server  in   one  process,  mainly to simplify
   57deployment.  For  this  reason,  the  SWI-Prolog  HTTP  server  provides
   58libraries     to     serve     static      files     (http_reply_file/3,
   59http_reply_from_files/3) and this library, which   allows  executing CGI
   60scripts.
   61
   62A sensible alternative setup for  a  mixed   server  is  to use a normal
   63server such as Apache  as  main   server,  serving  files,  CGI scripts,
   64modules, etc., and use Apache's proxy  facilities to host a subdirectory
   65of the server using a Prolog server.   That approach is most likely more
   66efficient  for  production  environments,  but    harder  to  setup  for
   67development purposes.
   68
   69This module provides two interfaces:
   70
   71  * http_run_cgi/3 can be used to call a CGI script located exernally.
   72  This is typically used for an individual script used to extend the
   73  server functionality.  For example, the handler declaration below
   74  runs the PHP script =myscript.php= from the location =/myscript/=.
   75  Note that this requires the commandline version of PHP to be
   76  installed as =php= in the current =PATH=.
   77
   78    ==
   79    :- http_handler(root(myscript),
   80		    http_run_cgi(path(php), [argv('myscript.php')]),
   81		    []).
   82    ==
   83
   84  * Setup a path =cgi_bin= for absolute_file_name/3. If this path is
   85  present, calls to /cgi-bin/... are translated into calling the script.
   86  For example, if programs in the directory =cgi-bin= must be accessible
   87  as CGI services, add a rule
   88
   89    ==
   90    :- multifile user:file_search_path/2.
   91
   92    user:file_search_path(cgi_bin, 'cgi-bin').
   93    ==
   94
   95@tbd complete environment translation.  See env/3.
   96@tbd testing.  Notably for POST and PUT commands.
   97@see http://wiht.link/CGIaccessvariables
   98*/
   99
  100:- multifile
  101	environment/2.  102
  103:- meta_predicate
  104	copy_post_data(+, -, 0).  105
  106:- http_handler(root('cgi-bin'), http_cgi_handler(cgi_bin),
  107		[prefix, spawn([])]).  108
  109%%	http_cgi_handler(+Alias, +Request)
  110%
  111%	Locate a CGI script  in  the   file-search-path  Alias  from the
  112%	=path_info=  in  Request   and   execute    the   script   using
  113%	http_run_cgi/3. This library installs one handler using:
  114%
  115%	  ==
  116%	  :- http_handler(root('cgi-bin'), http_run_cgi(cgi_bin, []),
  117%			  [prefix, spawn([])]).
  118%	  ==
  119
  120http_cgi_handler(Alias, Request) :-
  121	select(path_info(PathInfo), Request, Request1),
  122	ensure_no_leading_slash(PathInfo, Relative),
  123	path_info(Relative, Script, Request1, Request2),
  124	Spec =.. [Alias, Script],
  125	absolute_file_name(Spec, ScriptFileName,
  126			   [ access(execute)
  127			   ]),
  128	http_run_cgi(ScriptFileName, [], Request2).
  129
  130
  131ensure_no_leading_slash(Abs, Rel) :-
  132	atom_concat(/, Rel, Abs), !.
  133ensure_no_leading_slash(Rel, Rel).
  134
  135ensure_leading_slash(PathInfo, Abs) :-
  136	(   sub_atom(PathInfo, 0, _, _, /)
  137	->  Abs = PathInfo
  138	;   atom_concat(/, PathInfo, Abs)
  139	).
  140
  141path_info(RelPath, Script, Req, [path_info(Info)|Req]) :-
  142	sub_atom(RelPath, Before, _, After, /), !,
  143	sub_atom(RelPath, 0, Before, _, Script),
  144	sub_atom(RelPath, _, After, 0, Info).
  145path_info(Script, Script, Request, Request).
  146
  147
  148%%	http_run_cgi(+Script, +Options, +Request) is det.
  149%
  150%	Execute the given CGI script.  Options processed:
  151%
  152%	  * argv(+List)
  153%	  Argument vector to give to the CGI script.  Defaults to
  154%	  no arguments.
  155%	  * transfer_encoding(Encoding)
  156%	  Emit a =|Transfer-encoding|= header
  157%	  * buffer(+Buffer)
  158%	  Set buffering of the CGI output stream.  Typically used
  159%	  together with transfer_encoding(chunked).
  160%
  161%	@param	Script specifies the location of the script as a
  162%		specification for absolute_file_name/3.
  163%	@param	Request holds the current HTTP request passed from
  164%		the HTTP handler.
  165
  166http_run_cgi(ScriptSpec, Options, Request) :-
  167	option(argv(Argv), Options, []),
  168	absolute_file_name(ScriptSpec, Script,
  169			   [ access(execute)
  170			   ]),
  171	input_handle(Request, ScriptInput),
  172	findall(Name=Value,
  173		env(Name,
  174		    [ script_file_name(Script)
  175		    | Request
  176		    ], Value),
  177		Env),
  178	debug(http(cgi), 'Environment: ~w', [Env]),
  179	process_create(Script, Argv,
  180		       [ stdin(ScriptInput),
  181			 stdout(pipe(CGI)),
  182			 stderr(std),
  183			 env(Env),
  184			 process(PID)
  185		       ]),
  186	setup_input(ScriptInput, Request),
  187	set_stream(CGI, encoding(octet)),
  188	debug(http(cgi), 'Waiting for CGI data ...', []),
  189	maplist(header_option, Options),
  190	call_cleanup(copy_cgi_data(CGI, current_output, Options),
  191		     cgi_cleanup(Script, CGI, PID)), !.
  192
  193%%	header_option(+Option) is det.
  194%
  195%	Write additional HTTP headers.
  196
  197header_option(transfer_encoding(Encoding)) :- !,
  198	format('Transfer-encoding: ~w\r\n', [Encoding]).
  199header_option(_).
  200
  201%%	cgi_cleanup(+Script, +ScriptStream, +PID) is det.
  202%
  203%	Cleanup the CGI process and close  the   stream  use to read the
  204%	output of the CGI process. Note that  we close the output first.
  205%	This deals with the  possibility  that   the  client  reset  the
  206%	connection, copy_cgi_data/3 returns and exception   and  we wait
  207%	for the process that never  ends.   By  closing  our stream, the
  208%	process will receive a sigpipe if it continues writing.
  209
  210cgi_cleanup(Script, ScriptStream, PID) :-
  211	close(ScriptStream),
  212	process_wait(PID, Status),
  213	debug(http(cgi), '~w ended with status ~w',
  214	      [Script, Status]).
  215
  216%%	input_handle(+Request, -Handle) is det.
  217%
  218%	Decide what to do with the input   stream of the CGI process. If
  219%	this is a PUT/POST request, we must   send data. Otherwise we do
  220%	not redirect the script's input.
  221
  222input_handle(Request, pipe(_)) :-
  223	memberchk(method(Method), Request),
  224	method_has_data(Method), !.
  225input_handle(_, std).
  226
  227method_has_data(post).
  228method_has_data(put).
  229
  230%%	setup_input(+ScriptInput, +Request) is det.
  231%
  232%	Setup passing of the POST/PUT data to the script.
  233
  234setup_input(std, _).
  235setup_input(pipe(Stream), Request) :-
  236	memberchk(input(HTTPIn), Request),
  237	set_stream(Stream, encoding(octet)),
  238	setup_input_filters(HTTPIn, In, Request, Close),
  239	thread_create(copy_post_data(In, Stream, Close), _,
  240		      [ detached(true)
  241		      ]).
  242
  243setup_input_filters(RawIn, In, Request, (Close2,Close1)) :-
  244	setup_length_filter(RawIn, In2, Request, Close1),
  245	setup_encoding_filter(In2, In, Request, Close2).
  246
  247setup_length_filter(In0, In, Request, close(In)) :-
  248	memberchk(content_length(Len), Request), !,
  249	debug(http(cgi), 'Setting input length to ~D', [Len]),
  250	stream_range_open(In0, In, [size(Len)]).
  251setup_length_filter(In, In, _, true).
  252
  253setup_encoding_filter(In0, In, Request, close(In)) :-
  254	memberchk(content_encoding(Enc), Request),
  255	z_format(Enc), !,
  256	debug(http(cgi), 'Adding ~w input filter', [Enc]),
  257	zopen(In0, In, [format(Enc), close_parent(false)]).
  258setup_encoding_filter(In, In, _, true).
  259
  260z_format(gzip).
  261z_format(deflate).
  262
  263
  264%%	copy_post_data(+DataIn, -ScriptIn, :Close) is det.
  265%
  266%	Copy data from the CGI script to the client.
  267
  268copy_post_data(In, Script, Close) :-
  269	debugging(http(cgi)), !,
  270	setup_call_cleanup(open('post.data', write, Debug, [type(binary)]),
  271			   catch(debug_post_data(In, Script, Debug),
  272				 E,
  273				 print_message(error, E)),
  274			   close(Debug)),
  275	catch(Close, E, print_message(error, E)),
  276	close(Script, [force(true)]).
  277copy_post_data(In, Script, Close) :-
  278	catch(copy_stream_data(In, Script), _, true),
  279	catch(Close, E, print_message(error, E)),
  280	close(Script, [force(true)]).
  281
  282
  283debug_post_data(In, Script, Debug) :-
  284	get_code(In, Byte),
  285	(   Byte == -1
  286	->  true
  287	;   put_code(Script, Byte),
  288	    put_code(Debug, Byte),
  289	    debug_post_data(In, Script, Debug)
  290	).
  291
  292
  293%%	copy_cgi_data(+CGI, -Out, +Options) is det.
  294
  295copy_cgi_data(CGI, Out, Options) :-
  296	debugging(http(cgi)), !,
  297	maplist(set_cgi_stream(Out), Options),
  298	setup_call_cleanup(open('cgi.out', write, Debug, [type(binary)]),
  299			   debug_cgi_data(CGI, Out, Debug),
  300			   close(Debug)).
  301copy_cgi_data(CGI, Out, Options) :-
  302	maplist(set_cgi_stream(Out), Options),
  303	copy_stream_data(CGI, Out).
  304
  305set_cgi_stream(Out, buffer(Buffer)) :- !,
  306	set_stream(Out, buffer(Buffer)).
  307set_cgi_stream(_, _).
  308
  309debug_cgi_data(CGI, Out, Debug) :-
  310	get_code(CGI, Byte),
  311	(   Byte == -1
  312	->  true
  313	;   put_code(Out, Byte),
  314	    put_code(Debug, Byte),
  315	    debug_cgi_data(CGI, Out, Debug)
  316	).
  317
  318
  319%%	env(?Name, +Request, -Value) is nondet.
  320%
  321%	Enumerate the environment variables to be   passed  to the child
  322%	process.
  323
  324env('SERVER_SOFTWARE', _, Version) :-
  325	current_prolog_flag(version_data, swi(Major, Minor, Patch, _)),
  326	format(atom(Version), 'SWI-Prolog/~w.~w.~w', [Major, Minor, Patch]).
  327env(Name, Request, Value) :-
  328	http_current_host(Request, Host, Port, [global(true)]),
  329	(   Name = 'SERVER_NAME',
  330	    Value = Host
  331	;   Name = 'SERVER_PORT',
  332	    Value = Port
  333	).
  334env('GATEWAY_INTERFACE', _, 'CGI/1.1').
  335env('SERVER_PROTOCOL', Request, Protocol) :-
  336	memberchk(http(Major-Minor), Request),
  337	format(atom(Protocol), 'HTTP/~w.~w', [Major, Minor]).
  338env('REQUEST_METHOD', Request, Method) :-
  339	memberchk(method(LwrCase), Request),
  340	upcase_atom(LwrCase, Method).
  341env('PATH_INFO', Request, PathInfo) :-
  342	memberchk(path_info(PathInfo0), Request),
  343	ensure_leading_slash(PathInfo0, PathInfo).
  344env('PATH_TRANSLATED', _, _) :- fail.
  345env('SCRIPT_NAME', Request, ScriptName) :-
  346	memberchk(path(FullPath), Request),
  347	memberchk(path_info(PathInfo0), Request),
  348	ensure_leading_slash(PathInfo0, PathInfo),
  349	atom_concat(ScriptName, PathInfo, FullPath).
  350env('SCRIPT_FILENAME', Request, ScriptFilename) :-
  351	memberchk(script_file_name(ScriptFilename), Request).
  352env('QUERY_STRING', Request, QString) :-
  353	memberchk(request_uri(Request), Request),
  354	uri_components(Request, Components),
  355	uri_data(search, Components, QString),
  356	atom(QString).
  357env('REMOTE_HOST', _, _) :- fail.
  358env('REMOTE_ADDR', Request, Peer) :-
  359	http_peer(Request, Peer).
  360env('AUTH_TYPE', _, _) :- fail.
  361env('REMOTE_USER', Request, User) :-
  362	memberchk(user(User), Request).
  363env('REMOTE_IDENT', _, _) :- fail.
  364env('CONTENT_TYPE', Request, ContentType) :-
  365	memberchk(content_type(ContentType), Request).
  366env('CONTENT_LENGTH', Request, ContentLength) :-
  367	memberchk(content_length(ContentLength), Request).
  368env('HTTP_ACCEPT', Request, AcceptAtom) :-
  369	memberchk(accept(Accept), Request),
  370	accept_to_atom(Accept, AcceptAtom).
  371env('HTTP_USER_AGENT', Request, Agent) :-
  372	memberchk(user_agent(Agent), Request).
  373env(Name, _, Value) :-
  374	environment(Name, Value).
  375
  376%%	accept_to_atom(+Accept, -AcceptAtom) is det.
  377%
  378%	Translate back from the parsed accept  specification in the HTTP
  379%	header to an atom.
  380
  381:- dynamic
  382	accept_cache/3.  383
  384accept_to_atom(Accept, AcceptAtom) :-
  385	variant_sha1(Accept, Hash),
  386	(   accept_cache(Hash, Accept, AcceptAtom)
  387	->  true
  388	;   phrase(accept(Accept), Parts),
  389	    atomic_list_concat(Parts, AcceptAtom),
  390	    asserta(accept_cache(Hash, Accept, AcceptAtom))
  391	).
  392
  393accept([H|T]) -->
  394	accept_media(H),
  395	(   { T == [] }
  396	->  []
  397	;   [','],
  398	    accept(T)
  399	).
  400
  401accept_media(media(Type, _, Q, _)) -->
  402	accept_type(Type),
  403	accept_quality(Q).
  404
  405accept_type(M/S) -->
  406	accept_type_part(M), [/], accept_type_part(S).
  407
  408accept_type_part(Var) -->
  409	{ var(Var) }, !,
  410	[*].
  411accept_type_part(Name) -->
  412	[Name].
  413
  414accept_quality(Q) -->
  415	{ Q =:= 1.0 }, !.
  416accept_quality(Q) -->
  417	[ ';q=',Q ].
  418
  419%%	environment(-Name, -Value) is nondet.
  420%
  421%	This hook can  be  defined   to  provide  additional environment
  422%	variables to the CGI script.  For example:
  423%
  424%	  ==
  425%	  :- multifile http_cgi:environment/2.
  426%
  427%	  http_cgi:environment('SERVER_ADMIN', 'bob@example.com').
  428%	  ==