1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@cs.vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 2015, VU University Amsterdam
    7
    8    This program is free software; you can redistribute it and/or
    9    modify it under the terms of the GNU General Public License
   10    as published by the Free Software Foundation; either version 2
   11    of the License, or (at your option) any later version.
   12
   13    This program is distributed in the hope that it will be useful,
   14    but WITHOUT ANY WARRANTY; without even the implied warranty of
   15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   16    GNU General Public License for more details.
   17
   18    You should have received a copy of the GNU General Public
   19    License along with this library; if not, write to the Free Software
   20    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
   21
   22    As a special exception, if you link this library with other files,
   23    compiled with a Free Software compiler, to produce an executable, this
   24    library does not by itself cause the resulting executable to be covered
   25    by the GNU General Public License. This exception does not however
   26    invalidate any other reasons why the executable file might be covered by
   27    the GNU General Public License.
   28*/
   29
   30:- module(logstat,
   31	  [ clean_log/0,
   32	    read_log/1,			% +File
   33	    read_log/2,			% +File, +Options
   34	    logrecord/1,		% +List
   35	    logrecord/10
   36	  ]).   37:- use_module(library(rbtrees)).   38:- use_module(library(debug)).   39:- use_module(library(lists)).   40:- use_module(library(zlib)).   41:- use_module(library(record)).   42
   43:- portray_text(true).

Process SWI-Prolog HTTPD logfiles

This module reads SWI-Prolog HTTPD logfiles and allows asking queries about them. Below is a simple example that extracts SPARQL queries from a ClioPatria log file:

?- use_module(library(http/logstat)).
?- read_log('httpd.log').
?- logrecord([path('/sparql/'), search([query=Query])]).
Query = 'PREFIX rdf: ...'
See also
- library(http/http_log) provides the HTTP framework component to write logfiles that are expected by this library. */
 logrecord(?N, ?Time, ?Session, ?RemoteIP, ?Path, ?Query, ?Referrer, ?Code, ?Result, ?Extra)
Database predicate that represents an HTTP transaction.
Arguments:
N- is the index number of the record
Time- is the POSIX time stamp the query was fired
Session- is the session-id or - if the request has no session
RemoteIP- is the remote IP address
Path- is the HTTP location, starting with /
Query- is a list of Name=Value terms holding the (GET) query parameters
Referrer- is the referer URL or - if unknown
Code- is the HTTP numerical reply code
Result- is Prolog notion of the result as a Prolog term
Extra- is a list with Key(Value) terms holding the keys below. Only cpu(Seconds) is always present.
cpu(Seconds)
CPU time used to process the query
bytes(Count)
Number of bytes sent (if known)
user_agent(UserAgent)
Browser identifier (if known)
http_version(Major-Minor)
HTTP Protocol version
city(City)
Name of the city (if configured)
lat(Lat)
Latitude (if configured)
lon(Lon)
Longitude (if configured)
   96:- dynamic
   97	logrecord/10.
 field(?Index, ?Name) is nondet
Define the field-names for the logrecord/10 predicate.
  103field(1, key).
  104field(2, time).
  105field(3, session).
  106field(4, ip).
  107field(5, path).
  108field(6, query).
  109field(7, referer).
  110field(8, code).
  111field(9, result).
  112field(10, extra).
 clean_log
Cleanup the database.
  119clean_log :-
  120	functor(Term, logrecord, 10),
  121	assertion(predicate_property(Term, dynamic)),
  122	retractall(Term).
  123
  124:- record
  125	log_state(progress:boolean=true,		  skip_bad_requests:boolean=false).
 read_log(+File) is det
 read_log(+File, +Options) is det
Read a SWI-Prolog HTTP logfile. If the file contains errors, these are printed to the terminal and the corresponding records are ignored. Options supported:
progress(+Boolean)
Indicate progress on the terminal (default true)
skip_bad_requests(+Boolean)
Ignore 400 requests.
  140read_log(File) :-
  141	read_log(File, []).
  142
  143read_log(File, Options) :-
  144	make_log_state(Options, State, _),
  145	rb_empty(Open),
  146	setup_call_cleanup(myopen(File, In),
  147			   (   read_skip_errors(In, Term0),
  148			       read_log(Term0, In, 1, Open, State)
  149			   ),
  150			   close(In)),
  151	nl(user_error).
  152
  153myopen(File, In) :-
  154	exists_file(File), !,
  155	open(File, read, In, [encoding(utf8)]).
  156myopen(File, In) :-
  157	file_name_extension(File, gz, ZFile),
  158	exists_file(ZFile),!,
  159	gzopen(ZFile, read, In, [encoding(utf8)]).
  160myopen(File, In) :-			% generate error
  161	open(File, read, In, [encoding(utf8)]).
  162
  163
  164read_log(end_of_file, _, _, _, _) :- !.
  165read_log(Term, In, Count0, Open0, State) :-
  166	(   assert_log(Term, Count0, Count1, Open0, Open1)
  167	->  !
  168	;   debugging(logstat),
  169	    gtrace,
  170	    assert_log(Term, Count0, Count1, Open0, Open1)
  171	),
  172	progress(Count1, State),
  173	read_skip_errors(In, Term2),
  174	read_log(Term2, In, Count1, Open1, State).
  175read_log(Term, In, Count, Open, State) :-
  176	(   skip_term(Term, State)
  177	->  true
  178	;   format(user_error, '~NWarning: failed to process ~p~n', [Term])
  179	),
  180	read_skip_errors(In, Term2),
  181	read_log(Term2, In, Count, Open, State).
  182
  183read_skip_errors(In, Term) :-
  184	repeat,
  185	catch(read(In, Term), E, (print_message(error, E),fail)), !.
  186
  187skip_term(completed(_, _, _, 400, _), _).
  188skip_term(completed(0, _, _, 500, error(_)), _).
  189skip_term(completed(_, _, _, _, _), State) :-
  190	log_state_skip_bad_requests(State, true).
  191
  192assert_log(server(_StartStop, _Time), Count, Count, Open0, Open) :- !,
  193	close_all(Open0),
  194	rb_empty(Open).
  195assert_log(request(I, Time, Request), Count0, Count, Open0, Open) :- !,
  196	Count is Count0+1,
  197	rb_insert_new(Open0, I, r(Count0, Time, Request), Open).
  198assert_log(completed(I, CPU, Status), Count, Count, Open0, Open) :-
  199	rb_delete(Open0, I, r(Id, Time, Request), Open),
  200	save_record(Id, Time, CPU, Request, 0, 0, Status).
  201assert_log(completed(I, CPU, Bytes, Code, Status), Count, Count, Open0, Open) :-
  202	rb_delete(Open0, I, r(Id, Time, Request), Open),
  203	save_record(Id, Time, CPU, Request, Bytes, Code, Status).
  204
  205close_all(Open0) :-
  206	rb_visit(Open0, Pairs),
  207	close_pairs(Pairs).
  208
  209close_pairs([]).
  210close_pairs([_RID-r(Id, Time, Request)|T]) :-
  211	save_record(Id, Time, 0, Request, 0, 500, no_reply),
  212	close_pairs(T).
  213
  214save_record(Id, Time, CPU, Request, Bytes, Code, Status) :-
  215	session(Request, Session),
  216	remote_IP(Request, RemoteIP),
  217	path(Request, Path),
  218	query_parms(Request, Parms),
  219	referer(Request, Referer),
  220	extra(Request, Bytes, Extra),
  221	assert(logrecord(Id, Time, Session, RemoteIP,
  222			 Path, Parms, Referer, Code, Status,
  223			 [ cpu(CPU)
  224			 | Extra
  225			 ])).
  226
  227
  228session(Request, SessionID) :-
  229	memberchk(session(SessionID), Request), !.
  230session(Request, SessionID) :-
  231	memberchk(cookie(Cookie), Request),
  232	memberchk(swipl_session=SessionID, Cookie), !.
  233session(_, -).
 remote_IP(+Request, -IP:atom) is semidet
Find the remote IP address. This is either in x_forwarded_for if we are behind a (Apache) proxy, or it is the peer.
  240remote_IP(Request, IP) :-
  241	memberchk(x_forwarded_for(IP0), Request), !,
  242	final_ip(IP0, IP).
  243remote_IP(Request, IP) :-
  244	memberchk(peer(Peer), Request), !,
  245	peer_to_ip(Peer, IP).
  246remote_IP(_, -).
  247
  248final_ip(IP0, IP) :-
  249	split_string(IP0, ",", " ", IPList),
  250	last(IPList, IPString),
  251	atom_string(IP, IPString).
  252
  253peer_to_ip(ip(A,B,C,D), IP) :-
  254	atomic_list_concat([A,B,C,D], '.', IP).
  255
  256path(Request, Path) :-
  257	memberchk(path(Path), Request).
  258
  259query_parms(Request, Parms) :-
  260	memberchk(search(Parms), Request), !.
  261query_parms(_, []).
  262
  263referer(Request, Referer) :-
  264	memberchk(referer(Referer), Request), !.
  265referer(_, -).
  266
  267extra(Request, Bytes, Extra) :-
  268	findall(E, extra_field(Request, Bytes, E), Extra).
  269
  270extra_field(_, Bytes, Extra) :-
  271	Bytes \== 0,
  272	Extra = bytes(Bytes).
  273extra_field(Request, _, user_agent(Agent)) :-
  274	memberchk(user_agent(Agent), Request).
  275extra_field(Request, _, http_version(Agent)) :-
  276	memberchk(http_version(Agent), Request).
  277extra_field(Request, _, city(City)) :-
  278	memberchk(x_geoip_city(City), Request).
  279extra_field(Request, _, lat(Lat)) :-
  280	memberchk(x_geoip_latitude(LatAtom), Request),
  281	atom_number(LatAtom, Lat).
  282extra_field(Request, _, lon(Lon)) :-
  283	memberchk(x_geoip_longitude(LonAtom), Request),
  284	atom_number(LonAtom, Lon).
 logrecord(+Query) is nondet
Query by list of fieldnames. Query list a list of Name(Value) specifications. Name is one of:
key(Integer)
Integer number of the request (1,2,3,...)
time(Float)
POSIX time stamp of the request.
session(Atom)
The session-id or - if the request has no session
ip(Atom)
The remote IP address
path(Atom)
The HTTP location, starting with /
query(list(Name=Value))
List of Name=Value terms holding the (GET) query parameters
referer(Atom)
The referer URL or - if unknown
code(Integer)
Code is the HTTP numerical reply code
result(Term)
Prolog notion of the result as a Prolog term
cpu(Seconds)
CPU time used to process the query
bytes(Count)
Number of bytes sent (if known)
user_agent(UserAgent)
Browser identifier (if known)
http_version(Major-Minor)
HTTP Protocol version
city(City)
Name of the city (if configured)
lat(Lat)
Latitude (if configured)
lon(Lon)
Longitude (if configured)
after(+TimeSpec)
Only consider records created after TimeSpec. TimeSpec is one of:
  • Year/Month/Day
before(+TimeSpec)
See after(TimeSpec).
search([...(Name=Value)])
Demand the following fields to be present in the query.
  332logrecord(Query) :-
  333	must_be(list, Query),
  334	functor(Term, logrecord, 10),
  335	make_query(Query, Term, RestQuery),
  336	make_condition(RestQuery, Extra, Term, Cond),
  337	(   Extra == [], Cond == true
  338	->  call(Term)
  339	;   Extra == []
  340	->  call((Term,Cond))
  341	;   field(I, extra)
  342	->  arg(I, Term, ExtraDB),
  343	    call((Term,Cond)),
  344	    subset(Extra, ExtraDB)
  345	).
  346
  347make_query([], _, []).
  348make_query([Q|T0], Goal, Extra) :-
  349	Q =.. [Name,Value],
  350	fill_field(Name, Goal, Value), !,
  351	make_query(T0, Goal, Extra).
  352make_query([Q|T0], Goal, [Q|Extra]) :-
  353	make_query(T0, Goal, Extra).
  354
  355fill_field(Name, Term, Value) :-
  356	field(I, Name),
  357	arg(I, Term, Value).
  358
  359make_condition([], [], _, true).
  360make_condition([C0|T0], T, Term, (Goal,More)) :-
  361	condition(C0, Term, Goal), !,
  362	make_condition(T0, T, Term, More).
  363make_condition([H|T0], [H|T], Term, Cond) :-
  364	make_condition(T0, T, Term, Cond).
  365
  366
  367condition(after(TimeSpec), Term, Stamp >= Time) :-
  368	field(I, time), !,
  369	arg(I, Term, Stamp),
  370	time_spec_to_stamp(TimeSpec, Time).
  371condition(before(TimeSpec), Term, Stamp < Time) :-
  372	field(I, time), !,
  373	arg(I, Term, Stamp),
  374	time_spec_to_stamp(TimeSpec, Time).
  375condition(search(Fields), Term, subset(Fields, Query)) :-
  376	field(I, query), !,
  377	arg(I, Term, Query).
  378
  379
  380time_spec_to_stamp(Y/M/D, Stamp) :-
  381	date_time_stamp(date(Y,M,D,0,0,0,0,-,-), Stamp).
  382
  383
  384		 /*******************************
  385		 *	       FEEDBACK		*
  386		 *******************************/
  387
  388progress(Count, State) :-
  389	log_state_progress(State, true),
  390	Count mod 1000 =:= 0, !,
  391	format(user_error, '\r~t~D~20|', [Count]).
  392progress(_, _)