1% * -*- Mode: Prolog -*- */
    2
    3:- module(md5hash,
    4          [
    5	      md5_hash_up_to_date/3,
    6	      ensure_md5_directory_exists/1,
    7	      update_md5_file/3
    8          ]).    9
   10:- use_module(library(readutil)).   11:- use_module(library(biomake/utils)).   12
   13% ----------------------------------------
   14% MD5 HASHES
   15% ----------------------------------------
   16
   17md5_prog("md5sum",[]).  % Ubuntu
   18md5_prog("md5",["-q"]).  % MacOS, BSD
   19
   20find_md5_prog(Path,Args) :-
   21	md5_prog(Prog,Args),
   22	find_on_path(Prog,Path),
   23	!.
   24
   25:- dynamic md5_hash/3.   26:- dynamic md5_valid/4.   27
   28md5_valid(File,_,_,Opts) :- member(old_file(File),Opts), !.
   29
   30md5_hash_up_to_date(T,DL,Opts) :-
   31    atom(T),
   32    !,
   33    atom_chars(T,Tc),
   34    string_chars(Tstr,Tc),
   35    md5_hash_up_to_date(Tstr,DL,Opts).
   36md5_hash_up_to_date(T,DL,Opts) :-
   37    !,
   38    debug(md5,"Checking MD5 hash validity for ~w <-- ~w",[T,DL]),
   39    md5_check(T,S,H,Opts),
   40    md5_valid(T,S,H,Opts).
   41
   42% read_md5_file attempts to find the MD5 hash of a target and (if known) the conditions under which this target is still valid.
   43% It does this by first trying to read the file .biomake/md5/{targetName}, then trying to lookup the hash in its database,
   44% and finally (if all else fails) computing the hash on-the-fly, using the md5 program.
   45read_md5_file(T,Opts) :-
   46    md5_filename(T,F),
   47    exists_file(T),
   48    exists_file(F),
   49    debug(md5,'Reading MD5 hash file: ~w',[F]),
   50    !,
   51    retract_md5_hash(T),
   52    open(F,read,IO,[]),
   53    repeat,
   54    (   at_end_of_stream(IO)
   55     ->  !
   56     ;   read_term(IO,Term,[syntax_errors(error),
   57                            module(biomake)]),
   58         debug(md5,'parsed term: ~w',[Term]),
   59         assert(Term),
   60         fail),
   61    close(IO),
   62    (member(ignore_md5_timestamp(true),Opts)
   63    ; (time_file(T,Ttime),
   64       time_file(F,Ftime),
   65       (Ttime > Ftime
   66        -> (debug(md5,'MD5 hash file ~w has an older timestamp than ~w - recomputing hash',[F,T]),
   67	    retract_md5_hash(T),
   68	    compute_md5(T,_,_,Opts))
   69	; true))).
   70
   71md5_check_size(File,Size,Hash,_Opts) :- exists_file(File), size_file(File,Size), md5_hash(File,Size,Hash).
   72
   73md5_check(File,Size,Hash,Opts) :-
   74	md5_check_size(File,Size,Hash,Opts),
   75	!.
   76md5_check(File,Size,Hash,Opts) :-
   77	\+ member(no_md5_cache(true),Opts),
   78	read_md5_file(File,Opts),
   79	!,
   80	md5_check_size(File,Size,Hash,Opts).
   81md5_check(File,Size,Hash,Opts) :-
   82        compute_md5(File,Size,Hash,Opts).
   83
   84retract_md5_hash(T) :-
   85    md5_hash(T,_,_),
   86    !,
   87    retractall(md5_hash(T,_,_));true.
   88retract_md5_hash(_).
   89
   90compute_md5(T,Size,Hash,_) :-
   91    exists_file(T),
   92    size_file(T,Size),
   93    try_md5_prog(T,Hash),
   94    debug(md5,'MD5 hash of file ~w (size ~w) is ~w',[T,Size,Hash]),
   95    retract_md5_hash(T),
   96    assert(md5_hash(T,Size,Hash)).
   97
   98% clauses of try_md5_prog/2 are discontiguous, due to a couple of renamed versions that don't work,
   99% but are kept in here because it'd be a better world if they did work, and maybe they will one day.
  100:- discontiguous md5hash:try_md5_prog/2.  101
  102% try the md5 executables findable with md5_prog, using a temporary file to stash the hash
  103try_md5_prog(Filename,Hash) :-
  104    find_md5_prog(Md5Prog,Args),
  105    absolute_file_name(Filename,Path),
  106    append(Args,[Path],Md5Args),
  107    atomic_list_concat(Md5Args," ",Md5ArgStr),
  108    biomake_private_filename_dir_exists(Filename,["tmp"],TmpFile),
  109    format(string(Exec),"~w ~w >~w",[Md5Prog,Md5ArgStr,TmpFile]),
  110    debug(md5,'computing hash: ~w',[Exec]),
  111    shell(Exec),
  112    phrase_from_file(first_n(32,HashCodes),TmpFile),
  113    string_codes(HashStr,HashCodes),
  114    debug(md5,'output of ~w ~w: ~w',[Md5Prog,Filename,HashStr]),
  115    string_lower(HashStr,Hash),
  116    !.
  117
  118% this version uses pipes; unfortunately, that crashes on some Macs :-(
  119try_md5_prog_using_pipes(Filename,Hash) :-
  120    find_md5_prog(Md5Prog,Args),
  121    append(Args,[Filename],Md5Args),
  122    setup_call_cleanup(process_create(Md5Prog,Md5Args,[stdout(pipe(Stream))]),
  123		       read_stream_to_codes(Stream,CodeList),
  124		       close(Stream)),
  125    phrase(first_n(32,HashCodes),CodeList),
  126    string_codes(HashStr,HashCodes),
  127    debug(md5,'output of ~w ~w: ~w',[Md5Prog,Filename,HashStr]),
  128    string_lower(HashStr,Hash).
  129
  130% fall back to using Prolog's deprecated in-memory MD5 implementation in the rdf_db library
  131try_md5_prog(Filename,Hash) :-
  132    use_module(library(semweb/rdf_db)),
  133    debug(md5,'reading ~w into memory for native SWI-Prolog MD5 implementation',[Filename]),
  134    read_file_to_string(Filename,Str,[]),
  135    rdf_atom_md5(Str,1,Hash).
  136
  137% this version uses the in-memory MD5 implementation in the md5 library, which is recommended over rdf_atom_md5, but not present in all implementations
  138try_md5_prog_in_memory_md5_library(Filename,Hash) :-
  139    use_module(library(md5), [ md5_hash/3 as library_md5_hash ]),
  140    debug(md5,'reading ~w into memory for native SWI-Prolog MD5 implementation',[Filename]),
  141    read_file_to_string(Filename,Str,[]),
  142    library_md5_hash(Str,Hash,[]).
  143
  144
  145first_n(0,[]) --> [].
  146first_n(0,[]) --> [_], first_n(0,[]).
  147first_n(N,[C|Cs]) --> [C], {Np is N - 1}, first_n(Np,Cs).
  148
  149delete_md5_file(T) :-
  150    md5_filename(T,F),
  151    exists_file(F),
  152    !,
  153    delete_file(F).
  154delete_md5_file(_).
  155
  156ensure_md5_directory_exists(Target) :-
  157    biomake_private_filename_dir_exists(Target,["md5"],_),
  158    biomake_private_filename_dir_exists(Target,["tmp"],_).
  159
  160md5_filename(Target,Filename) :-
  161    biomake_private_filename(Target,["md5"],Filename).
  162
  163open_md5_file(Target,Stream) :-
  164    open_biomake_private_file(Target,["md5"],_,Stream).
  165
  166make_md5_hash_term(T,S,H,Str) :-
  167    format(string(Str),"md5_hash(\"~w\",~d,\"~w\")",[T,S,H]).
  168
  169make_md5_valid_term(T,S,H,Str) :-
  170    format(string(Str),"md5_valid(\"~w\",~d,\"~w\",X)",[T,S,H]).
  171
  172make_md5_check_term(T,S,H,Str) :-
  173    format(string(Str),"md5_check(\"~w\",~d,\"~w\",X)",[T,S,H]).
  174
  175make_md5_valid_goal_list([Dep|Deps],[Goal|Goals],Opts) :-
  176    md5_check(Dep,Size,Hash,Opts),
  177    !,
  178    make_md5_check_term(Dep,Size,Hash,Goal),
  179    make_md5_valid_goal_list(Deps,Goals,Opts).
  180make_md5_valid_goal_list([_|Deps],Goals,Opts) :- make_md5_valid_goal_list(Deps,Goals,Opts), !.
  181make_md5_valid_goal_list([],[],_).
  182
  183update_md5_file(T,DL,Opts) :-
  184    debug(md5,'updating MD5 hash file for ~w <-- ~w',[T,DL]),
  185    delete_md5_file(T),
  186    % retract the database value, which will be stale
  187    retract_md5_hash(T),
  188    md5_check(T,SizeT,HashT,Opts),
  189    make_md5_hash_term(T,SizeT,HashT,HashTerm),
  190    make_md5_valid_term(T,SizeT,HashT,ValidTerm),
  191    make_md5_valid_goal_list(DL,ValidGoals,Opts),
  192    open_md5_file(T,IO),
  193    format(IO,"~w.~n",[HashTerm]),
  194    debug(md5,' ~w',[HashTerm]),
  195    (ValidGoals = [] -> format(IO,"~w.~n",[ValidTerm]), debug(md5,' ~w.',[ValidTerm]);
  196     concat_string_list(ValidGoals,ValidGoalStr,",\n  "),
  197     format(IO,"~w :-~n  ~w.~n",[ValidTerm,ValidGoalStr]),
  198     debug(md5," ~w :-~n  ~w.",[ValidTerm,ValidGoalStr])),
  199    close(IO),
  200    !.
  201
  202update_md5_file(T,DL,_) :-
  203    format("Warning: could not update MD5 file for ~w <-- ~w~n",[T,DL]),
  204    !