1:- module(lcs, [ equality_metric/3
    2               , lcs/3
    3               , lcs/5
    4               ]).    5:- use_module(library(quintus), [otherwise/0]).

Longest common subsequence

Compute a longest common subsequence between two lists. Elements can be compared by means of an arbitrary similarity metric.

lcs(+As:list, +Bs:list, -LCS:list) is det: True if LCS is a longest common subsequence of As and Bs. Elements A and B are can be common if A==B.
Implemented in terms of lcs/5.

   19lcs(A, B, LCS) :-
   20    lcs(equality_metric, A, B, LCS_Pairs, _Length),
   21    maplist(fst, LCS_Pairs, LCS).
   22
   23fst(X-_, X).
   24
   25% Place to stored memoized lcs/5 results
   26:- dynamic lcs_cache/3.

lcs(+Cmp:callable, +As:list, +Bs:list, -LCS:list, -Length) is det

True if LCS is a longest common subsequence of As and Bs. LCS is a list of pairs A-B since Cmp allows non-identical elements to be considered common.

Elements of As and Bs are compared by call(Cmp,A,B,Similarity), where larger Similarity values indicate more similar elements. Length is the sum of similarity scores for elements in the subsequence.

Implemented with memoization on top of a naive, exponential algorithm. It performs fairly well, but patches to use a better algorithm are welcome.

   42:- meta_predicate lcs(3,+,+,-,-).   43lcs(Cmp, As, Bs, LCS, Length) :-
   44    retractall(lcs_cache(_,_,_)),
   45    lcs_(Cmp,As,Bs,LCS,Length),
   46    retractall(lcs_cache(_,_,_)).
   47
   48:- meta_predicate lcs_(3,+,+,-,-).   49lcs_(Cmp, As, Bs, LCS, Length) :-
   50    term_hash((Cmp,As,Bs), Hash),
   51    lcs_cache(Hash, LCS, Length),
   52    !.
   53lcs_(Cmp,[A|As],[B|Bs],LCS,Length) :-
   54    !,
   55    call(Cmp, A, B, Similarity),
   56    lcs_(Cmp,   As ,   Bs ,LCS_AB, Length_AB0),
   57    lcs_(Cmp,   As ,[B|Bs],LCS_A,  Length_A),
   58    lcs_(Cmp,[A|As],   Bs ,LCS_B,  Length_B),
   59    Length_AB is Similarity + Length_AB0,
   60    ( Length_A >= Length_AB, Length_A >= Length_B ->
   61        LCS = LCS_A,
   62        Length is Length_A
   63    ; Length_B >= Length_AB, Length_B >= Length_A ->
   64        LCS = LCS_B,
   65        Length is Length_B
   66    ; otherwise ->
   67        LCS = [A-B|LCS_AB],
   68        Length = Length_AB
   69    ),
   70    term_hash((Cmp,[A|As],[B|Bs]), Hash),
   71    assert(lcs_cache(Hash, LCS, Length)).
   72lcs_(_,[],_,[],0) :- !.
   73lcs_(_,_,[],[],0).

equality_metric(+A, +B, -Similarity) is det: Similarity is 1 if A == B, otherwise 0. This predicate is helpful as the first argument to lcs/5.

   80equality_metric(A,B,Similarity) :-
   81    ( A==B -> Similarity=1
   82    ; true -> Similarity=0
   83    )