:- use_module(library(tabling)).
:- use_module(library(sparqlprog/emulate_builtins)).

%! band(?B) is nondet
%
%    true if B is a band
band(B) :- rdf(B,rdf:type,dbont:'Band').

%! has_shared_band_member(?B1, ?B2, ?SharedMember) is nondet
%
%    links bands by members in common. E.g. Ronnie James Dio in both Rainbow and Black Sabbath
%
%    true if SharedMember is a member of both B1 and B2 (and B1 and B2 are distinct)
%
has_shared_band_member(B1,B2,A) :-
        rdf(A,dbont:associatedBand,B1),
        rdf(A,dbont:associatedBand,B2),
        B1\=B2.

%! has_shared_band_genre(?B1, ?B2, ?SharedGenre) is nondet
%
%    links bands by shared genre common.
%
%    true if SharedGenre is a genre of both B1 and B2 (and B1 and B2 are distinct)
%
has_shared_genre(B1,B2,A) :-
        rdf(B1,dbont:genre,A),
        rdf(B2,dbont:genre,A),
        B1\=B2.

genre_pair(G1,G2,A) :-
        rdf(A,dbont:genre,G1),
        rdf(A,dbont:genre,G2),
        G1\=G2.


        
%! similarity_by_genre(?BandA, ?BandB, ?SumIC)
%
%    calculates the jaccard similarity between two entities based on genres in common
%
%    the entities should be of similar types (e.g. two bands, or two books)
%
%    ==
%    | genres(A) /\ genres(B) | / | genres(A) \/ genres(B) | 
%    ==
%
%    if no genres are in common, then this should equal 0
%    if all genres are in common, then this should equal 1
%
%    note this does not take into account how *meaningful* it is for a genre to be shared;
%    e.g. sharing the common genre 'pop' counts as much as a rarer genre like 'psytrance'.
%    see further on for IC-based metrics.
%
similarity_by_genre(A,B,Sim) :-
        get_all_genres(A,SA),
        get_all_genres(B,SB),
        jaccard(SA,SB,Sim).

jaccard(SA,SB,Sim) :-
        ord_intersection(SA,SB,I),
        ord_union(SA,SB,U),
        length(I,NI),
        length(U,NU),
        Sim is NI/NU.

get_all_genres(Entity,L) :-
        service_query_all(dbpedia,G,rdf(Entity,dbont:genre,G),L).



:- table get_num_bands/1.
%! get_num_bands(?Count) is det
%
%    unifies Count with the total number of bands in the database
%
%    note this is tabled (cached) so that repeated calls do not invoke new SPARQL queries
%
get_num_bands(Count) :-
        ??(dbpedia,num_bands(Count)).
num_bands(Count) :-
        aggregate(count(distinct(B)),band(B),Count).



%! get_genre_num_bands(?Genre,?Count) is nondet.
%! get_genre_num_bands(+Genre,?Count) is det.
%
%    unifies Count with the total number of bands that are categorized as Genre
%
%
%%%%:- table get_genre_num_bands/2.
get_genre_num_bands(G,Count) :-
        ??(dbpedia,genre_num_bands(G,Count)).

genre_num_bands(G,Count) :-
        aggregate_group(count(distinct(B)),[G],(rdf(B,dbont:genre,G),band(B)),Count).

        

%! pair_genre_sum_ic(?BandA, ?BandB, ?SumIC)
%
%   for a pair of bands, SumIC is the sum of the ICs of the genres shared in common.
%
%
% Example: =pair_genre_ic(dbr:'Metallica', dbr:'Megadeth', IC)=
pair_genre_sum_ic(A,B,SumIC) :-
        get_all_genres(A,SA),
        ??(dbpedia,(band(B),has_shared_genre(A,B,_))),
        get_all_genres(B,SB),
        ord_intersection(SA,SB,I),
        debug(dbpedia,'~w vs ~w :: INTERSECTION(~w + ~w) = ~w',[A,B,SA,SB,I]),
        aggregate(sum(IC),G^(member(G,I),genre_ic(G,IC)),SumIC).

%! genre_ic(?Genre, ?InformationContent:float) is nondet.
%
%    gets the IC of a particular genre. The higher the IC, the rarer and more 'surprising' or information-rich it is.
%
%    for example, many bands are pop, so this would have a low IC. Progressive sludge metal is relatively rare and would have a high IC
%
%    ==
%    InformationContent = -log2( Pr(Genre) )
%    ==
%
genre_ic(G,IC) :-
        get_genre_num_bands(G,Count),
        debug(dbpedia,'|bands| in ~w = ~w',[G,Count]),
        get_num_bands(Total),
        debug(dbpedia,'Total bands = ~w',[Total]),
        seval(-log(Count/Total)/log(2), IC).