1:- module(auc,[compute_areas/5,compute_areas_diagrams/5, compute_maxacc/2]).

auc

This module computes the Area Under the Receiving Operating Characteristics and Precision Recall curves using the method of Davis, Jesse, and Mark Goadrich. "The relationship between Precision-Recall and ROC curves." Proceedings of the 23rd international conference on Machine learning. ACM, 2006.

author: - Fabrizio Riguzzi
license: - Artistic License 2.0 */

compute_areas(+LG:list, -AUCROC:float, -ROC:list, -AUCPR:float, -PR:list) is det

The predicate takes as input

a list LG of pairs probability-literal in ascending order on probability where the literal can be an Atom (indicating a positive example) or \+ Atom, indicating a negative example while the probability is the probability of Atom of being true
The predicate returns
AUCROC: the size of area under the ROC curve
ROC: the ROC curve as a list of points that are couples of the form x-y
AUCPR: the size of the area under the PR curve
PR: the PR curve as a list of points that are couples of the form x-y

   30compute_areas(LG,AUCROC,ROC,AUCPR,PR):-
   31  must_be(list, LG),
   32  must_be(var, AUCROC),
   33  must_be(var, ROC),
   34  must_be(var, AUCPR),
   35  must_be(var, PR),
   36  findall(E,member(_- \+(E),LG),Neg),
   37  length(LG,NEx),
   38  length(Neg,NNeg),
   39  NPos is NEx-NNeg,
   40  keysort(LG,LG1),
   41  reverse(LG1,LG2),
   42  catch(compute_pointsroc(LG2,+1e20,0,0,NPos,NNeg,[],ROC), 
   43    error(evaluation_error(zero_divisor),_), 
   44    ROC = []
   45  ),
   46  hull(ROC,0,0,0,AUCROC),
   47  compute_aucpr(LG2,NPos,NNeg,AUCPR,PR).

compute_areas_diagrams(+LG:list, -AUCROC:float, -ROC:dict, -AUCPR:float, -PR:dict) is det

The predicate takes as input

a list LG of pairs probability-literal in ascending order on probability where the literal can be an Atom (indicating a positive example) or \+ Atom, indicating a negative example while the probability is the probability of Atom of being true
The predicate returns
AUCROC: the size of the area under the ROC curve
ROC: the ROC curve as a dict that can be visualized with the c3 renderer of SWISH
AUCPR: the size of the area under the PR curve
PR: the PR curve as a dict that can be visualized with the c3 renderer of SWISH See http://cplint.lamping.unife.it/example/exauc.pl for an example
*/

   69compute_areas_diagrams(LG,AUCROC,ROC,AUCPR,PR):-
   70  must_be(list, LG),
   71  must_be(var, AUCROC),
   72  must_be(var, ROC),
   73  must_be(var, AUCPR),
   74  must_be(var, PR),
   75  compute_areas(LG,AUCROC,ROC0,AUCPR,PR0),
   76  %  write(ROC0),nl,write(PR0),nl,
   77  ROC = c3{data:_{x:x, rows:[x-'ROC'|ROC0]},
   78    axis:_{x:_{min:0.0,max:1.0,padding:0.0,
   79        tick:_{values:[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]}},
   80           y:_{min:0.0,max:1.0,padding:_{bottom:0.0,top:0.0},
   81        tick:_{values:[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]}}}},
   82  PR = c3{data:_{x:x, rows:[x-'PR'|PR0]},
   83    axis:_{x:_{min:0.0,max:1.0,padding:0.0,
   84        tick:_{values:[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]}},
   85           y:_{min:0.0,max:1.0,padding:_{bottom:0.0,top:0.0},
   86        tick:_{values:[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]}}}}.

compute_maxacc(+LG:list, -MaxAcc) is det

The predicate takes as input

a list LG of pairs probability-literal in ascending order on probability where the literal can be an Atom (indicating a positive example) or \+ Atom, indicating a negative example while the probability is the probability of Atom of being true
The predicate returns
MaxAcc: the maximum obtainable accuracy See http://cplint.lamping.unife.it/example/exauc.pl for an example
*/

  104compute_maxacc(LG, MaxAcc) :-
  105  must_be(list, LG),
  106  must_be(var, MaxAcc),
  107  findall(E,member(_- \+(E),LG),Neg), %find all the pairs that contain a negative examples
  108  length(LG,NEx),
  109  length(Neg,NNeg),
  110  NPos is NEx-NNeg,
  111  keysort(LG,LG1), % ascending order of the pairs on probabilities
  112  reverse(LG1,LG2), % discending order of the pairs on probabilities
  113  compute_acc_list(LG2, 0, 0, NPos, NNeg, [], AccList),
  114  max_list(AccList, MaxAcc).

compute_acc_list(+LG:list, +TP:int, +FP:int, +FN:int, +TN:int, +AccList0:list, -AccList:list) is det

The predicate takes as input

LG: a list LG of pairs probability-literal in ascending order on probability where the literal can be an Atom (indicating a positive example) or \+ Atom, indicating a negative example while the probability is the probability of Atom of being true
TP: the current number of true positive examples
FP: the current number of false positive examples
FN: the current number of false negative examples
TN: the current number of true negative examples
AccList0: the current list of the possible accuracies The predicate returns
AccList: a list of all the possible accuracies */

  135compute_acc_list([], TP, FP, FN, TN, AccList0, AccList) :-
  136  Acc is (TP+TN)/(TP+TN+FP+FN),
  137  append(AccList0, [Acc], AccList).
  138
  139compute_acc_list([_P- (\+ _)|T], TP, FP, FN, TN, AccList0, AccList):-!,
  140  Acc is (TP+TN)/(TP+TN+FP+FN),
  141  append(AccList0, [Acc], AccListNew),% append the new accuracy (it creates a new list called AccListNew)
  142  FP1 is FP+1,
  143  TN1 is TN-1,
  144  compute_acc_list(T, TP, FP1, FN, TN1, AccListNew, AccList).
  145
  146compute_acc_list([_P- _|T], TP, FP, FN, TN, AccList0, AccList):-!,
  147  Acc is (TP+TN)/(TP+TN+FP+FN),
  148  append(AccList0, [Acc], AccListNew),
  149  TP1 is TP+1,
  150  FN1 is FN-1,
  151  compute_acc_list(T, TP1, FP, FN1, TN, AccListNew, AccList).
  152
  153
  154
  155
  156compute_pointsroc([],_P0,_TP,_FP,_FN,_TN,P0,P1):-!,
  157  append(P0,[1.0-1.0],P1).
  158
  159compute_pointsroc([P- (\+ _)|T],P0,TP,FP,FN,TN,Po0,Po1):-!,
  160  (P<P0->
  161    FPR is FP/(FP+TN),
  162    TPR is TP/(TP+FN),
  163    append(Po0,[(FPR-TPR)],Po2),
  164    P1=P
  165  ;
  166    Po2=Po0,
  167    P1=P0
  168  ),
  169  FP1 is FP+1,
  170  TN1 is TN-1,
  171  compute_pointsroc(T,P1,TP,FP1,FN,TN1,Po2,Po1).
  172
  173compute_pointsroc([P- _|T],P0,TP,FP,FN,TN,Po0,Po1):-!,
  174  (P<P0->
  175    FPR is FP/(FP+TN),
  176    TPR is TP/(TP+FN),
  177    append(Po0,[FPR-TPR],Po2),
  178    P1=P
  179  ;
  180    Po2=Po0,
  181    P1=P0
  182  ),
  183  TP1 is TP+1,
  184  FN1 is FN-1,
  185  compute_pointsroc(T,P1,TP1,FP,FN1,TN,Po2,Po1).
  186
  187
  188hull([],FPR,TPR,AUC0,AUC1):-
  189  AUC1 is AUC0+(1-FPR)*(1+TPR)/2.
  190
  191
  192hull([FPR1-TPR1|T],FPR,TPR,AUC0,AUC1):-
  193  AUC2 is AUC0+(FPR1-FPR)*(TPR1+TPR)/2,
  194  hull(T,FPR1,TPR1,AUC2,AUC1).
  195
  196compute_aucpr(L,Pos,Neg,A,PR):-
  197  L=[P_0-E|TL],
  198  (E= (\+ _ )->
  199    FP=1,
  200    TP=0,
  201    FN=Pos,
  202    TN is Neg -1
  203  ;
  204    FP=0,
  205    TP=1,
  206    FN is Pos -1,
  207    TN=Neg
  208  ),
  209  compute_curve_points(TL,P_0,TP,FP,FN,TN,Points),
  210  Points=[R0-P0|_TPoints],
  211  (R0=:=0,P0=:=0->
  212    Flag=true
  213  ;
  214    Flag=false
  215  ),
  216  area(Points,Flag,Pos,0,0,0,A,[],PR).
  217
  218compute_curve_points([],_P0,TP,FP,_FN,_TN,[1.0-Prec]):-!,
  219  Prec is TP/(TP+FP).
  220
  221compute_curve_points([P- (\+ _)|T],P0,TP,FP,FN,TN,Pr):-!,
  222  (P<P0->
  223    Prec is TP/(TP+FP),
  224    Rec is TP/(TP+FN),
  225    Pr=[Rec-Prec|Pr1],
  226    P1=P
  227  ;
  228    Pr=Pr1,
  229    P1=P0
  230  ),
  231  FP1 is FP+1,
  232  TN1 is TN-1,
  233  compute_curve_points(T,P1,TP,FP1,FN,TN1,Pr1).
  234
  235compute_curve_points([P- _|T],P0,TP,FP,FN,TN,Pr):-!,
  236  (P<P0->
  237    Prec is TP/(TP+FP),
  238    Rec is TP/(TP+FN),
  239    Pr=[Rec-Prec|Pr1],
  240    P1=P
  241  ;
  242    Pr=Pr1,
  243    P1=P0
  244  ),
  245  TP1 is TP+1,
  246  FN1 is FN-1,
  247  compute_curve_points(T,P1,TP1,FP,FN1,TN,Pr1).
  248
  249area([],_Flag,_Pos,_TPA,_FPA,A,A,PR,PR).
  250
  251area([R0-P0|T],Flag,Pos,TPA,FPA,A0,A,PR0,PR):-
  252 TPB is R0*Pos,
  253  (TPB=:=0->
  254    A1=A0,
  255    FPB=0,
  256    PR2=PR0,
  257    PR=[R0-P0|PR3]
  258  ;
  259    R_1 is TPA/Pos,
  260    (TPA=:=0->
  261      (Flag=false->
  262        P_1=P0,
  263	PR=[0.0-P0|PR3]
  264      ;
  265        P_1=0.0,
  266	PR=[0.0-0.0|PR3]
  267      )
  268    ;
  269      P_1 is TPA/(TPA+FPA),
  270      PR=PR3
  271    ),
  272    FPB is TPB*(1-P0)/P0,
  273    N is TPB-TPA+0.5,
  274    (N<1.0->
  275      append(PR0,[R0-P0],PR2),
  276      A1=A0
  277    ;
  278      interpolate(1,N,Pos,R_1,P_1,TPA,FPA,TPB,FPB,A0,A1,[],PR1),
  279      append(PR0,PR1,PR2)
  280    )
  281  ),
  282  area(T,Flag,Pos,TPB,FPB,A1,A,PR2,PR3).
  283
  284interpolate(I,N,_Pos,_R0,_P0,_TPA,_FPA,_TPB,_FPB,A,A,PR,PR):-I>N,!.
  285
  286interpolate(I,N,Pos,R0,P0,TPA,FPA,TPB,FPB,A0,A,PR0,[R-P|PR]):-
  287  R is (TPA+I)/Pos,
  288  P is (TPA+I)/(TPA+I+FPA+(FPB-FPA)/(TPB-TPA)*I),
  289  A1 is A0+(R-R0)*(P+P0)/2,
  290  I1 is I+1,
  291  interpolate(I1,N,Pos,R,P,TPA,FPA,TPB,FPB,A1,A,PR0,PR)