1:- module(rdf_matcher,
    2          [
    3           index_pairs/0,
    4           index_pairs/1,
    5           obj_has_prefix/2,
    6           equivalent/2,
    7           set_ontology/1,
    8
    9           filter_mapped_classes/0,
   10
   11           mutate/4,
   12           basic_annot/4,
   13           tr_annot/6,
   14           has_prefix/2,
   15           used_prefix/1,
   16           inject_prefixes/0,
   17
   18           match_is_inexact/1,
   19
   20           create_bitmap_index/0,
   21           atom_bm/3,
   22           atom_semsim_match/4,
   23           pair_semsim_match/5,
   24
   25           class_prop_bm/4,
   26           bm_tokens/2,
   27           bm_resnik/4,
   28           
   29           pair_match/4,
   30           pair_cmatch/4,
   31           inter_pair_match/4,
   32           inter_pair_cmatch/4,
   33           exact_inter_pair_match/11,
   34           new_pair_match/4,
   35           new_pair_cmatch/4,
   36           rightnew_pair_match/4,
   37           rightnew_pair_cmatch/4,
   38           new_unique_pair_match/4,
   39           new_unique_pair_cmatch/4,
   40           new_ambiguous_pair_match/6,
   41           new_ambiguous_pair_cmatch/6,
   42
   43           tri_match/4,
   44           new_unique_match_triad_nc/3,
   45           
   46           transitive_unique_match/2,
   47           transitive_unique_match_set/1,
   48           transitive_unique_match_set_member/2,
   49
   50           transitive_new_match/2,
   51           transitive_new_match_set_pair/3,
   52
   53           eq_from_match/7,
   54           eq_from_shared_xref/5,
   55
   56           unmatched/1,
   57           unmatched_in/2,
   58
   59           remove_inexact_synonyms/0
   60           ]).   61
   62:- use_module(library(porter_stem)).   63:- use_module(library(index_util)).   64:- use_module(library(tabling)).   65:- use_module(library(semweb/rdf11)).   66:- use_module(library(sparqlprog/owl_util)).   67
   68:- use_module(library(settings)).   69:- setting(ontology, atom,'','').   70
   71:- rdf_register_prefix(skos, 'http://www.w3.org/2004/02/skos/core#').   72
   73set_ontology(Ont) :-
   74        debug(rdf_matcher, 'Setting ontology to ~w',[Ont]),
   75        set_setting(ontology, Ont).
   76
   77
   78:- rdf_register_prefix(oio,'http://www.geneontology.org/formats/oboInOwl#').   79%:- rdf_register_prefix(skos,'http://www.w3.org/2004/02/skos/core#"').
   80
   81%:- module_transparent index_pairs/0, index_pairs/1.
   82index_pairs :-
   83        index_pairs(none).
   84index_pairs(Path) :-
   85        materialize_index(obj(+)),
   86        Goals=[
   87               %equivalent(+,+),
   88               basic_annot(+,+,-,-),
   89               tr_annot(+,+,+,-,-,-),
   90               pair_match(+,+,-,-),
   91               inter_pair_match(+,+,-,-)
   92              ],
   93        (   Path=none
   94        ->  maplist(materialize_index,Goals)
   95        ;   materialize_indexes_to_path(Goals, Path)).
   96
   97
   98
   99% TODO: make easier to extend/plugin
  100
  101:- rdf_meta pmap(-,r).
  102
  103pmap(label, rdfs:label).
  104pmap(related, oio:hasRelatedSynonym).
  105pmap(exact, oio:hasExactSynonym).
  106pmap(broad, oio:hasBroadSynonym).
  107pmap(narrow, oio:hasNarrowSynonym).
  108
  109pmap(related, skos:closeMatch).
  110pmap(exact, skos:exactMatch).
  111pmap(broad, skos:broadMatch).
  112pmap(narrow, skos:narrowMatch).
  113
  114pmap(xref, oio:hasDbXref).
  115
  116inexact(broad).
  117inexact(narrow).
  118inexact(related).
  119
  120match_is_inexact(info(P-_,_,_)) :- inexact(P),!.
  121match_is_inexact(info(_-P,_,_)) :- inexact(P),!.
  122match_is_inexact(info(_,_,stem)) :- !.
  123        
  124
  125nonmut(xref).
  126nonmut(id).
  127
  128
  129literal_string(S^^_,S).
  130literal_string(S@_,S).
  131
  132literal_atom(L,A) :- literal_string(L,S),atom_string(A,S).
  133
  134opt_literal_atom(L,A) :- literal_atom(L,A), !.
  135opt_literal_atom(A,A) :- atomic(A).
  136
  137obj(Obj) :-
  138        setof(Obj, rdf(Obj,_ , _), Objs),
  139        member(Obj,Objs),
  140        rdf_is_iri(Obj).
  141
  142%obj(Obj) :-
  143%        setof(Obj, rdf(Obj,rdf:type,owl:'Class'), Objs),
  144%        member(Obj,Objs),
  145%        rdf_is_iri(Obj).
 basic_annot(?Object, ?AnnotProp, ?Val, ?RdfTripleTerm) is nondet
  149basic_annot(Obj,P,V) :-
  150        basic_annot(Obj,P,V,_).
  151basic_annot(Obj,P,V,T) :-
  152        pmap(P,P1),
  153        obj(Obj),
  154        T=rdf(Obj,P1,Lit),
  155        T,
  156        literal_atom(Lit,V).
  157basic_annot(Obj,id,V,id(V)) :-
  158        obj(Obj),
  159        rdf_global_id(Pre:Post,Obj),
  160        concat_atom([Pre,Post],:,V).
  161basic_annot(Obj,uri,Obj,uri(Obj)) :-
  162        obj(Obj).
 tr_annot(?Object, ?AnnotProp, ?MutVal, ?RdfTripleTerm, ?MutFunc, ?OrigVal) is nondet
AnnotProp = id | ...
  168tr_annot(Obj,P,V2,T,F,V) :-
  169        basic_annot(Obj,P,V,T),
  170        \+ nonmut(P),
  171        mutate(F,P,V,V2).
  172tr_annot(Obj,P,V,T,null,V) :-
  173        nonmut(P),
  174        basic_annot(Obj,P,V,T).
  175
  176
  177mutate(stem,_,V,V2) :-
  178        custom_porter_stem(V,V2).
  179mutate(downcase,_,V,V2) :-
  180        downcase_atom(V,V2).
  181
  182% TODO: tokenize
  183custom_porter_stem(T,S) :-
  184        atom_concat(Obj,eous,T),
  185        atom_concat(Obj,eus,T2),
  186        !,
  187        porter_stem(T2,S).
  188custom_porter_stem(T,S) :-
  189        porter_stem(T,S).
  190
  191excluded(C1,_) :-
  192        setting(ontology,P),
  193        P\='',
  194        \+ has_prefix(C1,P).
  195
  196:- dynamic token_index_stored/2.  197token_index(Tok,Ix) :-
  198        (   token_index_stored(Tok,Ix)
  199        *->  true
  200        ;   nonvar(Tok),
  201            gensym('',IxAtom),
  202            atom_number(IxAtom,Ix),
  203            assert(token_index_stored(Tok,Ix))).
  204
  205obj_token(Obj,Tok) :-
  206        obj_token(Obj,Tok,_,_).
  207obj_token(Obj,Tok,P,V) :-
  208        rdf(Obj,rdf:type,_),
  209        tr_annot(Obj,P,V,_,_,_),
  210        every_n(objtoken,10000,debug(index,'OT: ~w ~w ~w',[Obj,P,V])),
  211        concat_atom(Toks,' ',V),
  212        member(Tok,Toks),
  213        atom_length(Tok,TokLen),
  214        % TODO: make this less arbitrary
  215        % designed to filter out things that are not truly word tokens,
  216        % or hard to tokenize chemical names. http://purl.obolibrary.org/obo/CHEBI_36054
  217        TokLen < 50.
  218
  219every_n(Name,Num,Goal) :-
  220        (   nb_current(Name,X)
  221        ->  X2 is X+1
  222        ;   X2=1),
  223        nb_setval(Name,X2),
  224        Mod is X2 mod Num,
  225        (   Mod=0
  226        ->  debug(counter,'~w Iteration ~w',[Name,X2]),
  227            Goal
  228        ;   true).
  229
  230        
  231
  232        
  233
  234:- dynamic position_ic/2.  235create_bitmap_index :-
  236        %assert(token_index_stored('',0)),
  237        debug(index,'Getting all obj-token pairs',[]),
  238        materialize_index(obj_token(+,+,+,+)),
  239        materialize_index(obj_token(+,+)),
  240        debug(index,'getting distinct tokens...',[]),
  241        setof(Token, Obj^obj_token(Obj,Token),Tokens),
  242        debug(index,'aggregating...',[]),
  243        maplist([Token,Count-Token] >> aggregate(count,Obj,obj_token(Obj,Token),Count), Tokens, CTPairs1),
  244        debug(index,'sorting...',[]),
  245        sort(CTPairs1,CTPairs),
  246        %setof(Count-Token, aggregate(count,Obj,obj_token(Obj,Token),Count), CTPairs),
  247        debug(index,'getting all counts...',[]),
  248        findall(Count,member(Count-_,CTPairs),Counts),
  249        sumlist(Counts,Total),
  250        debug(index,'Total objs: ~w',[Total]),
  251        maplist([_-Token,Token-Ix]>>token_index(Token,Ix),CTPairs,_TIPairs),
  252        %debug(bm,'Pairs=~w',[TIPairs]),
  253        debug(index,'Materializing token_index',[]),
  254        materialize_index(token_index(+,+)),
  255        debug(index,'Calculating ICs',[]),
  256        forall((member(Count-Token,CTPairs),token_index(Token,Pos)),
  257               (   IC is -log(Count/Total)/log(2),
  258                   assert(position_ic(Pos,IC)))),
  259        debug(index,'Materializing IC index',[]),
  260        materialize_index(position_ic(+,+)),
  261        debug(index,'Indexing classes',[]),
  262        forall(rdf(C,rdf:type,owl:'Class'),index_class(C)),
  263        debug(index,'Bitmap index complete',[]).
  264        
  265
  266:- dynamic class_prop_bm/4.  267index_class(C) :-
  268        setof(Tok,obj_token(C,Tok,P,V),Toks),
  269        tokens_bm(Toks,BM,_),
  270        assert(class_prop_bm(C,P,V,BM)),
  271        every_n(ixc,1000,debug(index,'Class: ~w',[C])),
  272        fail.
  273index_class(_).
  274
  275atom_bm(A,BM,U) :-
  276        concat_atom(Toks,' ',A),
  277        tokens_bm(Toks,BM,U).
  278
  279tokens_bm(Toks,BM,LenU) :-
  280        maplist([In,N]>>(token_index(In,Ix) -> N is 2**Ix ; N=0),Toks,Nums),
  281        findall(Tok,(member(Tok,Toks),\+token_index(Tok,_)),UToks),
  282        length(UToks,LenU),
  283        sumlist(Nums,BM).
  284
  285bm_simJ(A,B,S) :-
  286        I is A /\ B,
  287        CI is popcount(I),
  288        (   CI=0
  289        ->  S=0
  290        ;   U is A \/ B,
  291            CU is popcount(U),
  292            S is CI/CU).
  293
  294% as above, with unmatched
  295bm_simJ(A,B,Unmatched,S) :-
  296        I is A /\ B,
  297        (   I=0
  298        ->  S=0
  299        ;   U is A \/ B,
  300            CI is popcount(I),
  301            CU is popcount(U) + Unmatched,
  302            S is CI/CU).
  303
  304% fuzzy match of how much A is subsumed by B
  305%  e.g. a b c subsumed_by a c
  306%  e.g. a b c d partially subsumed_by a e
  307bm_subsumed_by_simJ(A,B,S) :-
  308        I is A /\ B,
  309        (   I=0
  310        ->  S=0
  311        ;   CI is popcount(I),
  312            CU is popcount(B),
  313            S is CI/CU).
  314
  315bm_resnik(A,B,Unmatched,S) :-
  316        I is A /\ B,
  317        CI is popcount(I),
  318        (   CI=0
  319        ->  S=0
  320        ;   U is A \/ B,
  321            bm_sum_ic(I,TI),
  322            bm_sum_ic(U,TU),
  323            % default IC for unmatched
  324            Penalty is Unmatched * 5,
  325            S is TI/(TU+Penalty)).
  326
  327bm_subsumed_by_resnik(A,B,S) :-
  328        I is A /\ B,
  329        (   I=0
  330        ->  S=0
  331        ;   bm_sum_ic(I,TI),
  332            bm_sum_ic(B,TU),
  333            S is TI/TU).
  334
  335
  336
  337atom_semsim_match(A,Cls,S,Method) :-
  338        atom_semsim_match(A,Cls,_,_,S,Method).
  339atom_semsim_match(A,Cls,P,V,S,simj) :-
  340        atom_bm(A,ABM,AU),
  341        class_prop_bm(Cls,P,V,CBM),
  342        bm_simJ(ABM,CBM,AU,S).
  343atom_semsim_match(A,Cls,P,V,S,subsumed_by_simj) :-
  344        atom_bm(A,ABM,AU),
  345        class_prop_bm(Cls,P,V,CBM),
  346        bm_subsumed_by_simj(ABM,CBM,AU,S).
  347atom_semsim_match(A,Cls,P,V,S,icratio) :-
  348        atom_bm(A,ABM,AU),
  349        class_prop_bm(Cls,P,V,CBM),
  350        bm_resnik(ABM,CBM,AU,S).
  351atom_semsim_match(A,Cls,P,V,S,subsumed_by_icratio) :-
  352        atom_bm(A,ABM,_),
  353        class_prop_bm(Cls,P,V,CBM),
  354        bm_subsumed_by_resnik(ABM,CBM,S).
  355
  356pair_semsim_match(icratio,C1,C2,Info,S) :-
  357        Info=info(P1-P2,V1-V2,u),
  358        class_prop_bm(C1,P1,V1,BM1),
  359        class_prop_bm(C2,P2,V2,BM2),
  360        bm_resnik(BM1,BM2,0,S).
  361
  362
  363bm_sum_ic(BM,SumIC) :-
  364        bm_positions(BM,Posns),
  365        maplist([Pos,IC]>>position_ic(Pos,IC),Posns,ICs),
  366        sumlist(ICs,SumIC).
  367
  368bm_tokens(BM, Toks) :-
  369        bm_positions(BM, Posns),
  370        maplist([Pos,Tok]>>token_index(Tok,Pos), Posns, Toks).
 bm_positions(+AV:int, ?AL:list)
True if AV is an integer bit vector with the attributes in AL set
  375bm_positions(AV,AL) :-
  376        bm_positions(AV,AL,65536).
  377
  378bm_positions(AV,AL,Window) :-
  379        Mask is 2**Window -1,
  380        bm_positions(AV,ALx,0,Window,Mask),
  381        flatten(ALx,AL).
 bm_positions(+AV:int, ?AL:list, +Pos, +Window, +Mask) is det
Mask must = Window^2 -1 (not checked) shifts AV down Window bits at a time. If there are any bits in the window, use bm_positions_lo/2 to get the attribute list from this window. note resulting list must be flattened. todo: difference list impl?
  389bm_positions(0,[],_,_,_) :- !.
  390bm_positions(AV,AL,Pos,Window,Mask) :-
  391        !,
  392        NextBit is AV /\ Mask,
  393        AVShift is AV >> Window,
  394        NextPos is Pos+Window,
  395        (   NextBit=0
  396        ->  bm_positions(AVShift,AL,NextPos,Window,Mask)
  397        ;   bm_positions_lo(NextBit,ALNew,Pos),
  398            AL=[ALNew|AL2],
  399            bm_positions(AVShift,AL2,NextPos,Window,Mask)).
  400
  401:- table bm_positions_lo/2.  402
  403% as bm_positions/2, but checks one bit at a time
  404bm_positions_lo(AV,AL) :-
  405        bm_positions_lo(AV,AL,0).
  406
  407bm_positions_lo(0,[],_) :- !.
  408bm_positions_lo(AV,AL,Pos) :-
  409        NextBit is AV /\ 1,
  410        AVShift is AV >> 1,
  411        NextPos is Pos+1,
  412        (   NextBit=1
  413        ->  AL=[Pos|AL2]
  414        ;   AL=AL2),
  415        !,
  416        bm_positions_lo(AVShift,AL2,NextPos).
  417
  418%% pair_match(?Class1, ?Class2, ?SharedVal, Info) is nondet
  419%
  420% Info = ?AP1, ?AP2, ?Triple1, ?Triple2, ?MutFunc
  421:- rdf_meta pair_match(r,r,-,-).
  422:- rdf_meta pair_match(r,r).
  423pair_match(C1,C2,V,Info) :-
  424        Info=info(P1-P2,T1-T2,MutFunc),
  425        tr_annot(C1,P1,V,T1,MutFunc,_),
  426        tr_annot(C2,P2,V,T2,MutFunc,_),
  427        \+ excluded(C1,C2),
  428        C1\=C2.
  429pair_match(C1,C2) :- pair_match(C1,C2,_,_).
  430
  431pair_cmatch(C1,C2,V,Info) :-
  432        rdf_global_id(C1,C1x),
  433        rdf_global_id(C2,C2x),
  434        pair_match(C1x,C2x,V,Info).
  435
  436
  437:- rdf_meta inter_pair_match(r,r,-,-).
  438inter_pair_match(C1,C2,V,Info) :-
  439        pair_match(C1,C2,V,Info),
  440        has_prefix(C1,Pfx1),
  441        has_prefix(C2,Pfx2),
  442        Pfx1 \= Pfx2.
  443inter_pair_match(null,null,null,null).
  444
  445inter_pair_cmatch(C1,C2,V,Info) :-
  446        rdf_global_id(C1,C1x),
  447        rdf_global_id(C2,C2x),
  448        inter_pair_match(C1x,C2x,V,Info).
  449
  450
  451%:- rdf_meta new_pair_match(r,r,-,-).
  452new_pair_match(C1,C2,V,Info) :-
  453        inter_pair_match(C1,C2,V,Info),
  454        has_prefix(C1,Pfx1),
  455        has_prefix(C2,Pfx2),
  456        unmatched_in(C1, Pfx2),
  457        unmatched_in(C2, Pfx1).
  458
  459new_pair_cmatch(C1,C2,V,Info) :-
  460        rdf_global_id(C1,C1x),
  461        rdf_global_id(C2,C2x),
  462        new_pair_match(C1x,C2x,V,Info).
  463
  464% satisfied if C1 and C2 match
  465% AND C2 has no other match in the same namespace as C1
  466:- rdf_meta rightnew_pair_match(r,r,-,-).
  467rightnew_pair_match(C1,C2,V,Info) :-
  468        inter_pair_match(C1,C2,V,Info),
  469        has_prefix(C1,Pfx1),
  470        unmatched_in(C2, Pfx1).
  471
  472rightnew_pair_cmatch(C1,C2,V,Info) :-
  473        rdf_global_id(C1,C1x),
  474        rdf_global_id(C2,C2x),
  475        rightnew_pair_match(C1x,C2x,V,Info).
  476
  477
  478% satisfied if C1 and C2 match
  479% AND C2 has no other match in the same namespace as C1
  480% AND C1 has no other match in the same namespace as C2
  481:- rdf_meta new_unique_pair_match(r,r,-,-).
  482new_unique_pair_match(C1,C2,V,Info) :-
  483        new_pair_match(C1,C2,V,Info),
  484        has_prefix(C1,Pfx1),
  485        has_prefix(C2,Pfx2),
  486        \+ ((pair_match(C1,AltC2,_,_)),
  487            AltC2\=C2,
  488            has_prefix(AltC2,Pfx2)),
  489        \+ ((pair_match(AltC1,C2,_,_)),
  490            AltC1\=C1,
  491            has_prefix(AltC1,Pfx1)).
  492
  493tri_match(C1,C2,C3,Info) :-
  494        new_unique_pair_match(C1,C2,_,_),
  495        new_unique_pair_match(C2,C3,_,_),
  496        C3\=C1,
  497        (   equivalent(C1,C3)
  498        ->  Info=agree
  499        ;   (   new_unique_pair_match(C1,C3,_,_)
  500            ->  Info=all_match
  501            ;   Info=mismatch)).
  502
  503        
  504
  505new_unique_pair_cmatch(C1,C2,V,Info) :-
  506        rdf_global_id(C1,C1x),
  507        rdf_global_id(C2,C2x),
  508        new_unique_pair_match(C1x,C2x,V,Info).
  509
  510
  511new_ambiguous_pair_match(C1,C2,AltC1,AltC2,V,Info) :-
  512        new_pair_match(C1,C2,V,Info),
  513        has_prefix(C1,Pfx1),
  514        has_prefix(C2,Pfx2),
  515        (   pair_match(C1,AltC2,_,_),
  516            AltC2\=C2,
  517            has_prefix(AltC2,Pfx2),
  518            AltC1='-'
  519        ;   pair_match(AltC1,C2,_,_),
  520            AltC1\=C1,
  521            has_prefix(AltC1,Pfx1),
  522            AltC2='-').
  523
  524new_ambiguous_pair_cmatch(C1,C2,AltC1,AltC2,V,Info) :-
  525        rdf_global_id(C1,C1x),
  526        rdf_global_id(C2,C2x),
  527        new_ambiguous_pair_match(C1x,C2x,AltC1,AltC2,V,Info).
  528
  529
  530exact_inter_pair_match(C,X,V,Info) :-
  531        inter_pair_match(C,X,V,Info),
  532        \+ match_is_inexact(Info).
  533        
  534exact_inter_pair_match(C,X,CParents,XParents,Conf,V,Info,AltCs,AltXs,IgnoredCs,IgnoredXs) :-
  535        exact_inter_pair_match(C,X,V,Info),
  536        findall(X2,alt_exact_inter_pair_match(C,X,X2),AltXs),
  537        findall(C2,alt_exact_inter_pair_match(X,C,C2),AltCs),
  538        findall(X2,alt_inexact_inter_pair_match(C,X,X2),IgnoredXs),
  539        findall(C2,alt_inexact_inter_pair_match(X,C,C2),IgnoredCs),
  540        (   AltXs=[],
  541            AltCs=[]
  542        ->  (   IgnoredXs=[],
  543                IgnoredCs=[]
  544            ->  Conf=high
  545            ;   Conf=medium)
  546        ;   Conf=low),
  547        findall(Parent,entity_parent(C,Parent),CParents),
  548        findall(Parent,entity_parent(X,Parent),XParents).
  549
  550entity_parent(X,Parent) :-
  551        rdf(X,rdfs:subClassOf,Parent),
  552        rdf_is_iri(Parent).
  553entity_parent(X,Parent) :-
  554        subclass_of_some(X,R,Parent),
  555        parent_relation(R).
  556entity_parent(X,Parent) :-
  557        rdf(X,R,Parent),
  558        parent_relation(R).
  559entity_parent(X,Parent) :-
  560        rdf(X,rdf:type,Parent),
  561        \+ rdf_global_id(rdf:_,Parent),
  562        \+ rdf_global_id(rdfs:_,Parent),
  563        \+ rdf_global_id(owl:_,Parent).
  564
  565
  566alt_exact_inter_pair_match(C,X,X2) :-
  567        exact_inter_pair_match(C,X2,_,_),
  568        X2\=X.
  569alt_inexact_inter_pair_match(C,X,X2) :-
  570        inter_pair_match(C,X2,_,Info),
  571        X2\=X,
  572        match_is_inexact(Info).
  573
  574
  575
  576/*
  577  UNIQUE MATCH CLUSTERS
  578*/
  579
  580%:- table transitive_unique_match/2.
  581transitive_unique_match(A,B) :-
  582        new_unique_pair_match(A,Z,_,_),
  583        transitive_unique_match(Z,B).
  584transitive_unique_match(A,B) :-
  585        new_unique_pair_match(A,B,_,_).
  586
  587
  588new_unique_match_triad_nc(A,B,C) :-
  589        new_unique_pair_match(A,B,_,_),
  590        A @< B,
  591        \+ equivalent(A,_),
  592        \+ equivalent(B,_),
  593        new_unique_pair_match(B,C,_,_),
  594        B @< C,
  595        \+ equivalent(C,_).
  596
  597
  598
  599
  600transitive_unique_match_set(Bs) :-
  601        obj(A),
  602        recursive_expand(A,transitive_unique_match,Bs),
  603        %setof(B,transitive_unique_match(A,B),Bs),
  604        Bs=[_,_,_|_],
  605        \+ ((member(Z,Bs),
  606            equivalent(Z,_))).
 transitive_unique_match_set_member(?X, ?M) is nondet
X is the reference member of a clique, and M is a member
  613transitive_unique_match_set_member(X,M) :-
  614        transitive_unique_match_set(Set),
  615        member(M,Set),
  616        Set=[X|_].
  617
  618/*
  619  NEW MATCH CLUSTERS
  620*/
  621
  622%:- table transitive_new_match/2.
  623transitive_new_match(A,B) :-
  624        new_pair_match(A,Z,_,_),
  625        transitive_new_match(Z,B).
  626transitive_new_match(A,B) :-
  627        new_pair_match(A,B,_,_).
  628
  629transitive_new_match_set(Bs) :-
  630        obj(A),
  631        recursive_expand(A,transitive_new_match,Bs),
  632        Bs=[_,_,_|_].
  633transitive_new_match_set_member(X,M) :-
  634        transitive_new_match_set(Set),
  635        member(M,Set),
  636        Set=[X|_].
  637transitive_new_match_set_pair(X,A,B) :-
  638        transitive_new_match_set(Set),
  639        Set=[X|_],
  640        member(A,Set),
  641        member(B,Set),
  642        A@<B,
  643        new_pair_match(A,B,_,_).
  644
  645
  646recursive_expand(A,Pred,RSet) :-
  647        set_recursive_expand([A],Pred,[],RSet).
  648set_recursive_expand([],_,RSet,RSet2) :-
  649        sort(RSet,RSet2).
  650set_recursive_expand([A|Seeds],Pred,Visited,RSet) :-
  651        Goal=..[Pred,A,X],
  652        (   setof(X,(Goal,\+member(X,Visited)),Xs)
  653        ->  ord_union(Seeds,Xs,Seeds2),
  654            set_recursive_expand(Seeds2,Pred,[A|Visited],RSet)
  655        ;   set_recursive_expand(Seeds,Pred,[A|Visited],RSet)).
 eq_from_match(?ClsA, ?ClsB, ?SynPredA, ?SynPredB, ?MutateOp, ?OntA, ?OntB) is nondet
true if pair ClsA and ClsB match using the given predicate pair (e.g. exact,exact) after MutateOp performed, and belong to OntA and OntB (prefixes)
  662eq_from_match(A,B,APred,BPred,Mut,OntA,OntB) :-
  663        inter_pair_match(A,B,_,info(APred-BPred, _, Mut)),
  664        has_prefix(A,OntA),
  665        has_prefix(B,OntB).
 eq_from_shared_xref(?ClsA, ?ClsB, ?SharedXrefOnt, ?OntA, ?OntB) is nondet
  668eq_from_shared_xref(A,B,OntX,OntA,OntB) :-
  669        inter_pair_match(A,B,X,info(xref-xref, _, null)),
  670        has_prefix(A,OntA),
  671        has_prefix(B,OntB),
  672        has_prefix(X,OntX).
  673
  674used_prefixes(Ps) :-
  675        setof(P,used_prefix1(P),Ps).
  676used_prefix(P) :-
  677        setof(P,used_prefix1(P),Ps),
  678        member(P,Ps).
  679used_prefix1(P) :-
  680        rdf(X,rdf:type,_),
  681        has_prefix(X,P).
  682
  683guess_uribase(X,U) :-
  684        rdf(X,_,_),
  685        defrag(X,U,_).
  686
  687defrag(X,U,Frag) :-
  688        concat_atom(Parts,'/',X),
  689        reverse(Parts,[Frag|Rev]),
  690        reverse(Rev,Parts2),
  691        concat_atom(Parts2,'/',U).
  692
  693declare_additional_prefixes :-
  694        rdf(X,'http://www.w3.org/ns/shacl#prefix',^^(Prefix1,_)),
  695        rdf(X,'http://www.w3.org/ns/shacl#namespace', ^^(NS1,_)),
  696        atom_string(Prefix,Prefix1),
  697        atom_string(NS,NS1),
  698        rdf_register_prefix(Prefix, NS),
  699        debug(rdf_matcher,'Registered ~w ~w',[Prefix,NS]),
  700        fail.
  701declare_additional_prefixes.
  702
  703
  704inject_prefixes :-
  705        declare_additional_prefixes,
  706        (   used_prefixes(Ps),
  707            debug(rdf_matcher,'Found these prefixes: ~w',[Ps]),
  708            Ps\=[_,_|_]
  709        ->  force_inject_prefixes
  710        ;   true).
  711force_inject_prefixes :-
  712        debug(rdf_matcher,'Injecting prefixes',[]),
  713        setof(U,X^guess_uribase(X,U),Us),
  714        debug(rdf_matcher,'Bases = ~w',[Us]),
  715        Us=[_,_|_],
  716        forall((member(U,Us),defrag(U,_,Prefix)),
  717               (   debug(rdf_matcher,'Register: ~w : ~w',[Prefix,U]),
  718                   rdf_register_prefix(Prefix, U))),
  719        !.
  720force_inject_prefixes :-
  721        throw(error(cannot_guess_prefixes)).
  722
  723        
  724filter_mapped_classes :-
  725        setof(C,C2^equivalent(C,C2),MappedCs),
  726        forall(member(C,MappedCs),
  727               rdf_retractall(C,_,_,_)).
  728        
  729equivalent(C1,C2) :- rdf(C1,owl:equivalentClass,C2).
  730equivalent(C1,C2) :- rdf(C2,owl:equivalentClass,C1).
  731equivalent(C1,C2) :- rdf(C1,skos:exactMatch,C2).
  732equivalent(C1,C2) :- rdf(C2,skos:exactMatch,C1).
  733
  734obj_has_prefix(C,P) :- obj(C),has_prefix(C,P).
  735
  736has_prefix(C,P) :- atomic(C), rdf_global_id(P:_, C).
  737has_prefix(C,P) :- atomic(C), \+ rdf_global_id(_:_, C), concat_atom([P,_],:,C).
  738has_prefix(P:_,P).
 unmatched(?Cls) is nondet
true if Cls has no equivalent class
  743unmatched(C) :-
  744        obj(C),
  745        \+ equivalent(C,_).
 unmatched_in(?Cls, ?ExtPrefix) is nondet
true if Cls has no equivalent class with prefix ExtPrefix
  750unmatched_in(C, ExtPrefix) :-
  751        obj(C),  
  752        \+ ((equivalent(C,C2),
  753             has_prefix(C2, ExtPrefix))).
  754
  755
  756remove_inexact_synonyms :-
  757        T=rdf(_,P,_),
  758        findall(T,
  759                (   inexact(PN),
  760                    pmap(PN,P),
  761                    T),
  762                Ts),
  763        forall(member(rdf(S,P,O),Ts),
  764               (   debug(rdf_matcher,'Removing: ~w ~w ~w',[S,P,O]),
  765                   rdf_retractall(S,P,O))).
  766
  767
  768parent_relation('http://purl.obolibrary.org/obo/gaz#located_in').
  769parent_relation('http://purl.obolibrary.org/obo/RO:0001025').
  770parent_relation('http://purl.obolibrary.org/obo/BFO_0000050')