1/*  Author:        Jan Wielemaker
    2    E-mail:        J.Wielemaker@vu.nl
    3    WWW:           http://www.swi-prolog.org
    4    Copyright (c)  2017, VU University Amsterdam
    5    All rights reserved.
    6
    7    Redistribution and use in source and binary forms, with or without
    8    modification, are permitted provided that the following conditions
    9    are met:
   10
   11    1. Redistributions of source code must retain the above copyright
   12       notice, this list of conditions and the following disclaimer.
   13
   14    2. Redistributions in binary form must reproduce the above copyright
   15       notice, this list of conditions and the following disclaimer in
   16       the documentation and/or other materials provided with the
   17       distribution.
   18
   19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   22    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   23    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   24    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   25    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   26    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   27    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30    POSSIBILITY OF SUCH DAMAGE.
   31*/
   32
   33:- module(wordnet,
   34	  [ wn_s/6,			% basic Wordnet relations
   35	    wn_g/2,
   36	    wn_hyp/2,
   37	    wn_ins/2,
   38	    wn_ent/2,
   39	    wn_sim/2,
   40	    wn_mm/2,
   41	    wn_ms/2,
   42	    wn_mp/2,
   43	    wn_der/4,
   44	    wn_cls/5,
   45	    wn_cs/2,
   46	    wn_vgp/4,
   47	    wn_at/2,
   48	    wn_ant/4,
   49	    wn_sa/4,
   50	    wn_sk/3,
   51	    wn_syntax/3,
   52	    wn_ppl/4,
   53	    wn_per/4,
   54	    wn_fr/3,
   55
   56	    wn_cat/3,			% +SynSet, -SyntacticCategory, -Offset
   57	    ss_type/2,			% +Code, -Type
   58
   59	    load_wordnet/0		% force loading everything
   60	  ]).

Wordnet lexical and semantic database

This module discloses the Wordnet Prolog files is a more SWI-Prolog friendly manner. It exploits SWI-Prolog demand-loading and SWI-Prolog Quick Load Files to load `just-in-time' and as quickly as possible.

The system creates Quick Load Files for each wordnet file needed if the .qlf file doesn't exist and the wordnet directory is writeable. For shared installations it is adviced to run load_wordnet/0 as user with sufficient privileges to create the Quick Load Files.

This library defines a portray/1 rule to explain synset ids.

Some more remarks:

author
- Originally by Jan Wielemaker. Partly documented by Samer Abdallah. Current comments copied from prologdb.5WN.html file from the sources.
See also
- Wordnet is a lexical database for the English language. See http://www.cogsci.princeton.edu/~wn/ */
   95		 /*******************************
   96		 *          FIND WORDNET	*
   97		 *******************************/
   98
   99:- multifile user:file_search_path/2.  100
  101user:file_search_path(wndb, WNDB) :-
  102    (   getenv('WNDB', WNDB)
  103    ->  true
  104    ;   current_prolog_flag(windows, true)
  105    ->  WNDB = 'C:\\Program Files\\WordNet\\3.0'
  106    ;   WNDB = '/usr/local/WordNet-3.0'
  107    ).
  108
  109haswndb :-
  110    absolute_file_name(wndb(wn_s), _,
  111                       [ file_type(prolog),
  112                         access(read),
  113                         file_errors(fail)
  114                       ]).
  115checkwndb :-
  116    haswndb,
  117    !.
  118checkwndb :-
  119    print_message(error, wordnet(nodb)).
  120
  121:- initialization
  122    checkwndb.
 wn_op(PredSpec) is nondet
Definition of wordnet operator types.
  129wn_op(ant(synset_id, w_num, synset_id, w_num)).
  130wn_op(at(synset_id, synset_id)).
  131wn_op(cls(synset_id, w_num, synset_id, wn_num, class_type)).
  132wn_op(cs(synset_id, synset_id)).
  133wn_op(der(synset_id, w_num, synset_id, wn_num)).
  134wn_op(ent(synset_id, synset_id)).
  135wn_op(fr(synset_id, w_num, f_num)).
  136wn_op(g(synset_id, '(gloss)')).
  137wn_op(hyp(synset_id, synset_id)).
  138wn_op(ins(synset_id, synset_id)).
  139wn_op(mm(synset_id, synset_id)).
  140wn_op(mp(synset_id, synset_id)).
  141wn_op(ms(synset_id, synset_id)).
  142wn_op(per(synset_id, w_num, synset_id, w_num)).
  143wn_op(ppl(synset_id, w_num, synset_id, w_num)).
  144wn_op(s(synset_id, w_num, 'word', ss_type, sense_number, tag_count)).
  145wn_op(sa(synset_id, w_num, synset_id, w_num)).
  146wn_op(sim(synset_id, synset_id)).
  147wn_op(sk(synset_id, w_num, sense_key)).
  148wn_op(syntax(synset_id, w_num, syntax)).
  149wn_op(vgp(synset_id, w_num, synset_id, w_num)).
  150
  151:- if(current_prolog_flag(xref, true)).  152
  153% this  declaration  ensures  that  the  (ClioPatria)  cross  referencer
  154% considers this predicates defined. We should not really define them as
  155% handling these predicates is based on autoloading.
  156
  157:- dynamic
  158    ant/4,
  159    at/2,
  160    cls/5,
  161    cs/2,
  162    der/4,
  163    ent/2,
  164    fr/3,
  165    g/2,
  166    hyp/2,
  167    ins/2,
  168    mm/2,
  169    ms/2,
  170    per/4,
  171    ppl/4,
  172    s/6,
  173    sa/4,
  174    sim/2,
  175    sk/3,
  176    syntax/3,
  177    vgp/4.  178
  179:- endif.  180
  181		 /*******************************
  182		 *    WORDNET BASIC RELATIONS   *
  183		 *******************************/
 wn_ant(?Antonym1, ?Wnum1, ?Antonym2, ?WNum2) is nondet
The ant operator specifies antonymous word s. This is a lexical relation that holds for all syntactic categories. For each antonymous pair, both relations are listed (ie. each synset_id,w_num pair is both a source and target word.)
  192wn_ant(Antonym1, Wnum1, Antonym2, WNum2) :- ant(Antonym1, Wnum1, Antonym2, WNum2).
 wn_at(?Noun, ?Adjective) is nondet
The at operator defines the attribute relation between noun and adjective synset pairs in which the adjective is a value of the noun. For each pair, both relations are listed (ie. each synset_id is both a source and target).
  201wn_at(Noun, Adjective) :- at(Noun, Adjective).
 wn_cls(?SynSet, ?W1, ?Class, ?W2, ?ClassType) is nondet
The cls operator specifies that the first synset has been classified as a member of the class represented by the second synset. Either of the w_num's can be 0, reflecting that the pointer is semantic in the original WordNet database.
  210wn_cls(SynSet, W1, Class, W2, ClassType) :-
  211    cls(SynSet, W1, Class, W2, ClassType).
 wn_cs(?SynSet, ?Causes) is nondet
First kind of event is caused by second.

The cs operator specifies that the second synset is a cause of the first synset. This relation only holds for verbs.

  220wn_cs(SynSet, Causes) :-
  221    cs(SynSet, Causes).
 wn_der(?SynSet1, ?W1, ?SynSet2, ?W2) is nondet
The der operator specifies that there exists a reflexive lexical morphosemantic relation between the first and second synset terms representing derivational morphology.
  229wn_der(SynSet1, W1, SynSet2, W2) :-
  230    der(SynSet1, W1, SynSet2, W2).
 wn_ent(?SynSet, ?Entailment) is nondet
The ent operator specifies that the second synset is an entailment of first synset. This relation only holds for verbs.
  237wn_ent(SynSet, Entailment) :-
  238    ent(SynSet, Entailment).
 wn_fr(?Synset, ?Wnum, ?Fnum) is nondet
fr operator specifies a generic sentence frame for one or all words in a synset. The operator is defined only for verbs.
  245wn_fr(Synset, Wnum, Fnum) :-
  246    fr(Synset, Wnum, Fnum).
 wn_g(?SynSet, ?Gloss) is nondet
The g operator specifies the gloss for a synset.
  252wn_g(SynSet, Gloss) :-
  253    g(SynSet, Gloss).
 wn_hyp(?Hyponym, ?HyperNym) is nondet
The hyp operator specifies that the second synset is a hypernym of the first synset. This relation holds for nouns and verbs. The reflexive operator, hyponym, implies that the first synset is a hyponym of the second synset.
  262wn_hyp(Hyponym, HyperNym) :-
  263    hyp(Hyponym, HyperNym).
 wn_ins(?A, ?B) is nondet
The ins operator specifies that the first synset is an instance of the second synset. This relation holds for nouns. The reflexive operator, has_instance, implies that the second synset is an instance of the first synset.
  272wn_ins(A,B) :- ins(A,B).
 wn_mm(?SynSet, ?MemberMeronym) is nondet
The mm operator specifies that the second synset is a member meronym of the first synset. This relation only holds for nouns. The reflexive operator, member holonym, can be implied.
  280wn_mm(SynSet, MemberMeronym) :-
  281    mm(SynSet, MemberMeronym).
 wn_mp(?SynSet, ?PartMeronym) is nondet
The mp opeQrator specifies that the second synset is a part meronym of the first synset. This relation only holds for nouns. The reflexive operator, part holonym, can be implied.
  289wn_mp(SynSet, PartMeronym) :-
  290    ms(SynSet, PartMeronym).
 wn_ms(?SynSet, ?SubstanceMeronym) is nondet
The ms operator specifies that the second synset is a substance meronym of the first synset. This relation only holds for nouns. The reflexive operator, substance holonym, can be implied.
  298wn_ms(SynSet, SubstanceMeronym) :-
  299    ms(SynSet, SubstanceMeronym).
 wn_per(?Synset1, ?WNum1, ?Synset2, ?WNum2) is nondet
The per operator specifies two different relations based on the parts of speech involved. If the first word is in an adjective synset, that word pertains to either the noun or adjective second word. If the first word is in an adverb synset, that word is derived from the adjective second word.
  309wn_per(Synset1, WNum1, Synset2, WNum2) :-
  310    per(Synset1, WNum1, Synset2, WNum2).
 wn_ppl(?Synset1, ?WNum1, ?Synset2, ?WNum2) is nondet
ppl operator specifies that the adjective first word is a participle of the verb second word. The reflexive operator can be implied.
  317wn_ppl(Synset1, WNum1, Synset2, WNum2) :-
  318    ppl(Synset1, WNum1, Synset2, WNum2).
 wn_s(?SynSet, ?WNum, ?Word, ?SynSetType, ?Sense, ?Tag) is nondet
A s operator is present for every word sense in WordNet. In wn_s.pl, w_num specifies the word number for word in the synset.
  325wn_s(SynSet, WNum, Word, SynSetType, Sense, Tag) :-
  326    s(SynSet, WNum, Word, SynSetType, Sense, Tag).
 wn_sa(?Synset1, ?WNum1, ?Synset2, ?WNum2) is nondet
The sa operator specifies that additional information about the first word can be obtained by seeing the second word. This operator is only defined for verbs and adjectives. There is no reflexive relation (ie. it cannot be inferred that the additional information about the second word can be obtained from the first word).
  336wn_sa(Synset1, WNum1, Synset2, WNum2) :-
  337    sa(Synset1, WNum1, Synset2, WNum2).
 wn_sim(?SynSet, ?Similar) is nondet
The sim operator specifies that the second synset is similar in meaning to the first synset. This means that the second synset is a satellite the first synset, which is the cluster head. This relation only holds for adjective synsets contained in adjective clusters.
  346wn_sim(SynSet, Similar) :-
  347    sim(SynSet, Similar).
 wn_sk(?A, ?B, ?C) is nondet
A sk operator is present for every word sense in WordNet. This gives the WordNet sense key for each word sense.
  354wn_sk(A,B,C) :-
  355    sk(A,B,C).
 wn_syntax(?A, ?B, ?C) is nondet
The syntax operator specifies the syntactic marker for a given word sense if one is specified.
  362wn_syntax(A,B,C) :-
  363    syntax(A,B,C).
 wn_vgp(?Verb, ?W1, ?Similar, ?W2) is nondet
vgp operator specifies verb synsets that are similar in meaning and should be grouped together when displayed in response to a grouped synset search.
  371wn_vgp(Verb, W1, Similar, W2) :-
  372    vgp(Verb, W1, Similar, W2).
  373
  374
  375		 /*******************************
  376		 *	   CODE MAPPINGS	*
  377		 *******************************/
 wn_cat(+SynSet, -SyntacticCategory, -Offset) is det
Break the synset id into its syntactic category and offset as defined in the manpage prologdb.5
  384wn_cat(SynSet, Category, Small) :-
  385	Small is SynSet mod 100000000,
  386	CatNum is SynSet // 100000000,
  387	wn_cat(CatNum, Category).
  388
  389wn_cat(1, noun).
  390wn_cat(2, verb).
  391wn_cat(3, adjective).
  392wn_cat(4, adverb).
 ss_type(+Code, -Type) is det
ss_type(-Code, -Type) is nondet
Mapping between readable syntactic category and code.
  399ss_type(n, noun).
  400ss_type(v, verb).
  401ss_type(a, adjective).
  402ss_type(s, adjective_satellite).
  403ss_type(r, adverb).
 load_wordnet is det
Load all of wordnet. This must be used to create all .QLF files or before creating a stand-alone saved state
  411load_wordnet :-
  412	(   wn_op(O),
  413	    functor(O, Name, _),
  414	    load_op(Name),
  415	    fail
  416	;   true
  417	).
  418
  419load_op(Name) :-
  420	atom_concat('wn_', Name, File),
  421        load_files(wndb(File),
  422                   [ qcompile(auto)
  423                   ]).
  424
  425
  426		 /*******************************
  427		 *     JUST IN TIME LOADING	*
  428		 *******************************/
  429
  430:- multifile user:exception/3.  431
  432user:exception(undefined_predicate, wordnet:Name/Arity, retry) :-
  433	functor(Op, Name, Arity),
  434	wn_op(Op),
  435	load_op(Name).
  436
  437
  438		 /*******************************
  439		 *            MESSAGES		*
  440		 *******************************/
  441
  442:- multifile prolog:message//1.  443
  444prolog:message(wordnet(nodb)) -->
  445    [ 'Cannot find WordNet data files.  Please set the environment'-[], nl,
  446      'variable WNDB to point at the directory holding the WordNet files'-[]
  447    ]