1/*  Author:        Jan Wielemaker
    2    E-mail:        J.Wielemaker@vu.nl
    3    WWW:           http://www.swi-prolog.org
    4    Copyright (c)  2017, VU University Amsterdam
    5    All rights reserved.
    6
    7    Redistribution and use in source and binary forms, with or without
    8    modification, are permitted provided that the following conditions
    9    are met:
   10
   11    1. Redistributions of source code must retain the above copyright
   12       notice, this list of conditions and the following disclaimer.
   13
   14    2. Redistributions in binary form must reproduce the above copyright
   15       notice, this list of conditions and the following disclaimer in
   16       the documentation and/or other materials provided with the
   17       distribution.
   18
   19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   22    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   23    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   24    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   25    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   26    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   27    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30    POSSIBILITY OF SUCH DAMAGE.
   31*/
   32
   33:- module(wordnet,
   34	  [ wn_s/6,			% basic Wordnet relations
   35	    wn_g/2,
   36	    wn_hyp/2,
   37	    wn_ins/2,
   38	    wn_ent/2,
   39	    wn_sim/2,
   40	    wn_mm/2,
   41	    wn_ms/2,
   42	    wn_mp/2,
   43	    wn_der/4,
   44	    wn_cls/5,
   45	    wn_cs/2,
   46	    wn_vgp/4,
   47	    wn_at/2,
   48	    wn_ant/4,
   49	    wn_sa/4,
   50	    wn_sk/3,
   51	    wn_syntax/3,
   52	    wn_ppl/4,
   53	    wn_per/4,
   54	    wn_fr/3,
   55
   56	    wn_cat/3,			% +SynSet, -SyntacticCategory, -Offset
   57	    ss_type/2,			% +Code, -Type
   58
   59	    load_wordnet/0		% force loading everything
   60	  ]).

Wordnet lexical and semantic database

This module discloses the Wordnet Prolog files in a more SWI-Prolog friendly manner. It exploits SWI-Prolog demand-loading and SWI-Prolog Quick Load Files to load `just-in-time' and as quickly as possible.

The system creates Quick Load Files for each wordnet file needed if the .qlf file doesn't exist and the wordnet directory is writeable. For shared installations it is advised to run load_wordnet/0 as a user with sufficient privileges to create the Quick Load Files.

This library defines a portray/1 rule to explain synset ids.

Some more remarks:

author
- Originally by Jan Wielemaker. Partly documented by an unknown author. Current comments copied from prologdb.5WN.html file from the sources.
See also
- Wordnet is a lexical database for the English language. See http://www.cogsci.princeton.edu/~wn/ */
   95		 /*******************************
   96		 *          FIND WORDNET	*
   97		 *******************************/
   98
   99:- multifile user:file_search_path/2.  100
  101user:file_search_path(wndb, WNDB) :-
  102    (   getenv('WNDB', WNDB)
  103    ->  true
  104    ;   current_prolog_flag(windows, true)
  105    ->  WNDB = 'C:\\Program Files\\WordNet\\3.0'
  106%    ;   WNDB = '/usr/local/WordNet-3.0'
  107    ;   WNDB = '/mnt/BPL/WordNet-3.0/prolog'
  108    ).
  109
  110haswndb :-
  111    absolute_file_name(wndb(wn_s), _,
  112                       [ file_type(prolog),
  113                         access(read),
  114                         file_errors(fail)
  115                       ]).
  116checkwndb :-
  117    haswndb,
  118    !.
  119checkwndb :-
  120    print_message(error, wordnet(nodb)).
  121
  122:- initialization
  123    checkwndb.
%!  wn_op(?PredSpec) is nondet.
%
%   Definition of the WordNet operator types. Each fact is a term whose
%   functor is the operator name and whose arguments are descriptive
%   labels for the corresponding argument of that operator.
%
%   Within this file only the name and arity of PredSpec are used:
%   load_wordnet/0 extracts the functor name to load each data file and
%   the undefined-predicate hook matches Name/Arity against this table.
%   Word-number arguments are uniformly labelled `w_num`.
wn_op(ant(synset_id, w_num, synset_id, w_num)).
wn_op(at(synset_id, synset_id)).
wn_op(cls(synset_id, w_num, synset_id, w_num, class_type)).
wn_op(cs(synset_id, synset_id)).
wn_op(der(synset_id, w_num, synset_id, w_num)).
wn_op(ent(synset_id, synset_id)).
wn_op(fr(synset_id, w_num, f_num)).
wn_op(g(synset_id, '(gloss)')).
wn_op(hyp(synset_id, synset_id)).
wn_op(ins(synset_id, synset_id)).
wn_op(mm(synset_id, synset_id)).
wn_op(mp(synset_id, synset_id)).
wn_op(ms(synset_id, synset_id)).
wn_op(per(synset_id, w_num, synset_id, w_num)).
wn_op(ppl(synset_id, w_num, synset_id, w_num)).
wn_op(s(synset_id, w_num, 'word', ss_type, sense_number, tag_count)).
wn_op(sa(synset_id, w_num, synset_id, w_num)).
wn_op(sim(synset_id, synset_id)).
wn_op(sk(synset_id, w_num, sense_key)).
wn_op(syntax(synset_id, w_num, syntax)).
wn_op(vgp(synset_id, w_num, synset_id, w_num)).
  151
  152
  153		 /*******************************
  154		 *    WORDNET BASIC RELATIONS   *
  155		 *******************************/
%!  wn_ant(?Antonym1, ?Wnum1, ?Antonym2, ?WNum2) is nondet.
%
%   Antonymy between words. This is a lexical relation that holds for
%   all syntactic categories. Both directions of every antonymous pair
%   are listed, i.e. each synset_id,w_num pair occurs both as source and
%   as target word.
wn_ant(SynSetA, WordA, SynSetB, WordB) :-
    ant(SynSetA, WordA, SynSetB, WordB).
%!  wn_at(?Noun, ?Adjective) is nondet.
%
%   Attribute relation between noun and adjective synset pairs in which
%   the adjective is a value of the noun. Both directions are listed for
%   each pair, so every synset_id is both a source and a target.
wn_at(NounSynSet, AdjectiveSynSet) :-
    at(NounSynSet, AdjectiveSynSet).
%!  wn_cls(?SynSet, ?W1, ?Class, ?W2, ?ClassType) is nondet.
%
%   The first synset has been classified as a member of the class
%   represented by the second synset. Either w_num may be 0, reflecting
%   that the pointer is semantic in the original WordNet database.
wn_cls(Member, MemberWord, Class, ClassWord, Type) :-
    cls(Member, MemberWord, Class, ClassWord, Type).
%!  wn_cs(?SynSet, ?Causes) is nondet.
%
%   First kind of event is caused by second.
%
%   The cs operator specifies that the second synset is a cause of the
%   first synset. This relation only holds for verbs.
wn_cs(Event, Cause) :-
    cs(Event, Cause).
%!  wn_der(?SynSet1, ?W1, ?SynSet2, ?W2) is nondet.
%
%   A reflexive lexical morphosemantic relation exists between the first
%   and second synset terms, representing derivational morphology.
wn_der(FromSynSet, FromWord, ToSynSet, ToWord) :-
    der(FromSynSet, FromWord, ToSynSet, ToWord).
%!  wn_ent(?SynSet, ?Entailment) is nondet.
%
%   The second synset is an entailment of the first synset. This
%   relation only holds for verbs.
wn_ent(Verb, Entailed) :-
    ent(Verb, Entailed).
%!  wn_fr(?Synset, ?Wnum, ?Fnum) is nondet.
%
%   Generic sentence frame for one or all words in a synset. The
%   operator is defined only for verbs.
wn_fr(VerbSynSet, WordNum, FrameNum) :-
    fr(VerbSynSet, WordNum, FrameNum).
%!  wn_g(?SynSet, ?Gloss) is nondet.
%
%   Gloss of a synset.
wn_g(Id, GlossText) :-
    g(Id, GlossText).
%!  wn_hyp(?Hyponym, ?HyperNym) is nondet.
%
%   The second synset is a hypernym of the first synset. This relation
%   holds for nouns and verbs. The reflexive operator, hyponym, implies
%   that the first synset is a hyponym of the second synset.
wn_hyp(Specific, General) :-
    hyp(Specific, General).
%!  wn_ins(?A, ?B) is nondet.
%
%   The first synset is an instance of the second synset. This relation
%   holds for nouns. The reflexive operator, has_instance, implies that
%   the second synset is an instance of the first synset.
wn_ins(Instance, Class) :-
    ins(Instance, Class).
%!  wn_mm(?SynSet, ?MemberMeronym) is nondet.
%
%   The second synset is a member meronym of the first synset. This
%   relation only holds for nouns. The reflexive operator, member
%   holonym, can be implied.
wn_mm(Whole, Member) :-
    mm(Whole, Member).
%!  wn_mp(?SynSet, ?PartMeronym) is nondet.
%
%   The mp operator specifies that the second synset is a part meronym
%   of the first synset. This relation only holds for nouns. The
%   reflexive operator, part holonym, can be implied.
%
%   Delegates to mp/2 (part meronyms). ms/2 holds the substance meronyms
%   and is served by wn_ms/2.
wn_mp(SynSet, PartMeronym) :-
    mp(SynSet, PartMeronym).
%!  wn_ms(?SynSet, ?SubstanceMeronym) is nondet.
%
%   The second synset is a substance meronym of the first synset. This
%   relation only holds for nouns. The reflexive operator, substance
%   holonym, can be implied.
wn_ms(Whole, Substance) :-
    ms(Whole, Substance).
%!  wn_per(?Synset1, ?WNum1, ?Synset2, ?WNum2) is nondet.
%
%   The per operator covers two relations, depending on the parts of
%   speech involved. If the first word is in an adjective synset, it
%   pertains to the noun or adjective second word. If the first word is
%   in an adverb synset, it is derived from the adjective second word.
wn_per(FromSynSet, FromWord, ToSynSet, ToWord) :-
    per(FromSynSet, FromWord, ToSynSet, ToWord).
%!  wn_ppl(?Synset1, ?WNum1, ?Synset2, ?WNum2) is nondet.
%
%   The adjective first word is a participle of the verb second word.
%   The reflexive operator can be implied.
wn_ppl(AdjSynSet, AdjWord, VerbSynSet, VerbWord) :-
    ppl(AdjSynSet, AdjWord, VerbSynSet, VerbWord).
%!  wn_s(?SynSet, ?WNum, ?Word, ?SynSetType, ?Sense, ?Tag) is nondet.
%
%   An s operator is present for every word sense in WordNet. In
%   wn_s.pl, w_num specifies the word number for the word in the synset.
wn_s(Id, WordNum, Word, Type, SenseNum, TagCount) :-
    s(Id, WordNum, Word, Type, SenseNum, TagCount).
%!  wn_sa(?Synset1, ?WNum1, ?Synset2, ?WNum2) is nondet.
%
%   Additional information about the first word can be obtained by
%   seeing the second word. This operator is only defined for verbs and
%   adjectives. There is no reflexive relation: it cannot be inferred
%   that additional information about the second word can be obtained
%   from the first word.
wn_sa(FromSynSet, FromWord, SeeSynSet, SeeWord) :-
    sa(FromSynSet, FromWord, SeeSynSet, SeeWord).
%!  wn_sim(?SynSet, ?Similar) is nondet.
%
%   The second synset is similar in meaning to the first synset. This
%   means that the second synset is a satellite of the first synset,
%   which is the cluster head. This relation only holds for adjective
%   synsets contained in adjective clusters.
wn_sim(Head, Satellite) :-
    sim(Head, Satellite).
%!  wn_sk(?A, ?B, ?C) is nondet.
%
%   An sk operator is present for every word sense in WordNet. It gives
%   the WordNet sense key for each word sense. The argument labels
%   listed by wn_op/1 are synset_id, w_num and sense_key.
wn_sk(Id, WordNum, SenseKey) :-
    sk(Id, WordNum, SenseKey).
%!  wn_syntax(?A, ?B, ?C) is nondet.
%
%   Syntactic marker for a given word sense, if one is specified. The
%   argument labels listed by wn_op/1 are synset_id, w_num and syntax.
wn_syntax(Id, WordNum, Marker) :-
    syntax(Id, WordNum, Marker).
%!  wn_vgp(?Verb, ?W1, ?Similar, ?W2) is nondet.
%
%   Verb synsets that are similar in meaning and should be grouped
%   together when displayed in response to a grouped synset search.
wn_vgp(VerbSynSet, VerbWord, SimilarSynSet, SimilarWord) :-
    vgp(VerbSynSet, VerbWord, SimilarSynSet, SimilarWord).
  345
  346
  347		 /*******************************
  348		 *	   CODE MAPPINGS	*
  349		 *******************************/
%!  wn_cat(+SynSet, -SyntacticCategory, -Offset) is det.
%
%   Break the synset id into its syntactic category and offset as
%   defined in the manpage prologdb.5. The category code is the integer
%   quotient of SynSet by 100000000 and the offset is SynSet modulo
%   100000000. Fails if the category code is not in the wn_cat/2 table.
wn_cat(SynSet, Category, Offset) :-
    CategoryCode is SynSet // 100000000,
    Offset is SynSet mod 100000000,
    wn_cat(CategoryCode, Category).

%   wn_cat(?CategoryCode, ?Category)
%
%   Table mapping the numeric category code to its name.
wn_cat(1, noun).
wn_cat(2, verb).
wn_cat(3, adjective).
wn_cat(4, adverb).
%!  ss_type(+Code, -Type) is det.
%!  ss_type(-Code, -Type) is nondet.
%
%   Table relating the one-letter synset type code to a readable name
%   for the syntactic category.
ss_type(n,  noun).
ss_type(v,  verb).
ss_type(a,  adjective).
ss_type(s,  adjective_satellite).
ss_type(r,  adverb).
%!  load_wordnet is det.
%
%   Load all of wordnet. This must be used to create all .QLF files or
%   before creating a stand-alone saved state.
%
%   Implemented as a failure-driven loop: for every operator listed by
%   wn_op/1, load_op/1 is called with the operator name. The second
%   clause makes the predicate succeed once all operators were visited.
load_wordnet :-
    wn_op(Op),
    functor(Op, Name, _Arity),
    load_op(Name),
    fail.
load_wordnet.
  390
  391load_op(Name) :-
  392	atom_concat('wn_', Name, File),
  393	absolute_file_name(wndb(File),
  394			   [ access(read),
  395			     file_type(prolog)
  396			   ],
  397			   PlFile),
  398	file_name_extension(Base, _Ext, PlFile),
  399	file_name_extension(Base, qlf, QlfFile),
  400	(   exists_file(QlfFile),
  401	    time_file(QlfFile, QlfTime),
  402	    time_file(PlFile, PlTime),
  403	    QlfTime >= PlTime
  404	->  load_files(QlfFile)
  405	;   access_file(QlfFile, write)
  406	->  qcompile(PlFile)
  407	;   load_files(PlFile)
  408	).
  409
  410
  411		 /*******************************
  412		 *     JUST IN TIME LOADING	*
  413		 *******************************/
  414
  415:- multifile user:exception/3.  416
  417user:exception(undefined_predicate, wordnet:Name/Arity, retry) :-
  418	functor(Op, Name, Arity),
  419	wn_op(Op),
  420	load_op(Name).
  421
  422
  423		 /*******************************
  424		 *            MESSAGES		*
  425		 *******************************/
  426
  427:- multifile prolog:message//1.  428
  429prolog:message(wordnet(nodb)) -->
  430    [ 'Cannot find WordNet data files.  Please set the environment'-[], nl,
  431      'variable WNDB to point at the directory holding the WordNet files'-[]
  432    ]