1/*   bibtex_dcg
    2     Author: cnngimenez.
    3
    4     Copyright (C) 2020 cnngimenez
    5
    6     This program is free software: you can redistribute it and/or modify
    7     it under the terms of the GNU General Public License as published by
    8     the Free Software Foundation, either version 3 of the License, or
    9     at your option) any later version.
   10
   11     This program is distributed in the hope that it will be useful,
   12     but WITHOUT ANY WARRANTY; without even the implied warranty of
   13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   14     GNU General Public License for more details.
   15
   16     You should have received a copy of the GNU General Public License
   17     along with this program.  If not, see <http://www.gnu.org/licenses/>.
   18
   19     06 Jun 2020
   20*/
   21
   22
   23:- module(bibtex_dcg, [
   24              fields//1, field//2,
   25	      entry//1,
   26              author//2, authors//1,
   27              k_sep/1,
   28              keyword_spaces//1,
   29              keyword_sep//1
   30	  ]).

bibtex_dcg: BibTeX DCG rules.

DCG rules that can parse BibTeX elements.

In this text, a pseudo-EBNF syntax is used to explain some structures.

BibTeX entry format and token names

The following illustrates a BibTex entry and its format. The name of each element is used as a DCG rule's predicate name.

"@" name "{" label ","
    field ","
    field ","
    ...
"}"

Each field's format is as follows:

key "= {" value "},"

Important fields

Fields like author and keywords have their own format. DCG rules for them are included here. However, consult the bibtex_fields library for more information.

Author value

The author field format is as follows:

`author "and" author "and" author "and" ...`

There are two possible formats for each author's name:

Keywords

Keywords are phrases written in the article. There are other fields that use the same syntaxs for user-defined keywords.

The format is the same:

`a_phrase "," a_phrase "," ...`

The keywords separators supported are "," and ";". They can be mixed.

Important terms

   87:- license(gplv3).   88
   89:- use_module(library(dcg/basics)).
 entry(-Entry:pred)//
Return an entry/3 term corresponding with the codes parsed.
Arguments:
Entry- entry(Name:atom, Label:string, Lst: list).
See also
- field//2 */
   99entry(entry(AEntryName, SLabel, Lst)) -->
  100    blanks, "@", string_without("{",EntryName), blanks, "{",
  101	       string_without(",", Label), blanks, ",",
  102	       fields(Lst), blanks,
  103	       "}",
  104	       {atom_codes(AEntryName, EntryName),
  105		string_codes(SLabel, Label)}.
 field(-Key:codes, -Value:codes)// is det
Return the Key = Value field from a bibtex field. */
  112field(Key, Value) --> string_without(" =", Key),
  113		      whites, "=", whites,
  114		      value(Value).
  115
  116
  117inside_value(Value) --> "{", !, inside_value(Val1), "}", inside_value(Val2),
  118                      { append([`{`, Val1, `}`, Val2], Value) }.
  119
  120inside_value(Value) --> [C], { C \= 0'} }, !, inside_value(R),
  121                           { append([[C], R], Value) }.
  122inside_value([]) --> [].
 value(-Value:codes)//
Parse and retrieve the field's value. The value is returned without any parsing. Consider using a function from bibtex_fields library.
Arguments:
Value- the value of the field.
See also
- field//2 */
  133value(Value) --> "{", !, inside_value(Value), "}". 
  136value(Value) --> string_without(" ,}\n\t", Value), !.
 fields(-Lst:list)// is det
Return a list of field/2 terms parsed from a bibtex fields string. Each element format is field(+Key: atom, +Value: string). */
  145fields(Lst) --> blanks, field(Key, Value), blanks, ",",!, blanks,
  146		fields(LstRest),
  147		{atom_codes(AKey, Key), string_codes(SValue, Value),
  148		    append([field(AKey, SValue)], LstRest, Lst)}.
  149fields([field(AKey, SValue)]) --> blanks, field(Key, Value), blanks,
  150				  {atom_codes(AKey, Key), string_codes(SValue, Value)}.
 authorname(Name)//
Is true if and is not part of the Name.

Dividing like this will make a more recent backtrack instead backtracking a whole predicate like author//2. */

  164authorname(Name), " and" --> string(Name), blank, blanks, "and",
  165			     {\+ append([_, ` and`, _], Name)},!.
  166authorname(Name) --> string(Name), eos,
  167		     {\+ append([_, ` and`, _], Name)},!.
 author_names(-Names:codes, -Surname:codes)//
Split the string into names and surname. It is considered that the last word is the surname.

For example: J. R. Tolkien is spitted into J. R. as Name and Tolkien as Surname.

Arguments:
Names- A code with all the names.
Surname- A code with the surname. */
  182author_names([], Surname), ` and` --> string_without(" ", Surname),
  183                                    blank, blanks, `and`, !.
  184author_names([], Surname) --> string_without(" ", Surname),
  185                            blanks, eos, !.
  186author_names(Name, Surname) --> string_without(" ", Name1), white, whites,
  187                              author_names(Rest, Surname),
  188                              {
  190                                  Rest \= [],
  191                                  append([Name1, ` `, Rest], Name) ;
  192
  193                                  Name1 = Name
  194                              }
  194.
  195
 author(-Surname:list, -Name:list)//
Two types of author field's value:
  206author(Surname, Names) --> string_without(",", Surname), ",", !,
  207                        whites, authorname(Names).
  208author(Surname, Names), ` and` --> author_names(Names, Surname), blank, blanks, `and`, !.
  209author(Surname, Names) --> author_names(Names, Surname), blanks, eos, !.
 authors(-Lst:list)//
Process a list of authors (i.e. author field value). Use it with a value string only.
Arguments:
Lst- is a list of author/2 terms: author(Surname:string, Name:string). */
  219authors(Lst) --> author(Surname, Name), blank, blanks, "and", !, blank, blanks, authors(LstRest),
  220		 {string_codes(SurnameS, Surname), string_codes(NameS, Name),
  221		  append([author(SurnameS, NameS)], LstRest, Lst)}.
  222authors([author(SurnameS, NameS)]) --> author(Surname, Name),
  223				       {string_codes(SurnameS, Surname),
  224					string_codes(NameS, Name)}.
 k_sep(?Char:char)
True iff char is considered as separators. */
  235k_sep(0',).
  236k_sep(0';).
 keyword_separator//
Parse any separator declared in k_sep/1. */
  243keyword_separator --> [C], {k_sep(C)}. 
 keyword_sep(-Keywords:list)//
True iff Keywords is a list of keywords parsed from the input. The separator should be one of k_sep/1.
Arguments:
Keywords- a list of strings. */
See also
- keyword_spaces//1
  254keyword_sep(Keywords) -->
  255    whites, string_without(";,", S), whites, keyword_separator,!,
  256    keyword_sep(Rest),
  257    {string_codes(SS, S),
  258     append([SS], Rest, Keywords)}.
  259keyword_sep([SS]) --> whites, string_without(";,", S), whites, eos,
  260		     {string_codes(SS, S)}.
 keyword_spaces(-Keywords:list)//
Retrieve all the keywords using spaces as separator.
Arguments:
Keywords- is a list of string codes. */
  269keyword_spaces(Keywords) -->
  270    whites, string_without(" \t", S), white, !, keyword_spaces(Rest),
  271    {string_codes(SS, S),
  272     append([SS], Rest, Keywords)}.
  273keyword_spaces([SS]) -->
  274    whites, string_without(" \t", S), whites, eos,
  275    {string_codes(SS, S)}