1/*   rfc5322
    2     Author: Gimenez, Christian.
    3
    4     Copyright (C) 2016 Gimenez, Christian
    5
    6     This program is free software: you can redistribute it and/or modify
    7     it under the terms of the GNU General Public License as published by
    8     the Free Software Foundation, either version 3 of the License, or
    9     at your option) any later version.
   10
   11     This program is distributed in the hope that it will be useful,
   12     but WITHOUT ANY WARRANTY; without even the implied warranty of
   13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   14     GNU General Public License for more details.
   15
   16     You should have received a copy of the GNU General Public License
   17     along with this program.  If not, see <http://www.gnu.org/licenses/>.
   18
   19     09 nov 2016
   20*/
   21
   22
   23:- module(imf, [
   24	      message//1, message_str//1,
   25	      header//1, header_str//1,
   26	      header_field//2,
   27	      field_body//1,
   28	      body//1,
   29	      folded_string//1,
   30	      imf_parse_file/2,
   31	      date_time//1
   32	  ]).

rfc5322: Internet Message Format

This modules gives DCGs for using with the Internet Message Format(IMF) specified on the RFC 5322.

Unsupported

Line characters 998/78 limits specified on section 2.1.1 are not supported in this DCG implementation.

Section 4 corresponding for the obsolete syntax is currently unsupported.

MIME support is not provided, instead you can use mime_pack/3 from library(http/mimepack) (HTTP-support package) for creating a MIME message and mime_parse/2 from the library(mime) (C-library package).

Common Characters

For characters like WSP, CR, LF, etc. they are defined at Appendix B.1 at the RFC5234 ABNF specification.

author
- Gimenez, Christian
license
- GPLv3 */
   60:- license(gplv3).   61
   62:- ensure_loaded(library(dcg/basics)).
 crlf//
Carriage return and Line feed.

Support for only LF is given. Some mails files use only LF.

See also
- IMF Section 2.1
- ABNF Appendix B.1 */
   77crlf --> [10],!.
   78crlf --> [13,10].
 wsp//
White space characters.
See also
- IMF Section 2.2
- ABNF Appendix B.1 */
   86wsp --> [32],!.
   87wsp --> [9].
 wsps//
Cero or more white spaces characters. */
   93wsps --> wsp,!, wsps.
   94wsps --> [].
 not_wsp//
A non white space character */
  100not_wsp --> [A], { \+ wsp(A, []) }.
 message(?Message)//
A message format.
See also
- IMF Section 2 */
  112message(message(header(H),body(B))) --> header(H), crlf, body(B),!.
  113message(message(header(H),body(``))) --> header(H), crlf.
 message_str(?Message)//
Same as message//1 but returning Strings instead of codes. */
  120message_str(message(header(H),body(B))) --> header_str(H), crlf, body(BC),!,
  121					{string_codes(B, BC)}.
  122message_str(message(header(H),body(""))) --> header_str(H), crlf.
 header(?LstHeaders)//
All the headers fields.
Arguments:
LstHeaders- A list of field/2 terms, like field(FieldName, FieldBody). */
  132header([field(FN,FB)]) --> header_field(FN,FB).
  133header([field(FN,FB)|R]) --> header_field(FN, FB), header(R).
 header_str(?LstHeaders)//
Same as header//1 but returning Strings instead of codes. */
  140header_str([field(FN,FB)]) --> header_field(FNC,FBC),
  141			       {string_codes(FN, FNC), string_codes(FB, FBC)}.
  142header_str([field(FN,FB)|R]) --> header_field(FNC, FBC), header_str(R),
  143				 {string_codes(FN, FNC), string_codes(FB, FBC)}.
 header_field(?FieldName, ?FieldBody)//
See also
- IMF Section 2.2 */
  150header_field(FieldName, FieldBody) --> string_without(`:`, FieldName), `:`, field_body(FieldBody), crlf.
 field_body(?Body)//
See also
- IMF Section 3
- IMF Section 4 */
  157field_body(Body) --> wsps,!, folded_string(Body).
  158field_body(Body) --> folded_string(Body).
 body(?Str)//
The body of the IMF. No MIME processing!
See also
- IMF Section 2.3 */
  166body(Str) --> string(Str), eos.
 fws//
A Folded White Space used for folding strings.
See also
- IMF Section 3.2.2 */
  178fws --> wsps, crlf, !, wsp, wsps.
  179fws --> wsp, wsps.
 cfws//
See also
- fws//
- IMF Section 3.2.2 */
  188cfws --> fws.
 folded_string(?Str)//
A folded string is a string wich can be writed into various lines. For example:
Folding text
  like this
  and this

Each CRLF is translated into space:

Folding text like this and this
See also
- IMF Section 2.2.3
- IMF Section 3.2.2 */
  213folded_string(Str) --> string_without([13,10], Str1), fws,!, % red cut
  214						      folded_string(Str2),
  215						      {append(Str1, ` `, Str3),
  216						       append(Str3, Str2, Str)}.
  217folded_string(Str) --> string_without([13,10], Str).
 imf_parse_file(+Path:atom, -Message:pred)
Read the file and parse it returning a Prolog predicate with all the information.

Message Output

Example of message/2:

message(
    header([
        field("From", "from@fromhost.com"),
        field("To", "to@tohost.com"),
        field("Subject","Subject part"),
        field("Date","Wed,  9 Dec 2015 17:27:50 -0300 (ART)")]),
    body("Hi world"))
Arguments:
Path- The file path.
Message- A message/2 predicate with the mail header/1 and body/1 information. */
  241imf_parse_file(Path, Message) :-
  242    read_file_to_codes(Path, Codes, []),
  243    message_str(Message, Codes, []).
 date_time(-Date:pred)//
Return a date predicate from a string header's value. Useful for the Date header.

The syntax is explained in section 3.3 of the IMF standard.

See also
- IMF Section 3.3 */
  258date_time(date(Year, Month, Day, Hour, Min, Sec, Tz, -, -)) -->
  259    (day_of_week, ",",! | []),
  260    date(date(Year, Month, Day)),
  261    time(time(Hour, Min, Sec, Tz)),
  262    (cfws,! | []).
  263
  264day_of_week -->  (fws,! | []), day_name.
  267day_name --> "Mon", !.
  268day_name --> "Tue", !.
  269day_name --> "Wed", !.
  270day_name --> "Thu", !.
  271day_name --> "Fri", !.
  272day_name --> "Sat", !.
  273day_name --> "Sun", !.
  274
  275date(date(Year, Month, Day)) --> day(Day), month(Month), year(Year).
  276
  277day(Day) --> (fws,! | []), digits(CDay), fws,
  278	     {number_codes(Day, CDay)}.
  281month(1) --> "Jan", !.
  282month(2) --> "Feb", !.
  283month(3) --> "Mar", !.
  284month(4) --> "Apr", !.
  285month(5) --> "May", !.
  286month(6) --> "Jun", !.
  287month(7) --> "Jul", !.
  288month(8) --> "Aug", !.
  289month(9) --> "Sep", !.
  290month(10) --> "Oct", !.
  291month(11) --> "Nov", !.
  292month(12) --> "Dec", !.
  293
  294year(Year) --> fws, digits(CYear), fws,
  295	       {number_codes(Year, CYear)}.
  298time(time(Hour, Min, Secs, Tz)) -->
  299    time_of_day(Hour, Min, Secs), zone(Tz).
  300
  301time_of_day(H, M, S) --> hour(H), ":", minute(M), ":", second(S), !.
  302time_of_day(H, M, 0) --> hour(H), ":", minute(M).
  303
  304hour(H) --> digits(CH),
  305	    {number_codes(H, CH)}.
  308minute(M) --> digits(CM),
  309	      {number_codes(M, CM)}.
  312second(S) --> digits(CS),
  313	      {number_codes(S, CS)}.
 zone(-Tz)//
Parse the timezone number and convert into a number: the amount of seconds to the west of Greenwich.

The Tz parameter is defined acording to the stamp_date_time/3 predicate and date/9 structure.

Arguments:
Tz- Timezone expressed in seconds west of Greenwich.
See also
- stamp_date_time/3 */
  329zone(Tz) --> fws, "-", !, digits(T),
  330	     {number_codes(NumT, T),
  331	      Tz is (NumT / 100) * 3600}.
  332zone(Tz) --> fws, "+", !, digits(T),
  333	     {number_codes(NumT, T),
  334	      Tz is (NumT / 100) * -3600}.