View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        jan@swi-prolog.org
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2020, SWI-Prolog Solutions b.v
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(strings,
   36          [ dedent_lines/3,             % +In,-Out,+Options
   37            indent_lines/3,             % +Prefix,+In,-Out
   38            indent_lines/4,             % :Pred,+Prefix,+In,-Out
   39            interpolate_string/4,       % +In,-Out,+Map,+Options
   40            string_lines/2,             % ?In,?Lines
   41            string/4                    % Quasi quotation support
   42          ]).   43:- autoload(library(apply), [include/3, foldl/4, maplist/3, maplist/2]).   44:- autoload(library(error), [existence_error/2, must_be/2]).   45:- autoload(library(lists), [member/2, append/3]).   46:- autoload(library(option), [option/3]).   47:- autoload(library(quasi_quotations),
   48            [quasi_quotation_syntax/1, with_quasi_quotation_input/3]).   49:- autoload(library(dcg/basics),
   50            [string/3, prolog_var_name/3, string_without/4, eos//0]).   51
   52:- meta_predicate
   53    interpolate_string(:, -, +, +),
   54    indent_lines(1, +, +, -).   55
   56:- quasi_quotation_syntax(string).   57
   58/** <module> String utilities
   59
   60This module provides string handling   utilities,  currently notably for
   61dealing  with  multi-line  strings  and   _interpolation_.  The  library
   62provides a couple of primitives  as   well  definitions for the `string`
   63_quasi quotation_ syntax. The latter allows for constructing both single
   64line and multi-line long strings based  on template interpolation. Below
   65is a simple example using the quasi quotation syntax.
   66
   67
   68```
   69test(To) :-
   70    write({|string(To)||
   71           | Dear {To},
   72           |
   73           | I'm happy to announce a string interpolation quasi quoter.
   74           |}.
   75```
   76
   77__Warning__
   78
   79The general purpose string  interpolation   implemented  by this library
   80should __not__ be used to create strings   for a formal language such as
   81HTML, JavaScript, SQL, etc.  because  the   result  will  be  subject to
   82__injection attacks__, providing a serious   __security risc__. The core
   83idea of quasi quotation  is  to  know   about  the  target  language and
   84interpolate Prolog data into the template  __while respecting the syntax
   85of the target language__, notable to   __escape certain characters where
   86needed__. See also library(http/html_write)   and library(http/js_write)
   87which define quasi quotation rules for HTML and JavaScript.
   88
   89@see format/3 can format to a string as well.  The library(lynx/format)
   90provides primitive to wrap long strings.
   91@see The core system provides many additional string processing
   92predicates.
   93@tbd There are probably many other high level string predicates that
   94belong in this library. For example, predicates similar to the
   95functions in https://docs.python.org/3/library/textwrap.html
   96*/
   97
   98%!  string(+Content, +Args, +Binding, -DOM)
   99%
  100%   Implements  the  quasi  quotation  syntax  `string`.  If  the  first
  101%   character of the content is a  newline   (i.e.,  there  is a newline
  102%   _immediately_   after   the   ``||``   token)    this   first   uses
  103%   dedent_lines/3 to the remove common  white   space  prefix from the
  104%   lines. This is called with  the   option  chars("\s\t|"), i.e., also
  105%   removing ``|`` characters and tab(8).
  106%
  107%   If the quasi quotation syntax  carries arguments (e.g., string(To)),
  108%   the string is compiled into a function   that produces the result of
  109%   interpolating the arguments into the template. See user functions on
  110%   dict objects. If there are no arguments,   the  result is simply the
  111%   final string.
  112%
  113%   @see interpolate_string/4 for the interpolation syntax.
  114%   @see Section for examples and discussion.
  115%   @tbd Specify tab width and allow for {@Goal} templates.
  116
  117string(Content, Args, Binding, DOM) :-
  118    must_be(list, Binding),
  119    include(qq_var(Args), Binding, QQDict),
  120    with_quasi_quotation_input(Content, Stream,
  121                               read_string(Stream, _, String)),
  122    (   string_concat("\n", String1, String)
  123    ->  dedent_lines(String1, String2, [tab(8), chars("\s\t|")])
  124    ;   String2 = String
  125    ),
  126    (   prolog_load_context(module, Module)
  127    ->  true
  128    ;   Module = user                   % typein?
  129    ),
  130    (   Args == []
  131    ->  DOM = String2
  132    ;   comp_interpolate(String2, Compiled, QQDict, [module(Module)]),
  133        DOM =.. ['.',strings{type:string},exec(Compiled, QQDict)]
  134    ).
  135
  136qq_var(Vars, _=Var) :- member(V, Vars), V == Var, !.
  137
  138_Dict.exec(Compiled, Map) := String :-
  139    exec_interpolate(Compiled, String, Map).
  140
  141%!  interpolate_string(:In, -Out, +Map, +Options)
  142%
  143%   Establish a string from a template  by replacing patterns. Supported
  144%   patterns are:
  145%
  146%     - {Name}
  147%       If Map contains `Name=Value`, insert `Value` using write/1.
  148%       If `Name` does not appear in Map, raise an existence error.
  149%       `Name` must satisfy the rules for a Prolog variable.
  150%     - {Name,Default}
  151%       As above, but if `Name` does not appear in Map, use `Value`
  152%     - {@Goal}
  153%       Insert the output (to `current_output`) of `Goal` here.
  154%       For safety reasons only accepted if Options contains
  155%       `goals(true)`
  156
  157
  158interpolate_string(Module:In, Out, Map, Options) :-
  159    comp_interpolate(In, Compiled, Map, [module(Module)|Options]),
  160    exec_interpolate(Compiled, Out, Map).
  161
  162comp_interpolate(In, Compiled, Map, Options) :-
  163    string_codes(In, Codes),
  164    phrase(interpolate(Compiled, [], Map, Options), Codes).
  165
  166interpolate([PreS,Action|T0], T, Map, Options) -->
  167    string(Pre),
  168    "{", interpolate_pattern(Action, Options), "}",
  169    !,
  170    { string_codes(PreS, Pre) },
  171    interpolate(T0, T, Map, Options).
  172interpolate(T0, T, _Map, _Options) -->
  173    string(Pre),
  174    eos,
  175    (   { Pre == [] }
  176    ->  { T0 = T }
  177    ;   { string_codes(PreS, Pre),
  178          T0 = [PreS|T]
  179        }
  180    ).
  181
  182interpolate_pattern(Pattern, _) -->
  183    prolog_var_name(Name),
  184    !,
  185    (   ","
  186    ->  default_value(Default),
  187        { Pattern = var(Name, Default) }
  188    ;   { Pattern = var(Name) }
  189    ).
  190interpolate_pattern(goal(Goal), Options) -->
  191    { option(goals(true), Options, false) },
  192    "@",
  193    !,
  194    goal(Goal, Options).
  195
  196default_value(String) -->
  197    string_without("}", Codes),
  198    { string_codes(String, Codes) }.
  199
  200goal(M:Goal, Options) -->
  201    string_without("}", Codes),
  202    { option(module(M), Options, user),
  203      string_codes(String, Codes),
  204      term_string(Goal, String)
  205    }.
  206
  207exec_interpolate(Compiled, String, Map) :-
  208    maplist(exec_interpolate1(Map), Compiled, Parts),
  209    atomics_to_string(Parts, String).
  210
  211exec_interpolate1(Map, var(Var), Out) :-
  212    !,
  213    (   memberchk(Var = Value, Map)
  214    ->  format(string(Out), '~w', Value)
  215    ;   existence_error(template_var, Var)
  216    ).
  217exec_interpolate1(Map, var(Var, Default), Out) :-
  218    !,
  219    (   memberchk(Var = Value, Map)
  220    ->  true
  221    ;   Value = Default
  222    ),
  223    format(string(Out), '~w', Value).
  224exec_interpolate1(_Map, goal(Goal), Out) :-
  225    !,
  226    format(string(Out), '~@', [Goal]).
  227exec_interpolate1(_, String, String).
  228
  229%!  string_lines(?String, ?Lines) is det.
  230%
  231%   True when String represents Lines.  This   follows  the  normal text
  232%   convention that a  line  is  defined   as  a  possible  empty string
  233%   followed by a newline character ("\n").  E.g.
  234%
  235%   ```
  236%   ?- string_lines("a\nb\n", L).
  237%   L = ["a", "b"].
  238%   ?- string_lines(S, ["a", "b"]).
  239%   S = "a\nb\n".
  240%   ```
  241%
  242%   This predicate is  a  true  _relation_   if  both  arguments  are in
  243%   canonical form, i.e. all text  is   represented  as  strings and the
  244%   first argument ends with  a   newline.  The implementation tolerates
  245%   non-canonical input: other  types  than   strings  are  accepted and
  246%   String does not need to end with a newline.
  247%
  248%   @see split_string/4. Using split_string(String, "\n",  "", Lines) on
  249%   a string that ends in a  newline   adds  an  additional empty string
  250%   compared to string_lines/2.
  251
  252string_lines(String, Lines) :-
  253    (   var(String)
  254    ->  must_be(list, Lines),
  255        append(Lines, [""], Lines1),
  256        atomics_to_string(Lines1, "\n", String)
  257    ;   split_string(String, "\n", "", Lines0),
  258        (   append(Lines, [""], Lines0)
  259        ->  true
  260        ;   Lines = Lines0
  261        )
  262    ).
  263
  264%!  dedent_lines(+In, -Out, +Options)
  265%
  266%   Remove shared indentation for all lines in a string. Lines are separated
  267%   by "\n" -- conversion to and from  external forms  (such as "\r\n")  are
  268%   typically done by the I/O predicates.
  269%   A final "\n" is preserved.
  270%
  271%   Options:
  272%
  273%     - tab(N)
  274%       Assume tabs at columns of with N.  When omitted, tabs are
  275%       taken literally and only exact matches are removed.
  276%     - chars(CodesOrString)
  277%       Characters to remove.  This can notably be used to remove
  278%       additional characters such as `*` or `|`.  Default is
  279%       `" \t"`.
  280
  281dedent_lines(In, Out, Options) :-
  282    option(tab(Tab), Options, 0),
  283    option(chars(Chars), Options, "\s\t"),
  284    string_codes(Sep, Chars),
  285    How = s(Tab,Sep),
  286    split_string(In, "\n", "", Lines),
  287    foldl(common_indent(How), Lines, _, Indent0),
  288    (   prepare_delete(Indent0, Indent)
  289    ->  maplist(dedent_line(Tab, Indent), Lines, Dedented),
  290        atomics_to_string(Dedented, "\n", Out)
  291    ;   length(Lines, NLines),
  292        NewLines is NLines - 1,
  293        length(Codes, NewLines),
  294        maplist(=(0'\n), Codes),
  295        string_codes(Out, Codes)
  296    ).
  297
  298prepare_delete(Var, _) :-               % All blank lines
  299    var(Var),
  300    !,
  301    fail.
  302prepare_delete(Width, Width) :-
  303    integer(Width),
  304    !.
  305prepare_delete(Codes, String) :-
  306    string_codes(String, Codes).
  307
  308common_indent(s(0,Sep), Line, Indent0, Indent) :-
  309    !,
  310    line_indent(Line, Indent1, Sep),
  311    join_indent(Indent0, Indent1, Indent).
  312common_indent(s(Tab,Sep), Line, Indent0, Indent) :-
  313    !,
  314    line_indent_width(Line, Indent1, Tab, Sep),
  315    join_indent_width(Indent0, Indent1, Indent).
  316
  317%!  line_indent(+Line, -Indent, +Sep) is det.
  318%
  319%   Determine the indentation as a list of character codes.  If the
  320%   line only holds white space Indent is left unbound.
  321
  322line_indent(Line, Indent, Sep) :-
  323    string_codes(Line, Codes),
  324    code_indent(Codes, Indent0, Sep),
  325    (   is_list(Indent0)
  326    ->  Indent = Indent0
  327    ;   true
  328    ).
  329
  330code_indent([H|T0], [H|T], Sep) :-
  331    string_code(_, Sep, H),
  332    !,
  333    code_indent(T0, T, Sep).
  334code_indent([], _, _) :-
  335    !.
  336code_indent(_, [], _).
  337
  338join_indent(Var, Indent, Indent) :-
  339    var(Var),
  340    !.
  341join_indent(Indent, Var, Indent) :-
  342    var(Var),
  343    !.
  344join_indent(Indent1, Indent2, Indent) :-
  345    shared_prefix(Indent1, Indent2, Indent).
  346
  347shared_prefix(Var, Prefix, Prefix) :-
  348    var(Var),
  349    !.
  350shared_prefix(Prefix, Var, Prefix) :-
  351    var(Var),
  352    !.
  353shared_prefix([H|T0], [H|T1], [H|T]) :-
  354    !,
  355    shared_prefix(T0, T1, T).
  356shared_prefix(_, _, []).
  357
  358%!  line_indent_width(+Line, -Indent, +Tab, +Sep) is det.
  359%
  360%   Determine the indentation as a  column,   compensating  for  the Tab
  361%   width.  This is used if the tab(Width) option is provided.
  362
  363line_indent_width(Line, Indent, Tab, Sep) :-
  364    string_codes(Line, Codes),
  365    code_indent_width(Codes, 0, Indent, Tab, Sep).
  366
  367code_indent_width([H|T], Indent0, Indent, Tab, Sep) :-
  368    string_code(_, Sep, H),
  369    !,
  370    update_pos(H, Indent0, Indent1, Tab),
  371    code_indent_width(T, Indent1, Indent, Tab, Sep).
  372code_indent_width([], _, _, _, _) :-
  373    !.
  374code_indent_width(_, Indent, Indent, _, _).
  375
  376join_indent_width(Var, Indent, Indent) :-
  377    var(Var),
  378    !.
  379join_indent_width(Indent, Var, Indent) :-
  380    var(Var),
  381    !.
  382join_indent_width(Indent0, Indent1, Indent) :-
  383    Indent is min(Indent0, Indent1).
  384
  385%!  dedent_line(+Tab, +Indent, +String, -Dedented)
  386%
  387%   Dedent a single line according to Tab   and Indent. Indent is either
  388%   an integer, deleting the  first  Indent   characters  or  a  string,
  389%   deleting the string literally.
  390
  391dedent_line(_Tab, Indent, String, Dedented) :-
  392    string(Indent),
  393    !,
  394    (   string_concat(Indent, Dedented, String)
  395    ->  true
  396    ;   Dedented = ""               % or ""?
  397    ).
  398dedent_line(Tab, Indent, String, Dedented) :-
  399    string_codes(String, Codes),
  400    delete_width(0, Indent, Codes, Codes1, Tab),
  401    string_codes(Dedented, Codes1).
  402
  403delete_width(Here, Indent, Codes, Codes, _) :-
  404    Here =:= Indent,
  405    !.
  406delete_width(Here, Indent, Codes0, Codes, _) :-
  407    Here > Indent,
  408    !,
  409    NSpaces is Here-Indent,
  410    length(Spaces, NSpaces),
  411    maplist(=(0'\s), Spaces),
  412    append(Spaces, Codes0, Codes).
  413delete_width(Here, Indent, [H|T0], T, Tab) :-
  414    !,
  415    update_pos(H, Here, Here1, Tab),
  416    delete_width(Here1, Indent, T0, T, Tab).
  417delete_width(_, _, [], [], _).
  418
  419update_pos(0'\t, Here0, Here, Tab) :-
  420    !,
  421    Here is ((Here0+Tab)//Tab)*Tab.
  422update_pos(_, Here0, Here, _) :-
  423    Here is Here0 + 1.
  424
  425%!  indent_lines(+Prefix, +In, -Out) is det.
  426%
  427%   Add Prefix to the beginning of lines   in In. Lines are separated by
  428%   "\n" -- conversion to and from external   forms (such as "\r\n") are
  429%   typically done by the I/O predicates. Lines that consist entirely of
  430%   whitespace are left as-is.
  431
  432indent_lines(Prefix, In, Out) :-
  433    indent_lines(ignore_whitespace_line, Prefix, In, Out).
  434
  435%!  indent_lines(:Filter, +Prefix, +In, -Out) is det.
  436%
  437%   Similar to indent_lines/3, but only adds   Prefix to lines for which
  438%   call(Filter, Line) succeeds.
  439
  440indent_lines(Pred, Prefix, In, Out) :-
  441    % Use split_string/4 rather than string_lines/2, to preserve final "\n".
  442    split_string(In, "\n", "", Lines0),
  443    (   append(Lines, [""], Lines0)
  444    ->  maplist(concat_to_string(Pred, Prefix), Lines, IndentedLines0),
  445        append(IndentedLines0, [""], IndentedLines),
  446        atomics_to_string(IndentedLines, "\n", Out)
  447    ;   Lines = Lines0,
  448        maplist(concat_to_string(Pred, Prefix), Lines, IndentedLines),
  449        atomics_to_string(IndentedLines, "\n", Out)
  450    ).
  451
  452ignore_whitespace_line(Str) :-
  453    \+ split_string(Str, "", " \t", [""]).
  454
  455:- meta_predicate concat_to_string(:, +, +, -).  456
  457concat_to_string(Pred, Prefix, Line, Out) :-
  458    (   call(Pred, Line)
  459    ->  atomics_to_string([Prefix, Line], Out)
  460    ;   Out = Line
  461    )