View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        jan@swi-prolog.org
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2020, SWI-Prolog Solutions b.v
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(strings,
   36          [ dedent_lines/3,             % +In,-Out,+Options
   37            indent_lines/3,             % +Prefix,+In,-Out
   38            indent_lines/4,             % :Pred,+Prefix,+In,-Out
   39            interpolate_string/4,       % +In,-Out,+Map,+Options
   40            string_lines/2,             % ?In,?Lines
   41            string/4                    % Quasi quotation support
   42          ]).   43:- autoload(library(apply), [include/3, foldl/4, maplist/3, maplist/2]).   44:- autoload(library(error), [existence_error/2, must_be/2]).   45:- autoload(library(lists), [member/2, append/3]).   46:- autoload(library(option), [option/3]).   47:- autoload(library(quasi_quotations),
   48            [quasi_quotation_syntax/1, with_quasi_quotation_input/3]).   49:- autoload(library(dcg/basics),
   50            [string/3, prolog_var_name/3, string_without/4, eos//0]).   51
   52:- meta_predicate
   53    interpolate_string(:, -, +, +),
   54    indent_lines(1, +, +, -).   55
   56:- quasi_quotation_syntax(string).

String utilities

This module provides string handling utilities, currently notably for dealing with multi-line strings and interpolation. The library provides a couple of primitives as well definitions for the string quasi quotation syntax. The latter allows for constructing both single line and multi-line long strings based on template interpolation. Below is a simple example using the quasi quotation syntax.

test(To) :-
    write({|string(To)||
           | Dear {To},
           |
           | I'm happy to announce a string interpolation quasi quoter.
           |}.

Warning

The general purpose string interpolation implemented by this library should not be used to create strings for a formal language such as HTML, JavaScript, SQL, etc. because the result will be subject to injection attacks, providing a serious security risc. The core idea of quasi quotation is to know about the target language and interpolate Prolog data into the template while respecting the syntax of the target language, notable to escape certain characters where needed. See also library(http/html_write) and library(http/js_write) which define quasi quotation rules for HTML and JavaScript.

See also
- format/3 can format to a string as well. The library(lynx/format) provides primitive to wrap long strings.
- The core system provides many additional string processing predicates.
To be done
- There are probably many other high level string predicates that belong in this library. For example, predicates similar to the functions in https://docs.python.org/3/library/textwrap.html */
 string(+Content, +Args, +Binding, -DOM)
Implements the quasi quotation syntax string. If the first character of the content is a newline (i.e., there is a newline immediately after the || token) this first uses dedent_lines/3 to the remove common white space prefix from the lines. This is called with the option chars("\s\t|"), i.e., also removing | characters and tab(8).

If the quasi quotation syntax carries arguments (e.g., string(To)), the string is compiled into a function that produces the result of interpolating the arguments into the template. See user functions on dict objects. If there are no arguments, the result is simply the final string.

See also
- interpolate_string/4 for the interpolation syntax.
- Section for examples and discussion.
To be done
- Specify tab width and allow for {@Goal} templates.
  117string(Content, Args, Binding, DOM) :-
  118    must_be(list, Binding),
  119    include(qq_var(Args), Binding, QQDict),
  120    with_quasi_quotation_input(Content, Stream,
  121                               read_string(Stream, _, String)),
  122    (   string_concat("\n", String1, String)
  123    ->  dedent_lines(String1, String2, [tab(8), chars("\s\t|")])
  124    ;   String2 = String
  125    ),
  126    (   prolog_load_context(module, Module)
  127    ->  true
  128    ;   Module = user                   % typein?
  129    ),
  130    (   Args == []
  131    ->  DOM = String2
  132    ;   comp_interpolate(String2, Compiled, QQDict, [module(Module)]),
  133        DOM =.. ['.',strings{type:string},exec(Compiled, QQDict)]
  134    ).
  135
  136qq_var(Vars, _=Var) :- member(V, Vars), V == Var, !.
  137
  138_Dict.exec(Compiled, Map) := String :-
  139    exec_interpolate(Compiled, String, Map).
 interpolate_string(:In, -Out, +Map, +Options)
Establish a string from a template by replacing patterns. Supported patterns are:
{Name}
If Map contains Name=Value, insert Value using write/1. If Name does not appear in Map, raise an existence error. Name must satisfy the rules for a Prolog variable.
{Name,Default}
As above, but if Name does not appear in Map, use Value
{@Goal}
Insert the output (to current_output) of Goal here. For safety reasons only accepted if Options contains goals(true)
  158interpolate_string(Module:In, Out, Map, Options) :-
  159    comp_interpolate(In, Compiled, Map, [module(Module)|Options]),
  160    exec_interpolate(Compiled, Out, Map).
  161
  162comp_interpolate(In, Compiled, Map, Options) :-
  163    string_codes(In, Codes),
  164    phrase(interpolate(Compiled, [], Map, Options), Codes).
  165
  166interpolate([PreS,Action|T0], T, Map, Options) -->
  167    string(Pre),
  168    "{", interpolate_pattern(Action, Options), "}",
  169    !,
  170    { string_codes(PreS, Pre) },
  171    interpolate(T0, T, Map, Options).
  172interpolate(T0, T, _Map, _Options) -->
  173    string(Pre),
  174    eos,
  175    (   { Pre == [] }
  176    ->  { T0 = T }
  177    ;   { string_codes(PreS, Pre),
  178          T0 = [PreS|T]
  179        }
  180    ).
  181
  182interpolate_pattern(Pattern, _) -->
  183    prolog_var_name(Name),
  184    !,
  185    (   ","
  186    ->  default_value(Default),
  187        { Pattern = var(Name, Default) }
  188    ;   { Pattern = var(Name) }
  189    ).
  190interpolate_pattern(goal(Goal), Options) -->
  191    { option(goals(true), Options, false) },
  192    "@",
  193    !,
  194    goal(Goal, Options).
  195
  196default_value(String) -->
  197    string_without("}", Codes),
  198    { string_codes(String, Codes) }.
  199
  200goal(M:Goal, Options) -->
  201    string_without("}", Codes),
  202    { option(module(M), Options, user),
  203      string_codes(String, Codes),
  204      term_string(Goal, String)
  205    }.
  206
  207exec_interpolate(Compiled, String, Map) :-
  208    maplist(exec_interpolate1(Map), Compiled, Parts),
  209    atomics_to_string(Parts, String).
  210
  211exec_interpolate1(Map, var(Var), Out) :-
  212    !,
  213    (   memberchk(Var = Value, Map)
  214    ->  format(string(Out), '~w', Value)
  215    ;   existence_error(template_var, Var)
  216    ).
  217exec_interpolate1(Map, var(Var, Default), Out) :-
  218    !,
  219    (   memberchk(Var = Value, Map)
  220    ->  true
  221    ;   Value = Default
  222    ),
  223    format(string(Out), '~w', Value).
  224exec_interpolate1(_Map, goal(Goal), Out) :-
  225    !,
  226    format(string(Out), '~@', [Goal]).
  227exec_interpolate1(_, String, String).
 string_lines(?String, ?Lines) is det
True when String represents Lines. This follows the normal text convention that a line is defined as a possible empty string followed by a newline character ("\n"). E.g.
?- string_lines("a\nb\n", L).
L = ["a", "b"].
?- string_lines(S, ["a", "b"]).
S = "a\nb\n".

This predicate is a true relation if both arguments are in canonical form, i.e. all text is represented as strings and the first argument ends with a newline. The implementation tolerates non-canonical input: other types than strings are accepted and String does not need to end with a newline.

See also
- split_string/4. Using split_string(String, "\n", "", Lines) on a string that ends in a newline adds an additional empty string compared to string_lines/2.
  252string_lines(String, Lines) :-
  253    (   var(String)
  254    ->  must_be(list, Lines),
  255        append(Lines, [""], Lines1),
  256        atomics_to_string(Lines1, "\n", String)
  257    ;   split_string(String, "\n", "", Lines0),
  258        (   append(Lines, [""], Lines0)
  259        ->  true
  260        ;   Lines = Lines0
  261        )
  262    ).
 dedent_lines(+In, -Out, +Options)
Remove shared indentation for all lines in a string. Lines are separated by "\n" -- conversion to and from external forms (such as "\r\n") are typically done by the I/O predicates. A final "\n" is preserved.

Options:

tab(N)
Assume tabs at columns of with N. When omitted, tabs are taken literally and only exact matches are removed.
chars(CodesOrString)
Characters to remove. This can notably be used to remove additional characters such as * or `|`. Default is " \t".
  281dedent_lines(In, Out, Options) :-
  282    option(tab(Tab), Options, 0),
  283    option(chars(Chars), Options, "\s\t"),
  284    string_codes(Sep, Chars),
  285    How = s(Tab,Sep),
  286    split_string(In, "\n", "", Lines),
  287    foldl(common_indent(How), Lines, _, Indent0),
  288    (   prepare_delete(Indent0, Indent)
  289    ->  maplist(dedent_line(Tab, Indent), Lines, Dedented),
  290        atomics_to_string(Dedented, "\n", Out)
  291    ;   length(Lines, NLines),
  292        NewLines is NLines - 1,
  293        length(Codes, NewLines),
  294        maplist(=(0'\n), Codes),
  295        string_codes(Out, Codes)
  296    ).
  297
  298prepare_delete(Var, _) :-               % All blank lines
  299    var(Var),
  300    !,
  301    fail.
  302prepare_delete(Width, Width) :-
  303    integer(Width),
  304    !.
  305prepare_delete(Codes, String) :-
  306    string_codes(String, Codes).
  307
  308common_indent(s(0,Sep), Line, Indent0, Indent) :-
  309    !,
  310    line_indent(Line, Indent1, Sep),
  311    join_indent(Indent0, Indent1, Indent).
  312common_indent(s(Tab,Sep), Line, Indent0, Indent) :-
  313    !,
  314    line_indent_width(Line, Indent1, Tab, Sep),
  315    join_indent_width(Indent0, Indent1, Indent).
 line_indent(+Line, -Indent, +Sep) is det
Determine the indentation as a list of character codes. If the line only holds white space Indent is left unbound.
  322line_indent(Line, Indent, Sep) :-
  323    string_codes(Line, Codes),
  324    code_indent(Codes, Indent0, Sep),
  325    (   is_list(Indent0)
  326    ->  Indent = Indent0
  327    ;   true
  328    ).
  329
  330code_indent([H|T0], [H|T], Sep) :-
  331    string_code(_, Sep, H),
  332    !,
  333    code_indent(T0, T, Sep).
  334code_indent([], _, _) :-
  335    !.
  336code_indent(_, [], _).
  337
  338join_indent(Var, Indent, Indent) :-
  339    var(Var),
  340    !.
  341join_indent(Indent, Var, Indent) :-
  342    var(Var),
  343    !.
  344join_indent(Indent1, Indent2, Indent) :-
  345    shared_prefix(Indent1, Indent2, Indent).
  346
  347shared_prefix(Var, Prefix, Prefix) :-
  348    var(Var),
  349    !.
  350shared_prefix(Prefix, Var, Prefix) :-
  351    var(Var),
  352    !.
  353shared_prefix([H|T0], [H|T1], [H|T]) :-
  354    !,
  355    shared_prefix(T0, T1, T).
  356shared_prefix(_, _, []).
 line_indent_width(+Line, -Indent, +Tab, +Sep) is det
Determine the indentation as a column, compensating for the Tab width. This is used if the tab(Width) option is provided.
  363line_indent_width(Line, Indent, Tab, Sep) :-
  364    string_codes(Line, Codes),
  365    code_indent_width(Codes, 0, Indent, Tab, Sep).
  366
  367code_indent_width([H|T], Indent0, Indent, Tab, Sep) :-
  368    string_code(_, Sep, H),
  369    !,
  370    update_pos(H, Indent0, Indent1, Tab),
  371    code_indent_width(T, Indent1, Indent, Tab, Sep).
  372code_indent_width([], _, _, _, _) :-
  373    !.
  374code_indent_width(_, Indent, Indent, _, _).
  375
  376join_indent_width(Var, Indent, Indent) :-
  377    var(Var),
  378    !.
  379join_indent_width(Indent, Var, Indent) :-
  380    var(Var),
  381    !.
  382join_indent_width(Indent0, Indent1, Indent) :-
  383    Indent is min(Indent0, Indent1).
 dedent_line(+Tab, +Indent, +String, -Dedented)
Dedent a single line according to Tab and Indent. Indent is either an integer, deleting the first Indent characters or a string, deleting the string literally.
  391dedent_line(_Tab, Indent, String, Dedented) :-
  392    string(Indent),
  393    !,
  394    (   string_concat(Indent, Dedented, String)
  395    ->  true
  396    ;   Dedented = ""               % or ""?
  397    ).
  398dedent_line(Tab, Indent, String, Dedented) :-
  399    string_codes(String, Codes),
  400    delete_width(0, Indent, Codes, Codes1, Tab),
  401    string_codes(Dedented, Codes1).
  402
  403delete_width(Here, Indent, Codes, Codes, _) :-
  404    Here =:= Indent,
  405    !.
  406delete_width(Here, Indent, Codes0, Codes, _) :-
  407    Here > Indent,
  408    !,
  409    NSpaces is Here-Indent,
  410    length(Spaces, NSpaces),
  411    maplist(=(0'\s), Spaces),
  412    append(Spaces, Codes0, Codes).
  413delete_width(Here, Indent, [H|T0], T, Tab) :-
  414    !,
  415    update_pos(H, Here, Here1, Tab),
  416    delete_width(Here1, Indent, T0, T, Tab).
  417delete_width(_, _, [], [], _).
  418
  419update_pos(0'\t, Here0, Here, Tab) :-
  420    !,
  421    Here is ((Here0+Tab)//Tab)*Tab.
  422update_pos(_, Here0, Here, _) :-
  423    Here is Here0 + 1.
 indent_lines(+Prefix, +In, -Out) is det
Add Prefix to the beginning of lines in In. Lines are separated by "\n" -- conversion to and from external forms (such as "\r\n") are typically done by the I/O predicates. Lines that consist entirely of whitespace are left as-is.
  432indent_lines(Prefix, In, Out) :-
  433    indent_lines(ignore_whitespace_line, Prefix, In, Out).
 indent_lines(:Filter, +Prefix, +In, -Out) is det
Similar to indent_lines/3, but only adds Prefix to lines for which call(Filter, Line) succeeds.
  440indent_lines(Pred, Prefix, In, Out) :-
  441    % Use split_string/4 rather than string_lines/2, to preserve final "\n".
  442    split_string(In, "\n", "", Lines0),
  443    (   append(Lines, [""], Lines0)
  444    ->  maplist(concat_to_string(Pred, Prefix), Lines, IndentedLines0),
  445        append(IndentedLines0, [""], IndentedLines),
  446        atomics_to_string(IndentedLines, "\n", Out)
  447    ;   Lines = Lines0,
  448        maplist(concat_to_string(Pred, Prefix), Lines, IndentedLines),
  449        atomics_to_string(IndentedLines, "\n", Out)
  450    ).
  451
  452ignore_whitespace_line(Str) :-
  453    \+ split_string(Str, "", " \t", [""]).
  454
  455:- meta_predicate concat_to_string(:, +, +, -).  456
  457concat_to_string(Pred, Prefix, Line, Out) :-
  458    (   call(Pred, Line)
  459    ->  atomics_to_string([Prefix, Line], Out)
  460    ;   Out = Line
  461    )