1:- module(md_blocks, [
    2    md_blocks//1 % -Blocks
    3]).

Block-level parser for Markdown

Parses Markdown block-level constructs like paragraphs, lists, code blocks, blockquotes etc. Applies span-level parsing for all blocks. */

   12:- use_module(library(dcg/basics)).   13
   14:- use_module(md_list_item).   15:- use_module(md_header).   16:- use_module(md_span).   17:- use_module(md_trim).   18:- use_module(md_line).   19:- use_module(md_hr).
 md_blocks(-Blocks)// is det
Parses given Markdown into a structure accepted by html//1.
   26md_blocks(Blocks) -->
   27    blocks(top, [], Blocks).
   28
   29% Contextified block parsing.
   30% Some types of blocks are not
   31% allowed in contexts other than top.
   32% Currently used contexts are: top,
   33% list and bq.
   34
   35md_blocks(Ctx, Blocks) -->
   36    blocks(Ctx, [], Blocks).
   37
   38% Recognizes all blocks
   39% in the input. When a block is not
   40% recognized, one line as removed and
   41% added into accumulator. These accumulated
   42% lines are added as paragraph blocks.
   43% This matches better the sematics of
   44% http://daringfireball.net/projects/markdown/dingus
   45
   46blocks(Ctx, Acc, Result) -->
   47    empty_lines,
   48    block(Ctx, Block), !,
   49    {
   50        (   Acc = []
   51        ->  Result = [Block|Blocks]
   52        ;   acc_block(Acc, AccBlock),
   53            Result = [AccBlock,Block|Blocks])
   54    },
   55    blocks(Ctx, [], Blocks).
   56
   57blocks(Ctx, Acc, Blocks) -->
   58    non_empty_line(Line), !,
   59    blocks(Ctx, [Line|Acc], Blocks).
   60
   61blocks(_, Acc, Result) -->
   62    empty_lines,
   63    eos, !,
   64    {
   65        (   Acc = []
   66        ->  Result = []
   67        ;   Result = [Block],
   68            acc_block(Acc, Block))
   69    }.
   70
   71blocks(Ctx, Acc, Result) -->
   72    empty_line,
   73    {
   74        (   Acc = []
   75        ->  Result = Blocks
   76        ;   Result = [Block|Blocks],
   77            acc_block(Acc, Block))
   78    },
   79    blocks(Ctx, [], Blocks).
   80
   81% Converts lines into a <p>
   82% element and applies span-level
   83% parsing.
   84
   85acc_block(Acc, p(Span)):-
   86    reverse(Acc, AccLines),
   87    merge_lines(AccLines, Block),
   88    md_span_codes(Block, Span).
   89
   90% Recognizes a single block.
   91% Tries to parse in the following
   92% order: headers, horisontal ruler,
   93% lists, blockquote, html.
   94
   95block(_, Block) -->
   96    md_header(Block), !.
   97
   98block(_, Block) -->
   99    code(Block), !.
  100
  101block(top, hr([])) -->
  102    md_hr, !.
  103
  104block(_, Block) -->
  105    list(Block), !.
  106
  107block(top, Block) -->
  108    blockquote(Block), !.
  109
  110block(_, Block) -->
  111    html(Block), !.
  112
  113block(_, Block) -->
  114    fenced_code(Block).
  115
  116code(pre(code(String))) -->
  117    indented_lines(Codes), !,
  118    {
  119        trim_right(Codes, Trimmed),
  120        string_codes(String, Trimmed)
  121    }.
  122
  123% Recognizes fenced code blocks.
  124% The language is put into the
  125% `data-language` attribute of the
  126% `code` tag.
  127
  128fenced_code(Block) -->
  129    "```", inline_string(LangCodes), ln,
  130    string(Codes),
  131    ln, "```", whites, ln_or_eos, !,
  132    {
  133        trim(LangCodes, Trimmed),
  134        atom_codes(Lang, Trimmed),
  135        string_codes(Code, Codes),
  136        (   Lang = ''
  137        ->  Block = pre(code(Code))
  138        ;   Block = pre(code(['data-language'=Lang], Code)))
  139    }.
  140
  141% Optimizes generated HTML structure.
  142% Applied after parsing different blocks.
  143% Mostly deals with excessive <p> elements
  144% removal.
  145
  146optimize(blockquote([p(Block)]), blockquote(Block)):- !.
  147
  148optimize(li([p(Block)]), li(Block)):- !.
  149
  150optimize(li([p(Block1), ul(Block2)]), li(Block)):- !,
  151    append(Block1, [ul(Block2)], Block).
  152
  153optimize(li([p(Block1), ol(Block2)]), li(Block)):- !,
  154    append(Block1, [ol(Block2)], Block).
  155
  156optimize(Block, Block).
  157
  158% Recognizes a sequence of one or more
  159% indented lines. Gives back codes of
  160% whole sequence.
  161
  162indented_lines(Codes) -->
  163    indented_lines_collect(Lines),
  164    {
  165        Lines \= [],
  166        merge_lines(Lines, Codes)
  167    }.
  168
  169% Recognizes a sequence of indented lines.
  170% There might be empty lines between
  171% indented lines.
  172
  173indented_lines_collect([Line|Lines]) -->
  174    indented_line(Line), !,
  175    indented_lines_collect(Lines).
  176
  177indented_lines_collect([]) -->
  178    eos, !.
  179
  180indented_lines_collect([[]|Lines]) -->
  181    empty_line, !,
  182    indented_lines_collect(Lines).
  183
  184indented_lines_collect([]) --> "".
  185
  186indented_line(Line) -->
  187    indent, inline_string(Line), ln_or_eos.
  188
  189% Recognizes block-level HTML.
  190% No Markdown inside it is processed.
  191% Gives term that write_html's html//1
  192% does not escape.
  193
  194html(\[String]) -->
  195    [0'<, Code], { code_type(Code, alpha) }, !,
  196    non_empty_lines(Html),
  197    { string_codes(String, [0'<,Code|Html]) }.
  198
  199% Recognizes either ordered list
  200% or bulleted list.
  201
  202list(List) -->
  203    bullet_list(List), !.
  204
  205list(List) -->
  206    ordered_list(List).
  207
  208% Recognizes ordered list.
  209% Gives term like ol(Term)
  210% where Items is non-empty list.
  211
  212ordered_list(ol(Items)) -->
  213    ordered_list_collect(Items, _), !,
  214    { Items \= [] }.
  215
  216ordered_list_collect([Item|Items], Mode) -->
  217    ordered_list_item(Item, Mode), !,
  218    empty_lines,
  219    ordered_list_collect(Items, Mode).
  220
  221ordered_list_collect([], _) --> "".
  222
  223% Recognizes a single ordered list item.
  224
  225ordered_list_item(Item, ListMode) -->
  226    md_ordered_list_item(Codes, ItemMode),
  227    { postproc_list_item(Codes, ItemMode, ListMode, Item) }.
  228
  229% Recognizes bulleted list.
  230% Gives a term like ul(Items)
  231% where Items is non-empty list.
  232
  233bullet_list(ul(Items)) -->
  234    bullet_list_collect(Items, _), !,
  235    { Items \= [] }.
  236
  237bullet_list_collect([Item|Items], Mode) -->
  238    bullet_list_item(Item, Mode), !,
  239    empty_lines,
  240    bullet_list_collect(Items, Mode).
  241
  242bullet_list_collect([], _) --> "".
  243
  244% Recognizes a single bulleted list item.
  245
  246bullet_list_item(Item, ListMode) -->
  247    md_bullet_list_item(Codes, ItemMode),
  248    { postproc_list_item(Codes, ItemMode, ListMode, Item) }.
  249
  250% Postprocesses a list item.
  251% In paragraph mode, no optimizations are
  252% applied (preserves `<p>` in `<li>`).
  253% The actual list mode is set by the first
  254% list item.
  255
  256postproc_list_item(Codes, ItemMode, ListMode, Item):-
  257    phrase(md_blocks(list, Blocks), Codes),
  258    list_mode(ListMode, ItemMode, Mode),
  259    (   Mode = normal
  260    ->  optimize(li(Blocks), Item)
  261    ;   Item = li(Blocks)).
  262
  263% List mode setup. When ListMode
  264% is set, its value is used. Otherwise
  265% ListMode is set to ItemMode.
  266
  267list_mode(ListMode, ItemMode, Mode):-
  268    (   var(ListMode)
  269    ->  ListMode = ItemMode
  270    ;   true),
  271    Mode = ListMode.
  272
  273% Recognizes a blockquote.
  274% Strips > from line beginnings.
  275% Output is a term like blockquote(Blocks).
  276
  277blockquote(Opt) -->
  278    ">", string(Codes),
  279    empty_line,
  280    empty_line, !,
  281    {
  282        trim_left(Codes, Trimmed),
  283        phrase(bq_strip(Stripped), Trimmed), !,
  284        phrase(md_blocks(top, Blocks), Stripped),
  285        optimize(blockquote(Blocks), Opt)
  286    }.
  287
  288% Strips > from blockquote line
  289% beginnings.
  290
  291bq_strip([0'\n|Codes]) -->
  292    ln, "> ", !, bq_strip(Codes).
  293
  294bq_strip([0'\n|Codes]) -->
  295    ln, ">", !, bq_strip(Codes).
  296
  297bq_strip([Code|Codes]) -->
  298    [Code], !, bq_strip(Codes).
  299
  300bq_strip([]) -->
  301    eos.
  302
  303% List of consequtive non-empty lines.
  304% Consumes as many non-empty lines
  305% as possible. Gives flattened list
  306% of codes.
  307
  308non_empty_lines(Codes) -->
  309    non_empty_lines_collect(Lines),
  310    { merge_lines(Lines, Codes) }, !.
  311
  312non_empty_lines_collect([Line|Lines]) -->
  313    non_empty_line(Line), !,
  314    non_empty_lines_collect(Lines).
  315
  316non_empty_lines_collect([]) --> ""