1:- module(dynarray_persistence,
    2    [
    3        dynarray_clone/2,
    4        dynarray_csv/2,
    5        dynarray_erase/2,
    6        dynarray_persist/2,
    7        dynarray_restore/2,
    8        dynarray_serialize/2
    9    ]).

Persistence for dynarray objects, using Berkeley DB

This module provides persistence for dynarray objects, using the Berkeley DB utility package. Please, refer to bdb_wrapper.pl for details on the SWI-Prolog interface to Berkeley DB.

Additionally, persisting and restoring from `.csv` files is also implemented. Please, refer to the csv_wrapper.pl for details.

The following considerations apply for CSV operations:

1. the dynarray involved must be 2-dimensional, and will be handled as having
   rows (dimension 1) and columns (dimension 2);
2. the stream involved must be of type 'text', and will be read or written from
   its current position;
3. persisting to, or restoring from, the given stream will be attempted,
   depending on whether or not the dynarray exists;
4. input and output are performed through the Prolog platform's built-in CSV
   library;
5. when persisting, the atoms associated with the dynarray's columns, if they
   exist, will be used as field names in the CSV file's header record;
6. when persisting, the data registered as labels, apart from the column names,
   will not be included;
7. when persisting, missing cells will be recorded on the CSV file as empty fields
   (containing the null char '\000\');
8. when restoring, an attempt will be made to extract field names from the CSV
   file's first record and use them as labels; if not possible, the record will be
   treated as regular data.
author
- GT Nunes
version
- 1.3.2
license
- BSD-3-Clause License */
   47%-------------------------------------------------------------------------------------
   48
   49:- use_module(library(apply),
   50    [
   51        convlist/3,
   52        maplist/2,
   53        maplist/3
   54    ]).   55
   56:- use_module(library(lists),
   57    [
   58        nth0/3,
   59        numlist/3
   60    ]).   61
   62:- use_module('bdb_wrapper',
   63    [
   64        bdb_erase/2,
   65        bdb_retrieve/3,
   66        bdb_store/3
   67    ]).   68
   69:- use_module('csv_wrapper',
   70    [
   71        csv_input_records/2,
   72        csv_is_header/1,
   73        csv_output_record/2
   74    ]).   75
   76:- use_module('dynarray_core',
   77    [
   78        dynarray_cells/3,
   79        dynarray_create/2,
   80        dynarray_dims/2,
   81        dynarray_destroy/1,
   82        dynarray_label/3,
   83        dynarray_position_value/3,
   84        dynarray_value/3,
   85        is_dynarray/1
   86    ]).   87
   88%-------------------------------------------------------------------------------------
 dynarray_clone(+Id:atomSource:atom, +Id:atomTarget:atom) is semidet
Clone a dynarray.
Arguments:
IdSource- Atom identifying the source dynarray
IdTarget- Atom identifying the target dynarray
   97dynarray_clone(IdSource, IdTarget) :-
   98
   99    % fail points (source dynarray must exist, target dynarray must not exist)
  100    is_dynarray(IdSource),
  101    \+ is_dynarray(IdTarget),
  102
  103    % serialize the source dynarray
  104    dynarray_serialize(IdSource, Data),
  105
  106    % create target dynarray with serialized data from IdSource
  107    dynarray_serialize(IdTarget, Data).
  108
  109%-------------------------------------------------------------------------------------
 dynarray_csv(+Id:atom, +Stream:ref) is det
Persist or restore a dynarray into/from a CSV file.
Arguments:
Id- Atom identifying the dynarray
Stream- Stream to read from/write to
  118dynarray_csv(Id, Stream) :-
  119
  120    % does Id identify a dynarray ?
  121    (is_dynarray(Id) ->
  122        % yes, so persist it (fail if it is not bi-dimensional)
  123        dynarray_dims(Id, 2),
  124        dynarray_to_csv(Id, Stream)
  125    ;
  126        % no, so restore it
  127        csv_to_dynarray(Id, Stream)
  128    ).
  129
  130%-------------------------------------------------------------------------------------
 dynarray_to_csv(+Id:atom, +Stream:ref)
Persist the dynarray to Stream as a CSV file.
  135%  @param Id     Atom identifying the dynarray
  136%  @param Stream Stream to write to
  137
  138dynarray_to_csv(Id, Stream) :-
  139
  140    % retrieve the number of columns (columns are 0-based)
  141    dynarray_cells(Id, 2, ColCount),
  142    ColLast is ColCount - 1,
  143    numlist(0, ColLast, ColOrdinals),
  144
  145    % are column names registered as labels ?
  146    ( ( convlist(col_label(Id), ColOrdinals, ColNames)
  147      , length(ColNames, ColCount)
  148      , csv_is_header(ColNames) )->
  149        % yes, so write the CSV file header
  150        csv_output_record(Stream, ColNames)
  151    ;
  152        % no, so proceed
  153        true
  154    ),
  155
  156    % persist the dynarray data to a CSV file (rows are 0-based)
  157    dynarray_cells(Id, 2, RowCount),
  158    RowLast is RowCount - 1,
  159    numlist(0, RowLast, RowOrdinals),
  160    maplist(output_record(Id, Stream, ColOrdinals), RowOrdinals).
  161
  162% retrieve the label associated with ColOrdinal
  163col_label(Id, ColOrdinal, Label) :-
  164    % fail point
  165    dynarray_label(Id, Label, ColOrdinal).
  166
  167% build and output the CSV record
  168output_record(Id, Stream, ColOrdinals, RowOrdinal) :-
  169
  170    maplist(output_field(Id, RowOrdinal), ColOrdinals, Record),
  171    csv_output_record(Stream, Record).
  172
  173output_field(Id, RowOrdinal, ColOrdinal, Field) :-
  174    dynarray_value(Id, [RowOrdinal,ColOrdinal], Field).
  175
  176%-------------------------------------------------------------------------------------
 csv_to_dynarray(+Id:atom, +Stream:ref) is det
Restore the dynarray from a CSV file in Stream.
Arguments:
Id- Atom identifying the dynarray
Stream- Stream to read from
  185csv_to_dynarray(Id, Stream) :-
  186
  187    % input CSV records
  188    csv_input_records(Stream, Records),
  189    length(Records, Len),
  190
  191    % set aside head and compute number of columns (columns are 0-based)
  192    [Head|Tail] = Records,
  193    length(Head, ColCount),
  194    ColLast is ColCount - 1,
  195    numlist(0, ColLast, ColOrdinals),
  196
  197    % is it a CSV file header ?-
  198    (csv_is_header(Head) ->
  199
  200        % yes
  201        RowCount is Len - 1,
  202
  203        % create the dynarray
  204        dynarray_create(Id, [RowCount,ColCount]),
  205
  206        % register the column names
  207        maplist(dynarray_label(Id), Head, ColOrdinals),
  208
  209        % establish the data
  210        Data = Tail
  211    ;
  212        % no
  213        RowCount = Len,
  214
  215        % create the dynarray
  216        dynarray_create(Id, [RowCount,ColCount]),
  217
  218        % establish the data
  219        Data = Records
  220    ),
  221
  222    % load the data onto the dynarray (rows are 0-based)
  223    RowLast is RowCount - 1,
  224    numlist(0, RowLast, RowOrdinals),
  225    maplist(load_record(Id, ColOrdinals), RowOrdinals, Data).
  226
  227% restore the CSV Record
  228load_record(Id, ColOrdinals, RowOrdinal, Record) :-
  229    maplist(load_field(Id, RowOrdinal), ColOrdinals, Record).
  230
  231% restore the CSV Field
  232load_field(Id, RowOrdinal, ColOrdinal, Field) :-
  233    dynarray_value(Id, [RowOrdinal,ColOrdinal], Field).
  234
  235%-------------------------------------------------------------------------------------
 dynarray_serialize(+Id:atom, ?Serialized:data) is det
A serialization mechanism, for backup/restore purposes. For a given dynarray containing Nv values and Nb labels, its serialization structure will be:
  [<dims-ranges>],<Nb>,
  [<key-label-1>,<value-label-1>],...,[<key-label-Nb>,<value-label-Nb>],
  [<pos-value-1>,<value-1>],...,[<pos-value-Nv>,<value-Nv>]

The serialized list will thus contain Np + Nv + 2 elements:
<dims-ranges> - the dimensions ranges used for the dynarray creation
<num-labels> - the total number of key-value label pairs
<key-label-j> - the key in the key-value label pair j
<value-label-j> - the value in the key-value label pair j
<pos-value-j> - the linear position of value j within the dynarray
<value-j> - the value j within the dynarray

Arguments:
Id- Atom identifying the dynarray
Serialized- Serialization list containing the dynarray data
  259dynarray_serialize(Id, Serialized) :-
  260
  261    % HAZARD: ground(Serialized) might be very expensive
  262    (var(Serialized) ->
  263        is_dynarray(Id),
  264        dynarray_to_serialized(Id, Serialized)
  265    ;
  266        ( Serialized = []
  267        ; serialized_to_dynarray(Id, Serialized) ),
  268        !
  269    ).
  270
  271%-------------------------------------------------------------------------------------
 dynarray_to_serialized(+Id:atom, +Serialized:data) is det
Serialize the contents (labels and values) of the dynarray.
Arguments:
Id- Atom identifying the dynarray
Serialized- Serialization list containing the dynarray data
  280dynarray_to_serialized(Id, Serialized) :-
  281
  282    % retrieve all labels (key and value pairs) in dynarray
  283    findall([Label,Value],
  284            dynarray_core:dynarr_labels(Id, Label, Value), Labels),
  285
  286    % retrieve all values (position-value pairs) in dynarray
  287    findall([Position,Value],
  288            dynarray_core:dynarr_values(Position, Id, Value), Values),
  289
  290    % join them in a single list
  291    append(Labels, Values, DynData),
  292
  293    % add dimensions ranges and number of labels
  294    memberchk([da_ranges,DimRanges], Labels),
  295    length(Labels, NumLabels),
  296    append([DimRanges,NumLabels], DynData, Serialized).
  297
  298%-------------------------------------------------------------------------------------
 serialized_to_dynarray(+Id:atom, +Serialized:data) is det
Restore the contents (labels and values) of the dynarray.
Arguments:
Id- Atom identifying the dynarray
Serialized- Serialization list containing the dynarray data
  307serialized_to_dynarray(Id, Serialized) :-
  308
  309    % create dynarray
  310    [DimRanges|[NumLabels|_]] = Serialized,
  311    dynarray_destroy(Id),                       % SANITY POINT
  312    dynarray_create(Id, DimRanges),
  313
  314    % restore the labels
  315    LabelsFinal is NumLabels + 2,
  316    serialized_to_labels_(Id, Serialized, 2, LabelsFinal),
  317
  318    % retrieve the positions/values list
  319    length(Serialized, ValuesFinal),
  320    serialized_to_values_(Id, Serialized, LabelsFinal, ValuesFinal).
 serialized_to_labels_(+Id:atom, +Labels, +PosCurr, +PosFinal) is det
Arguments:
Id- Atom identifying the dynarray
Labels- The labels (key-value pairs) to load to the dynarray
PosCurr- The current label position
PosLast- The last label position
  329% (done)
  330serialized_to_labels_(_Id, _Labels, PosFinal, PosFinal) :- !.
  331
  332% (iterate)
  333serialized_to_labels_(Id, Labels, PosCurr, PosFinal) :-
  334
  335    % register the label (da_* labels are not accepted)
  336    nth0(PosCurr, Labels, [Key,Value]),
  337    (dynarray_label(Id, Key, Value) ; true),
  338    !,
  339
  340    % go for the next label
  341    PosNext is PosCurr + 1,
  342    serialized_to_labels_(Id, Labels, PosNext, PosFinal).
 serialized_to_values_(+Id:atom, +Values, +PosCurr, +PosFinal) is det
Arguments:
Id- Atom identifying the dynarray
Value- The value to load to the dynarray
PosCurr- The current value position
PosLast- The last value position
  351% (done)
  352serialized_to_values_(_Id, _Values, PosFinal, PosFinal) :- !.
  353
  354% (iterate)
  355serialized_to_values_(Id, Values, PosCurr, PosFinal) :-
  356
  357    % load the value onto the dynarray
  358    nth0(PosCurr, Values, [Position,Value]),
  359    dynarray_position_value(Id, Position, Value),
  360
  361    % go for the next value
  362    PosNext is PosCurr + 1,
  363    serialized_to_values_(Id, Values, PosNext, PosFinal).
  364
  365%-------------------------------------------------------------------------------------
 dynarray_persist(+Id:atom, +DataSet:atom) is det
Persist the dynarray data to a Berkeley DB external storage.
Arguments:
Id- Atom identifying the dynarray
DataSet- Atom identifyingt the data set
  374dynarray_persist(Id, DataSet) :-
  375
  376    % fail point
  377    is_dynarray(Id),
  378
  379    % fail point (erase the dynarray storage)
  380    bdb_erase(Id, DataSet),
  381
  382    % obtain the dynarray data
  383    dynarray_serialize(Id, Data),
  384
  385    !,
  386    % fail point (persist the dynarray data)
  387    bdb_store(Id, DataSet, Data).
  388
  389%-------------------------------------------------------------------------------------
 dynarray_restore(+Id:atom, +DataSet:atom) is det
Restore the dynarray data from a Berkeley DB external storage.
Arguments:
Id- Atom identifying the dynarray
DataSet- Atom identifyingt the data set
  398dynarray_restore(Id, DataSet) :-
  399
  400    % fail point (retrieve the dynarray data from external storage)
  401    bdb_retrieve(Id, DataSet, Data),
  402
  403    % re-create the dynarray with its contents
  404    dynarray_serialize(Id, Data).
  405
  406%-------------------------------------------------------------------------------------
 dynarray_erase(+Id:atom, +DataSet:atom) is det
Erase the dynarray's persisted data.
Arguments:
Id- Atom identifying the dynarray
DataSet- Atom identifyingt the data set
  415dynarray_erase(Id, DataSet) :-
  416
  417    % fail point (erase the dynarray storage)
  418    bdb_erase(Id, DataSet)