1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2016-2022, VU University Amsterdam 7 SWI-Prolog Solutions b.v. 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(rocksdb, 37 [ rocks_open/3, % +Directory, -RocksDB, +Options 38 rocks_close/1, % +RocksDB 39 40 rocks_put/3, % +RocksDB, +Key, +Value 41 rocks_put/4, % +RocksDB, +Key, +Value, +Options 42 rocks_merge/3, % +RocksDB, +Key, +Value 43 rocks_merge/4, % +RocksDB, +Key, +Value, +Options 44 rocks_delete/2, % +RocksDB, +Key 45 rocks_delete/3, % +RocksDB, +Key, +Options 46 rocks_batch/2, % +RocksDB, +Actions 47 rocks_batch/3, % +RocksDB, +Actions, +Options 48 49 rocks_get/3, % +RocksDB, +Key, -Value 50 rocks_get/4, % +RocksDB, +Key, -Value, +Options 51 rocks_enum/3, % +RocksDB, ?Key, ?Value 52 rocks_enum/4, % +RocksDB, ?Key, ?Value, +Options 53 rocks_enum_from/4, % +RocksDB, ?Key, ?Value, +From 54 rocks_enum_from/5, % +RocksDB, ?Key, ?Value, +From, +Options 55 rocks_enum_prefix/4, % +RocksDB, ?Suffix, ?Value, +Prefix 56 rocks_enum_prefix/5, % +RocksDB, ?Suffix, ?Value, +Prefix, +Options 57 58 rocks_property/2 % +RocksDB, ?Property 59 ]). 60:- use_module(library(option)). 61:- use_module(library(error)). 62:- use_foreign_library(foreign(rocksdb4pl)). 63 64:- meta_predicate 65 rocks_open( , , ). 66 67:- predicate_options(rocks_open/3, 3, 68 [ alias(atom), 69 open(oneof([once])), 70 mode(oneof([read_only,read_write])), 71 key(oneof([atom,string,binary,int32,int64, 72 float,double,term])), 73 value(any), 74 merge(callable), 75 prepare_for_bulk_load(oneof([true])), 76 optimize_for_small_db(oneof([true])), 77 increase_parallelism(oneof([true])), 78 create_if_missing(boolean), 79 create_missing_column_families(boolean), 80 error_if_exists(boolean), 81 paranoid_checks(boolean), 82 track_and_verify_wals_in_manifest(boolean), 83 info_log_level(oneof([debug,info,warn,error,fatal,header])), % default: info 84 env(boolean), 85 max_open_files(integer), 86 max_file_opening_threads(integer), 87 max_total_wal_size(integer), 88 statistics(boolean), % TODO: this only creates a Statistics object 89 use_fsync(boolean), 90 db_log_dir(string), 91 wal_dir(string), 92 delete_obsolete_files_period_micros(integer), 93 max_background_jobs(integer), 94 max_subcompactions(integer), 95 max_log_file_size(integer), 96 log_file_time_to_roll(integer), 97 keep_log_file_num(integer), 98 recycle_log_file_num(integer), 99 max_manifest_file_size(integer), 100 table_cache_numshardbits(integer), 101 wal_ttl_seconds(integer), 102 wal_size_limit_mb(integer), 103 manifest_preallocation_size(integer), 104 allow_mmap_reads(boolean), 105 allow_mmap_writes(boolean), 106 use_direct_reads(boolean), 107 use_direct_io_for_flush_and_compaction(boolean), 108 allow_fallocate(boolean), 109 is_fd_close_on_exec(boolean), 110 stats_dump_period_sec(integer), 111 stats_persist_period_sec(integer), 112 persist_stats_to_disk(boolean), 113 stats_history_buffer_size(integer), 114 advise_random_on_open(boolean), 115 db_write_buffer_size(integer), 116 write_buffer_manager(boolean), 117 % new_table_reader_for_compaction_inputs(boolean), % TODO: removed from rocksdb/options.h? 118 compaction_readahead_size(integer), 119 random_access_max_buffer_size(integer), 120 writable_file_max_buffer_size(integer), 121 use_adaptive_mutex(boolean), 122 bytes_per_sync(integer), 123 wal_bytes_per_sync(integer), 124 strict_bytes_per_sync(integer), 125 enable_thread_tracking(boolean), 126 delayed_write_rate(integer), 127 enable_pipelined_write(boolean), 128 unordered_write(boolean), 129 allow_concurrent_memtable_write(boolean), 130 enable_write_thread_adaptive_yield(boolean), 131 max_write_batch_group_size_bytes(integer), 132 write_thread_max_yield_usec(integer), 133 write_thread_slow_yield_usec(integer), 134 skip_stats_update_on_db_open(boolean), 135 skip_checking_sst_file_sizes_on_db_open(boolean), 136 allow_2pc(boolean), 137 fail_ifoptions_file_error(boolean), 138 dump_malloc_stats(boolean), 139 avoid_flush_during_recovery(boolean), 140 avoid_flush_during_shutdown(boolean), 141 allow_ingest_behind(boolean), 142 % preserve_deletes(boolean), % TODO: removed: https://github.com/facebook/rocksdb/issues/9090 143 two_write_queues(boolean), 144 manual_wal_flush(boolean), 145 atomic_flush(boolean), 146 avoid_unnecessary_blocking_io(boolean), 147 write_dbid_to_manifest(boolean), 148 log_readahead_size(boolean), 149 best_efforts_recovery(boolean), 150 max_bgerror_resume_count(integer), 151 bgerror_resume_retry_interval(integer), 152 allow_data_in_errors(boolean), 153 db_host_id(string) 154 ]). 155:- predicate_options(rocks_get/4, 4, 156 [ 157 readahead_size(integer), 158 max_skippable_internal_keys(integer), 159 verify_checksums(boolean), 160 fill_cache(boolean), 161 tailing(boolean), 162 total_order_seek(boolean), 163 auto_prefix_mode(boolean), 164 prefix_same_as_start(boolean), 165 pin_data(boolean), 166 background_purge_on_iterator_cleanup(boolean), 167 ignore_range_deletions(boolean), 168 % iter_start_seqnum(integer), % TODO: removed https://github.com/facebook/rocksdb/issues/9090 169 io_timeout(integer), 170 value_size_soft_limit(integer) 171 ]). 172:- predicate_options(rocks_enum/4, 4, 173 [ pass_to(rocks_get/4, 4) 174 ]). 175:- predicate_options(rocks_enum_from/5, 5, 176 [ pass_to(rocks_get/4, 4) 177 ]). 178:- predicate_options(rocks_enum_prefix/5, 5, 179 [ pass_to(rocks_get/4, 4) 180 ]). 181:- predicate_options(rocks_put/4, 4, 182 [ sync(boolean), 183 disableWAL(boolean), 184 ignore_missing_column_families(boolean), 185 no_slowdown(boolean), 186 low_pri(boolean), 187 memtable_insert_hint_per_batch(boolean) 188 ]). 189:- predicate_options(rocks_delete/3, 3, 190 [ pass_to(rocks_put/4, 4) 191 ]). 192:- predicate_options(rocks_merge/4, 4, 193 [ pass_to(rocks_put/4, 4) 194 ]). 195:- predicate_options(rocks_batch/4, 4, 196 [ pass_to(rocks_put/4, 4) 197 ]).
rocksdb/include/rocksdb/options.h
are supported, in addition
to the following options:
alias(+Name)
Give the database a name instead of using an anonymous
handle. A named database is not subject to GC and must
be closed explicitly.open(+How)
If How is once
and an alias is given, a second open simply
returns a handle to the already open database.key(+Type)
value(+Type)
Define the type for the key and value. This must be
consistent over multiple invocations. Defined types are:
X = f(a), Key = k(X,X)
is a different
key from Key = k(f(a),f(a))
and X = [a|X]
is a
different key from X = [a,a|X]
. Applications for which
such keys should match must first normalize the key.
Normalization can be based on term_factorized/3 from
library(terms).
In addition, value
accepts one of list(type)
or set(type)
,
currently only for the numeric types. This causes
rocks_put/3 and rocks_get/3 to exchange the value as a
list and installs a built-in merge function.mode(+Mode)
One of read_write
(default) or read_only
. The latter
uses OpenForReadOnly() to open the database.optimize_for_small_db(true)
- Use this if your DB is very
small (like under 1GB) and you don't want tog
spend lots of memory for memtables.increase_parallelism(true)
- see DBOptions::IncreaseParallelism()281% 282% @bug You must call rocks_close(Directory) to ensure clean shutdown 283% Failure to call rdb_close/1 usually doesn't result in data 284% loss because rocksdb can recover, depending on the setting 285% of the `sync` option. 286% @see https://github.com/facebook/rocksdb/wiki/Known-Issues 287 288rocks_open(Dir, DB, Options0) :- 289 meta_options(is_meta, Options0, Options), 290 rocks_open_(Dir, DB, Options). 291 292is_meta(merge).
list(Type)
or set(Type)
, Value must be a list. For
set(Type)
the list is converted into an ordered set.
308rocks_put(RocksDB, Key, Value) :-
309 rocks_put(RocksDB, Key, Value, []).
list(Type)
or set(Type)
to be used when opening the database.
Using rocks_merge/3 rather than rocks_get/2, update and
rocks_put/3 makes the operation atomic and reduces disk
accesses.
Options are the same as for rocks_put/4.
Merger is called as below, where two clauses are required:
one with How set to partial
and one with How set to
full
. If full
, MergeValue is a list of values that need
to be merged, if partial
, MergeValue is a single value.
call(:Merger, +How, +Key, +Value0, +MergeValue, -Value)
If Key is not in RocksDB, Value0 is unified with a value that
depends on the value type. If the value type is an atom, it is
unified with the empty atom; if it is string
or binary
it is
unified with an empty string; if it is int32
or int64
it is
unified with the integer 0; and finally if the type is term
it
is unified with the empty list.
For example, if the value is a set of Prolog values we open the
database with value(term)
to allow for Prolog lists as value and
we define merge_set/5 as below.
merge(partial, _Key, Left, Right, Result) :- ord_union(Left, Right, Result). merge(full, _Key, Initial, Additions, Result) :- append([Initial|Additions], List), sort(List, Result).
If the merge callback fails or raises an exception the merge operation fails and the error is logged through the RocksDB logging facilities. Note that the merge callback can be called in a different thread or even in a temporary created thread if RocksDB decides to merge remaining values in the background.
361rocks_merge(RocksDB, Key, Value) :-
362 rocks_merge(RocksDB, Key, Value, []).
Options are the same as for rocks_put/4.
371rocks_delete(RocksDB, Key) :-
372 rocks_delete(RocksDB, Key, []).
list(Type)
or set(Type)
this
returns a Prolog list.
381rocks_get(RocksDB, Key, Value) :-
382 rocks_get(RocksDB, Key, Value, []).
list(Type)
or set(Type)
Value is a list.
Options are the same as for rocks_get/4.
393rocks_enum(RocksDB, Key, Value) :-
394 rocks_enum(RocksDB, Key, Value, []).
atom
, string
or binary
. To only
iterate all keys with Prefix, use rocks_enum_prefix/4 or the
construct below.
Options are the same as for rocks_get/4.
rocks_enum_from(DB, Key, Value, Prefix), ( sub_atom(Key, 0, _, _, Prefix) -> handle(Key, Value) ; !, fail )
414rocks_enum_from(RocksDB, Key, Value, Prefix) :-
415 rocks_enum_from(RocksDB, Key, Value, Prefix, []).
Options are the same as for rocks_get/4.
427rocks_enum_prefix(RocksDB, Suffix, Value, Prefix) :-
428 rocks_enum_prefix(RocksDB, Suffix, Value, Prefix, []).
Options are the same as for rocks_put/4.
Actions is a list of:
The following example is translated from the RocksDB documentation:
rocks_get(RocksDB, key1, Value), rocks_batch(RocksDB, [ delete(key1), put(key2, Value) ])
457rocks_batch(RocksDB, Actions) :-
458 rocks_batch(RocksDB, Actions, []).
463rocks_property(RocksDB, Property) :- 464 var(Property), !, 465 rocks_property(P), 466 rocks_property(RocksDB, P, Value), 467 Property =.. [P,Value]. 468rocks_property(RocksDB, Property) :- 469 Property =.. [P,Value], !, 470 rocks_property(RocksDB, P, Value). 471rocks_property(_RocksDB, Property) :- 472 type_error(property, Property). 473 474rocks_property(estimate_num_keys)
RocksDB interface
RocksDB is an embeddable persistent key-value store for fast storage. The store can be used only from one process at the same time. It may be used from multiple Prolog threads though. This library provides a SWI-Prolog binding for RocksDB. RocksDB just associates byte arrays. This interface defines several mappings between Prolog datastructures and byte arrays that may be configured to store both keys and values. See rocks_open/3 for details.