From fbb4810c4f940dda63940ec64c5ccd3ceb4d2ee6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 14 Sep 2023 19:06:14 +0100 Subject: [PATCH] Configurable %key query on leveled (#8) Can be configured to ignore tombstone keys by default. --- priv/riak_kv.schema | 11 +++- src/riak_kv_leveled_backend.erl | 108 ++++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 35 deletions(-) diff --git a/priv/riak_kv.schema b/priv/riak_kv.schema index 9e5ac234d..469beeab8 100644 --- a/priv/riak_kv.schema +++ b/priv/riak_kv.schema @@ -1478,7 +1478,7 @@ %% @doc Choose to read repair to primary vnodes only %% When fallback vnodes are elected, then read repair will by default repair %% any missing data from the vnode - i.e. every GET while the fallback is in -%% play will lead to a PUT to add the rewuested object to the fallback vnode, +%% play will lead to a PUT to add the requested object to the fallback vnode, %% as the fallback by default starts empty. %% If the expectation is that failed vnodes are replaced quickly, as would be %% possible in a Cloud scenario, this may not be desirable. Read repair to @@ -1508,4 +1508,13 @@ {mapping, "handoff_deletes", "riak_kv.handoff_deletes", [ {datatype, {flag, enabled, disabled}}, {default, disabled} +]}. + +%% @doc For $key index queries, should keys which are tombstones be returned? +%% This config will only make a difference with the leveled backend; it is +%% ignored on other backends. Disable to change default behaviour and stop +%% returning keys of tombstones in $key queries. +{mapping, "dollarkey_readtombs", "riak_kv.dollarkey_readtombs", [ + {datatype, {flag, enabled, disabled}}, + {default, enabled} ]}. 
\ No newline at end of file diff --git a/src/riak_kv_leveled_backend.erl b/src/riak_kv_leveled_backend.erl index 567ca81b0..b17a94cd4 100644 --- a/src/riak_kv_leveled_backend.erl +++ b/src/riak_kv_leveled_backend.erl @@ -319,13 +319,14 @@ fold_keys(FoldKeysFun, Acc, Opts, #state{bookie=Bookie}) -> if Index /= false -> {index, QBucket, Q} = Index, - ?KV_INDEX_Q{filter_field=Field, - start_key=StartKey0, - start_term=StartTerm, - end_term=EndTerm, - return_terms=ReturnTerms, - start_inclusive=StartInc, - term_regex=TermRegex} = riak_index:upgrade_query(Q), + ?KV_INDEX_Q{ + filter_field=Field, + start_key=StartKey0, + start_term=StartTerm, + end_term=EndTerm, + return_terms=ReturnTerms, + start_inclusive=StartInc, + term_regex=TermRegex} = riak_index:upgrade_query(Q), StartKey = case StartInc of @@ -337,44 +338,50 @@ fold_keys(FoldKeysFun, Acc, Opts, #state{bookie=Bookie}) -> % If this is a $key index query, the start key is assumed % to mean the start of the range, and so we want to use % this start key inclusively (and so don't advance it to - % the next_key. + % the next_key). 
case Field of <<"$bucket">> -> - leveled_bookie:book_keylist(Bookie, - ?RIAK_TAG, - QBucket, - {StartKey, null}, - {FoldKeysFun, Acc}, - TermRegex); + leveled_bookie:book_keylist( + Bookie, + ?RIAK_TAG, + QBucket, + {StartKey, null}, + {FoldKeysFun, Acc}, + TermRegex); <<"$key">> -> - leveled_bookie:book_keylist(Bookie, - ?RIAK_TAG, - QBucket, - {StartKey, EndTerm}, - {FoldKeysFun, Acc}, - TermRegex); + ReadTombs = + application:get_env( + riak_kv, dollarkey_readtombs, true), + FoldHeadsFun = + dollarkey_foldfun( + FoldKeysFun, ReadTombs, TermRegex), + leveled_bookie:book_headfold( + Bookie, + ?RIAK_TAG, + {range, QBucket, {StartKey, EndTerm}}, + {FoldHeadsFun, Acc}, + false, + SnapPreFold, + false + ); _ -> - leveled_bookie:book_indexfold(Bookie, - {QBucket, StartKey}, - {FoldKeysFun, Acc}, - {Field, - StartTerm, - EndTerm}, - {ReturnTerms, - TermRegex}) + leveled_bookie:book_indexfold( + Bookie, + {QBucket, StartKey}, + {FoldKeysFun, Acc}, + {Field, StartTerm, EndTerm}, + {ReturnTerms, TermRegex}) end; Bucket /= false -> % Equivalent to $bucket query, but without the StartKey {bucket, B} = Bucket, - leveled_bookie:book_keylist(Bookie, - ?RIAK_TAG, B, - {FoldKeysFun, Acc}); + leveled_bookie:book_keylist( + Bookie, ?RIAK_TAG, B, {FoldKeysFun, Acc}); true -> % All key query - don't constrain by bucket - leveled_bookie:book_keylist(Bookie, - ?RIAK_TAG, - {FoldKeysFun, Acc}) + leveled_bookie:book_keylist( + Bookie, ?RIAK_TAG, {FoldKeysFun, Acc}) end, case {lists:member(async_fold, Opts), SnapPreFold} of @@ -638,6 +645,39 @@ callback(Ref, UnexpectedCallback, State) -> %% =================================================================== +-spec dollarkey_foldfun( + riak_kv_backend:fold_keys_fun(), boolean(), re:mp()|undefined) + -> riak_kv_backend:fold_objects_fun(). 
+dollarkey_foldfun(FoldKeysFun, ReadTombs, TermRegex) -> + FilteredFoldKeysFun = + fun(B, K, Acc) -> + case TermRegex of + undefined -> + FoldKeysFun(B, K, Acc); + TermRegex -> + case re:run(K, TermRegex) of + nomatch -> + Acc; + _ -> + FoldKeysFun(B, K, Acc) + end + end + end, + fun(B, K, HeadObj, KeyAcc) -> + case ReadTombs of + true -> + FilteredFoldKeysFun(B, K, KeyAcc); + false -> + MetaBin = element(5, riak_object:summary_from_binary(HeadObj)), + case riak_object:is_aae_object_deleted(MetaBin, false) of + {true, undefined} -> + KeyAcc; + _ -> + FilteredFoldKeysFun(B, K, KeyAcc) + end + end + end. + -spec log_fragmentation(eheap_alloc|binary_alloc) -> ok. log_fragmentation(Allocator) -> {MB_BS, MB_CS, SB_BS, SB_CS} =