This is an automated email from the ASF dual-hosted git repository. davisp pushed a commit to branch feature/user-partitioned-databases-davisp in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 0dccfd9662316649d47546b9b9bdcecefe2973ac Author: Paul J. Davis <paul.joseph.da...@gmail.com> AuthorDate: Thu Oct 25 16:58:48 2018 -0500 Implement configurable hash functions This provides the capability for features to specify alternative hash functions for placing documents in a given shard range. While the functionality exists with this implementation it is not yet actually used. --- src/mem3/src/mem3.erl | 8 ++--- src/mem3/src/mem3_hash.erl | 76 ++++++++++++++++++++++++++++++++++++++++++++ src/mem3/src/mem3_shards.erl | 4 +-- src/mem3/src/mem3_util.erl | 21 ++++++++---- 4 files changed, 96 insertions(+), 13 deletions(-) diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl index de63300..ae52104 100644 --- a/src/mem3/src/mem3.erl +++ b/src/mem3/src/mem3.erl @@ -234,15 +234,15 @@ dbname(_) -> %% @doc Determine if DocId belongs in shard (identified by record or filename) belongs(#shard{}=Shard, DocId) when is_binary(DocId) -> [Begin, End] = range(Shard), - belongs(Begin, End, DocId); + belongs(Begin, End, Shard, DocId); belongs(<<"shards/", _/binary>> = ShardName, DocId) when is_binary(DocId) -> [Begin, End] = range(ShardName), - belongs(Begin, End, DocId); + belongs(Begin, End, ShardName, DocId); belongs(DbName, DocId) when is_binary(DbName), is_binary(DocId) -> true. -belongs(Begin, End, DocId) -> - HashKey = mem3_util:hash(DocId), +belongs(Begin, End, Shard, DocId) -> + HashKey = mem3_hash:calculate(Shard, DocId), Begin =< HashKey andalso HashKey =< End. range(#shard{range = Range}) -> diff --git a/src/mem3/src/mem3_hash.erl b/src/mem3/src/mem3_hash.erl new file mode 100644 index 0000000..4003aca --- /dev/null +++ b/src/mem3/src/mem3_hash.erl @@ -0,0 +1,76 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mem3_hash). + +-export([ + calculate/2, + + get_hash_fun/1, + + crc32/1 +]). + + +-include_lib("mem3/include/mem3.hrl"). + + +calculate(#shard{opts = Opts} = Shard, DocId) -> + Props = couch_util:get_value(props, Opts, []), + MFA = get_hash_fun_int(Props), + hash(MFA, DocId); + +calculate(#ordered_shard{opts = Opts}, DocId) -> + Props = couch_util:get_value(props, Opts, []), + MFA = get_hash_fun_int(Props), + hash(MFA, DocId); + +calculate(DbName, DocId) when is_binary(DbName) -> + MFA = get_hash_fun(DbName), + hash(MFA, DocId); + +calculate({Mod, Fun, Args}, DocId) -> + erlang:apply(Mod, Fun, [DocId | Args]). + + +get_hash_fun(#shard{opts = Opts}) -> + get_hash_fun_int(Opts); + +get_hash_fun(#ordered_shard{opts = Opts}) -> + get_hash_fun_int(Opts); + +get_hash_fun(DbName0) when is_binary(DbName0) -> + DbName = mem3:dbname(DbName0), + try + [Shard | _] = mem3_shards:for_db(DbName), + get_hash_fun_int(Shard#shard.opts) + catch error:database_does_not_exist -> + {?MODULE, crc32, []} + end. + + +crc32(Item) when is_binary(Item) -> + erlang:crc32(Item); +crc32(Item) -> + erlang:crc32(term_to_binary(Item)). + + + +get_hash_fun_int(Opts) when is_list(Opts) -> + case lists:keyfind(hash, 1, Opts) of + {hash, [Mod, Fun, Args]} -> + {Mod, Fun, Args}; + _ -> + {?MODULE, hash, []} + end. + + diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl index 183f28f..18fca23 100644 --- a/src/mem3/src/mem3_shards.erl +++ b/src/mem3/src/mem3_shards.erl @@ -67,7 +67,7 @@ for_docid(DbName, DocId) -> for_docid(DbName, DocId, []). for_docid(DbName, DocId, Options) -> - HashKey = mem3_util:hash(DocId), + HashKey = mem3_hash:hash(DbName, DocId), ShardHead = #shard{ dbname = DbName, range = ['$1', '$2'], @@ -397,7 +397,7 @@ load_shards_from_db(ShardDb, DbName) -> load_shards_from_disk(DbName, DocId)-> Shards = load_shards_from_disk(DbName), - HashKey = mem3_util:hash(DocId), + HashKey = mem3_hash:hash(hd(Shards), Options), [S || S <- Shards, in_range(S, HashKey)]. in_range(Shard, HashKey) -> diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl index 254a6df..c6a8494 100644 --- a/src/mem3/src/mem3_util.erl +++ b/src/mem3/src/mem3_util.erl @@ -12,7 +12,7 @@ -module(mem3_util). --export([hash/1, name_shard/2, create_partition_map/5, build_shards/2, +-export([name_shard/2, create_partition_map/5, build_shards/2, n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, shard_info/1, ensure_exists/1, open_db_doc/1]). -export([is_deleted/1, rotate_list/2]). @@ -29,10 +29,6 @@ -include_lib("mem3/include/mem3.hrl"). -include_lib("couch/include/couch_db.hrl"). -hash(Item) when is_binary(Item) -> - erlang:crc32(Item); -hash(Item) -> - erlang:crc32(term_to_binary(Item)). name_shard(Shard) -> name_shard(Shard, ""). @@ -162,7 +158,7 @@ build_shards_by_node(DbName, DocProps) -> dbname = DbName, node = to_atom(Node), range = [Beg, End], - opts = get_engine_opt(DocProps) + opts = get_shard_opts(DocProps) }, Suffix) end, Ranges) end, ByNode). @@ -180,7 +176,7 @@ build_shards_by_range(DbName, DocProps) -> node = to_atom(Node), range = [Beg, End], order = Order, - opts = get_engine_opt(DocProps) + opts = get_shard_opts(DocProps) }, Suffix) end, lists:zip(Nodes, lists:seq(1, length(Nodes)))) end, ByRange). @@ -197,6 +193,9 @@ to_integer(N) when is_binary(N) -> to_integer(N) when is_list(N) -> list_to_integer(N). +get_shard_opts(DocProps) -> + get_engine_opt(DocProps) ++ get_props_opt(DocProps). + get_engine_opt(DocProps) -> case couch_util:get_value(<<"engine">>, DocProps) of Engine when is_binary(Engine) -> @@ -205,6 +204,14 @@ get_engine_opt(DocProps) -> [] end. +get_props_opt(DocProps) -> + case couch_util:get_value(<<"props">>, DocProps) of + {Props} when is_list(Props) -> + [{props, Props}]; + _ -> + [] + end. + n_val(undefined, NodeCount) -> n_val(config:get("cluster", "n", "3"), NodeCount); n_val(N, NodeCount) when is_list(N) ->